Fangjun Kuang
Committed by GitHub

Pascal API for non-streaming ASR (#1247)

@@ -115,9 +115,11 @@ jobs: @@ -115,9 +115,11 @@ jobs:
115 if [[ ${{ matrix.os }} == 'windows-latest' ]]; then 115 if [[ ${{ matrix.os }} == 'windows-latest' ]]; then
116 cp -v install/lib/*.dll ../pascal-api-examples/read-wav 116 cp -v install/lib/*.dll ../pascal-api-examples/read-wav
117 cp -v install/lib/*.dll ../pascal-api-examples/streaming-asr 117 cp -v install/lib/*.dll ../pascal-api-examples/streaming-asr
  118 + cp -v install/lib/*.dll ../pascal-api-examples/non-streaming-asr
118 119
119 cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/read-wav 120 cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/read-wav
120 cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/streaming-asr 121 cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/streaming-asr
  122 + cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/non-streaming-asr
121 fi 123 fi
122 124
123 - name: Run Pascal test (Read wav test) 125 - name: Run Pascal test (Read wav test)
@@ -133,6 +135,48 @@ jobs: @@ -133,6 +135,48 @@ jobs:
133 ls -lh 135 ls -lh
134 popd 136 popd
135 137
  138 + - name: Run Pascal test (Non Streaming ASR)
  139 + shell: bash
  140 + run: |
  141 + export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
  142 + # NOTE(review): the PATH entry above looks like the Windows runner's FPC location - confirm.
  143 + cd ./pascal-api-examples
  144 + # Each example script is run from inside non-streaming-asr.
  145 + pushd non-streaming-asr
  146 + ./run-zipformer-transducer.sh
  147 + rm -rf sherpa-onnx-*
  148 + echo "---"
  149 + # Everything matching sherpa-onnx-* is deleted after each example before the next one runs.
  150 + ./run-whisper.sh
  151 + rm -rf sherpa-onnx-*
  152 + echo "---"
  153 +
  154 + ./run-nemo-transducer.sh
  155 + rm -rf sherpa-onnx-*
  156 + echo "---"
  157 +
  158 + ./run-nemo-ctc.sh
  159 + rm -rf sherpa-onnx-*
  160 + echo "---"
  161 +
  162 + ./run-sense-voice.sh
  163 + rm -rf sherpa-onnx-*
  164 + echo "---"
  165 +
  166 + ./run-telespeech-ctc.sh
  167 + rm -rf sherpa-onnx-*
  168 + echo "---"
  169 + # The two Paraformer examples run back to back; cleanup happens once, after both.
  170 + ./run-paraformer.sh
  171 +
  172 + ./run-paraformer-itn.sh
  173 +
  174 + rm -rf sherpa-onnx-*
  175 + echo "---"
  176 + # List what is left in the example directory.
  177 + ls -lh
  178 + popd
  179 +
136 - name: Run Pascal test (Streaming ASR) 180 - name: Run Pascal test (Streaming ASR)
137 shell: bash 181 shell: bash
138 run: | 182 run: |
@@ -141,10 +185,15 @@ jobs: @@ -141,10 +185,15 @@ jobs:
141 cd ./pascal-api-examples 185 cd ./pascal-api-examples
142 186
143 pushd streaming-asr 187 pushd streaming-asr
  188 +
144 ./run-zipformer-transducer.sh 189 ./run-zipformer-transducer.sh
145 rm -rf sherpa-onnx-* 190 rm -rf sherpa-onnx-*
146 echo "---" 191 echo "---"
147 192
  193 + ./run-nemo-transducer.sh
  194 + rm -rf sherpa-onnx-*
  195 + echo "---"
  196 +
148 if [[ ${{ matrix.os }} != 'windows-latest' ]]; then 197 if [[ ${{ matrix.os }} != 'windows-latest' ]]; then
149 ./run-paraformer.sh 198 ./run-paraformer.sh
150 rm -rf sherpa-onnx-* 199 rm -rf sherpa-onnx-*
@@ -25,13 +25,17 @@ @@ -25,13 +25,17 @@
25 25
26 ### Supported programming languages 26 ### Supported programming languages
27 27
28 -| 1. C++ | 2. C | 3. Python | 4. C# | 5. Java | 6. JavaScript |  
29 -|--------|-------|-----------|-------|---------|---------------|  
30 -| ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | 28 +| 1. C++ | 2. C | 3. Python | 4. C# | 5. Java |
  29 +|--------|-------|-----------|-------|---------|
  30 +| ✔️ | ✔️ | ✔️ | ✔️ | ✔️ |
31 31
32 -| 7. Kotlin | 8. Swift | 9. Go | 10. Dart | 11. Rust | 12. Pascal |  
33 -|-----------|----------|-------|----------|----------|------------|  
34 -| ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | 32 +| 6. JavaScript | 7. Kotlin | 8. Swift | 9. Go | 10. Dart |
  33 +|---------------|-----------|----------|-------|----------|
  34 +| ✔️ | ✔️ | ✔️ | ✔️ | ✔️ |
  35 +
  36 +| 11. Rust | 12. Pascal |
  37 +|----------|------------|
  38 +| ✔️ | ✔️ |
35 39
36 For Rust support, please see https://github.com/thewh1teagle/sherpa-rs 40 For Rust support, please see https://github.com/thewh1teagle/sherpa-rs
37 41
@@ -7,3 +7,4 @@ APIs of [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx). @@ -7,3 +7,4 @@ APIs of [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).
7 |---------|------------| 7 |---------|------------|
8 |[read-wav](./read-wav)|It shows how to read a wave file.| 8 |[read-wav](./read-wav)|It shows how to read a wave file.|
9 |[streaming-asr](./streaming-asr)| It shows how to use streaming models for speech recognition.| 9 |[streaming-asr](./streaming-asr)| It shows how to use streaming models for speech recognition.|
  10 +|[non-streaming-asr](./non-streaming-asr)| It shows how to use non-streaming models for speech recognition.|
  1 +!run-*.sh
  2 +zipformer_transducer
  3 +whisper
  4 +nemo_transducer
  5 +nemo_ctc
  6 +paraformer
  7 +paraformer_itn
  8 +sense_voice
  9 +telespeech_ctc
  1 +# Introduction
  2 +
  3 +This folder contains examples of using sherpa-onnx's Object Pascal
  4 +APIs with non-streaming models for speech recognition.
  5 +
  6 +|File|Description|
  7 +|----|-----------|
  8 +|[run-nemo-ctc.sh](./run-nemo-ctc.sh)|Use a non-streaming NeMo CTC model for speech recognition|
  9 +|[run-nemo-transducer.sh](./run-nemo-transducer.sh)|Use a non-streaming NeMo transducer model for speech recognition|
  10 +|[run-paraformer-itn.sh](./run-paraformer-itn.sh)|Use a non-streaming Paraformer model for speech recognition with inverse text normalization for numbers|
  11 +|[run-paraformer.sh](./run-paraformer.sh)|Use a non-streaming Paraformer model for speech recognition|
  12 +|[run-sense-voice.sh](./run-sense-voice.sh)|Use a non-streaming SenseVoice model for speech recognition|
  13 +|[run-telespeech-ctc.sh](./run-telespeech-ctc.sh)|Use a non-streaming TeleSpeech CTC model for speech recognition|
  14 +|[run-whisper.sh](./run-whisper.sh)|Use a Whisper model for speech recognition|
  15 +|[run-zipformer-transducer.sh](./run-zipformer-transducer.sh)|Use a non-streaming Zipformer transducer model for speech recognition|
  1 +{ Copyright (c) 2024 Xiaomi Corporation }
  2 +
  3 +{
  4 +This file shows how to use a non-streaming NeMo CTC model
  5 +to decode files.
  6 +
  7 +You can download the model files from
  8 +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  9 +}
  10 +
  11 +program nemo_ctc;
  12 +
  13 +{$mode objfpc}
  14 +
  15 +uses
  16 + sherpa_onnx,
  17 + DateUtils,
  18 + SysUtils;
  19 +
  20 +var
  21 + Wave: TSherpaOnnxWave;
  22 + WaveFilename: AnsiString;
  23 +
  24 + Config: TSherpaOnnxOfflineRecognizerConfig;
  25 + Recognizer: TSherpaOnnxOfflineRecognizer;
  26 + Stream: TSherpaOnnxOfflineStream;
  27 + RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
  28 +
  29 + Start: TDateTime;
  30 + Stop: TDateTime;
  31 +
  32 + Elapsed: Single;
  33 + Duration: Single;
  34 + RealTimeFactor: Single;
  35 +begin
  36 + Config.ModelConfig.NeMoCtC.Model := './sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/model.onnx';
  37 + Config.ModelConfig.Tokens := './sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt';
  38 + Config.ModelConfig.Provider := 'cpu';
  39 + Config.ModelConfig.NumThreads := 1;
  40 + Config.ModelConfig.Debug := False;
  41 +
  42 + WaveFilename := './sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/es-spanish.wav';
  43 +
  44 + Wave := SherpaOnnxReadWave(WaveFilename);
  45 +
  46 + Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  47 + Stream := Recognizer.CreateStream();
  48 + Start := Now;
  49 +
  50 + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  51 + Recognizer.Decode(Stream);
  52 +
  53 + RecognitionResult := Recognizer.GetResult(Stream);
  54 +
  55 + Stop := Now;
  56 +
  57 + Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  58 + Duration := Length(Wave.Samples) / Wave.SampleRate;
  59 + RealTimeFactor := Elapsed / Duration;
  60 +
  61 + WriteLn(RecognitionResult.ToString);
  62 + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  63 + WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  64 + WriteLn(Format('Wave duration %.3f s', [Duration]));
  65 + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
  66 +
  67 + {Free resources to avoid memory leak.
  68 +
  69 + Note: You don't need to invoke them for this simple script.
  70 + However, you have to invoke them in your own large/complex project.
  71 + }
  72 + FreeAndNil(Stream);
  73 + FreeAndNil(Recognizer);
  74 +end.
  1 +{ Copyright (c) 2024 Xiaomi Corporation }
  2 +
  3 +{
  4 +This file shows how to use a non-streaming NeMo transducer
  5 +to decode files.
  6 +
  7 +You can download the model files from
  8 +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  9 +}
  10 +
  11 +program nemo_transducer;
  12 +
  13 +{$mode objfpc}
  14 +
  15 +uses
  16 + sherpa_onnx,
  17 + DateUtils,
  18 + SysUtils;
  19 +
  20 +var
  21 + Wave: TSherpaOnnxWave;
  22 + WaveFilename: AnsiString;
  23 +
  24 + Config: TSherpaOnnxOfflineRecognizerConfig;
  25 + Recognizer: TSherpaOnnxOfflineRecognizer;
  26 + Stream: TSherpaOnnxOfflineStream;
  27 + RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
  28 +
  29 + Start: TDateTime;
  30 + Stop: TDateTime;
  31 +
  32 + Elapsed: Single;
  33 + Duration: Single;
  34 + RealTimeFactor: Single;
  35 +begin
  36 + Config.ModelConfig.Transducer.Encoder := './sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/encoder.onnx';
  37 + Config.ModelConfig.Transducer.Decoder := './sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/decoder.onnx';
  38 + Config.ModelConfig.Transducer.Joiner := './sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/joiner.onnx';
  39 + Config.ModelConfig.ModelType := 'nemo_transducer';
  40 + Config.ModelConfig.Tokens := './sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt';
  41 + Config.ModelConfig.Provider := 'cpu';
  42 + Config.ModelConfig.NumThreads := 1;
  43 + Config.ModelConfig.Debug := False;
  44 +
  45 + WaveFilename := './sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/de-german.wav';
  46 +
  47 + Wave := SherpaOnnxReadWave(WaveFilename);
  48 +
  49 + Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  50 + Stream := Recognizer.CreateStream();
  51 + Start := Now;
  52 +
  53 + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  54 + Recognizer.Decode(Stream);
  55 +
  56 + RecognitionResult := Recognizer.GetResult(Stream);
  57 +
  58 + Stop := Now;
  59 +
  60 + Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  61 + Duration := Length(Wave.Samples) / Wave.SampleRate;
  62 + RealTimeFactor := Elapsed / Duration;
  63 +
  64 + WriteLn(RecognitionResult.ToString);
  65 + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  66 + WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  67 + WriteLn(Format('Wave duration %.3f s', [Duration]));
  68 + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
  69 +
  70 + {Free resources to avoid memory leak.
  71 +
  72 + Note: You don't need to invoke them for this simple script.
  73 + However, you have to invoke them in your own large/complex project.
  74 + }
  75 + FreeAndNil(Stream);
  76 + FreeAndNil(Recognizer);
  77 +end.
  1 +{ Copyright (c) 2024 Xiaomi Corporation }
  2 +
  3 +{
  4 +This file shows how to use a non-streaming Paraformer model
  5 +to decode files.
  6 +
  7 +You can download the model files from
  8 +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  9 +}
  10 +
  11 +program paraformer;
  12 +
  13 +{$mode objfpc}
  14 +
  15 +uses
  16 + sherpa_onnx,
  17 + DateUtils,
  18 + SysUtils;
  19 +
  20 +var
  21 + Wave: TSherpaOnnxWave;
  22 + WaveFilename: AnsiString;
  23 +
  24 + Config: TSherpaOnnxOfflineRecognizerConfig;
  25 + Recognizer: TSherpaOnnxOfflineRecognizer;
  26 + Stream: TSherpaOnnxOfflineStream;
  27 + RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
  28 +
  29 + Start: TDateTime;
  30 + Stop: TDateTime;
  31 +
  32 + Elapsed: Single;
  33 + Duration: Single;
  34 + RealTimeFactor: Single;
  35 +begin
  36 + Config.ModelConfig.Paraformer.Model := './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx';
  37 + Config.ModelConfig.Tokens := './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt';
  38 + Config.ModelConfig.Provider := 'cpu';
  39 + Config.ModelConfig.NumThreads := 1;
  40 + Config.ModelConfig.Debug := False;
  41 +
  42 + WaveFilename := './sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/3-sichuan.wav';
  43 +
  44 + Wave := SherpaOnnxReadWave(WaveFilename);
  45 +
  46 + Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  47 + Stream := Recognizer.CreateStream();
  48 + Start := Now;
  49 +
  50 + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  51 + Recognizer.Decode(Stream);
  52 +
  53 + RecognitionResult := Recognizer.GetResult(Stream);
  54 +
  55 + Stop := Now;
  56 +
  57 + Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  58 + Duration := Length(Wave.Samples) / Wave.SampleRate;
  59 + RealTimeFactor := Elapsed / Duration;
  60 +
  61 + WriteLn(RecognitionResult.ToString);
  62 + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  63 + WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  64 + WriteLn(Format('Wave duration %.3f s', [Duration]));
  65 + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
  66 +
  67 + {Free resources to avoid memory leak.
  68 +
  69 + Note: You don't need to invoke them for this simple script.
  70 + However, you have to invoke them in your own large/complex project.
  71 + }
  72 + FreeAndNil(Stream);
  73 + FreeAndNil(Recognizer);
  74 +end.
  1 +{ Copyright (c) 2024 Xiaomi Corporation }
  2 +
  3 +{
  4 +This file shows how to use a non-streaming Paraformer model
  5 +to decode files with inverse text normalization for numbers.
  6 +
  7 +You can download the model files from
  8 +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  9 +}
  10 +
  11 +program paraformer_itn;
  12 +
  13 +{$mode objfpc}
  14 +
  15 +uses
  16 + sherpa_onnx,
  17 + DateUtils,
  18 + SysUtils;
  19 +
  20 +var
  21 + Wave: TSherpaOnnxWave;
  22 + WaveFilename: AnsiString;
  23 +
  24 + Config: TSherpaOnnxOfflineRecognizerConfig;
  25 + Recognizer: TSherpaOnnxOfflineRecognizer;
  26 + Stream: TSherpaOnnxOfflineStream;
  27 + RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
  28 +
  29 + Start: TDateTime;
  30 + Stop: TDateTime;
  31 +
  32 + Elapsed: Single;
  33 + Duration: Single;
  34 + RealTimeFactor: Single;
  35 +begin
  36 + Config.ModelConfig.Paraformer.Model := './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx';
  37 + Config.ModelConfig.Tokens := './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt';
  38 + Config.ModelConfig.Provider := 'cpu';
  39 + Config.ModelConfig.NumThreads := 1;
  40 + Config.ModelConfig.Debug := False;
  41 + Config.RuleFsts := './itn_zh_number.fst';
  42 +
  43 + WaveFilename := './itn-zh-number.wav';
  44 +
  45 + Wave := SherpaOnnxReadWave(WaveFilename);
  46 +
  47 + Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  48 + Stream := Recognizer.CreateStream();
  49 + Start := Now;
  50 +
  51 + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  52 + Recognizer.Decode(Stream);
  53 +
  54 + RecognitionResult := Recognizer.GetResult(Stream);
  55 +
  56 + Stop := Now;
  57 +
  58 + Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  59 + Duration := Length(Wave.Samples) / Wave.SampleRate;
  60 + RealTimeFactor := Elapsed / Duration;
  61 +
  62 + WriteLn(RecognitionResult.ToString);
  63 + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  64 + WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  65 + WriteLn(Format('Wave duration %.3f s', [Duration]));
  66 + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
  67 +
  68 + {Free resources to avoid memory leak.
  69 +
  70 + Note: You don't need to invoke them for this simple script.
  71 + However, you have to invoke them in your own large/complex project.
  72 + }
  73 + FreeAndNil(Stream);
  74 + FreeAndNil(Recognizer);
  75 +end.
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
  6 +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
  7 +
  8 +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
  9 +
  10 +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  11 + mkdir -p ../../build
  12 + pushd ../../build
  13 + cmake \
  14 + -DCMAKE_INSTALL_PREFIX=./install \
  15 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  16 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  17 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  18 + -DBUILD_SHARED_LIBS=ON \
  19 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  20 + ..
  21 +
  22 + cmake --build . --target install --config Release
  23 + ls -lh lib
  24 + popd
  25 +fi
  26 +
  27 +if [ ! -f ./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt ]; then
  28 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
  29 + tar xvf sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
  30 + rm sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
  31 +fi
  32 +
  33 +fpc \
  34 + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  35 + -Fl$SHERPA_ONNX_DIR/build/install/lib \
  36 + ./nemo_ctc.pas
  37 +
  38 +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
  39 +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
  40 +
  41 +./nemo_ctc
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
  6 +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
  7 +
  8 +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
  9 +
  10 +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  11 + mkdir -p ../../build
  12 + pushd ../../build
  13 + cmake \
  14 + -DCMAKE_INSTALL_PREFIX=./install \
  15 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  16 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  17 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  18 + -DBUILD_SHARED_LIBS=ON \
  19 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  20 + ..
  21 +
  22 + cmake --build . --target install --config Release
  23 + ls -lh lib
  24 + popd
  25 +fi
  26 +
  27 +if [ ! -f ./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt ]; then
  28 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
  29 +
  30 + tar xvf sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
  31 + rm sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
  32 +fi
  33 +
  34 +fpc \
  35 + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  36 + -Fl$SHERPA_ONNX_DIR/build/install/lib \
  37 + ./nemo_transducer.pas
  38 +
  39 +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
  40 +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
  41 +
  42 +./nemo_transducer
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
  6 +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
  7 +
  8 +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
  9 +
  10 +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  11 + mkdir -p ../../build
  12 + pushd ../../build
  13 + cmake \
  14 + -DCMAKE_INSTALL_PREFIX=./install \
  15 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  16 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  17 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  18 + -DBUILD_SHARED_LIBS=ON \
  19 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  20 + ..
  21 +
  22 + cmake --build . --target install --config Release
  23 + ls -lh lib
  24 + popd
  25 +fi
  26 +
  27 +if [ ! -f ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt ]; then
  28 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  29 +
  30 + tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  31 + rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  32 +fi
  33 +
  34 +if [ ! -f ./itn-zh-number.wav ]; then
  35 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
  36 +fi
  37 +
  38 +if [ ! -f ./itn_zh_number.fst ]; then
  39 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
  40 +fi
  41 +
  42 +fpc \
  43 + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  44 + -Fl$SHERPA_ONNX_DIR/build/install/lib \
  45 + ./paraformer_itn.pas
  46 +
  47 +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
  48 +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
  49 +
  50 +./paraformer_itn
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
  6 +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
  7 +
  8 +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
  9 +
  10 +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  11 + mkdir -p ../../build
  12 + pushd ../../build
  13 + cmake \
  14 + -DCMAKE_INSTALL_PREFIX=./install \
  15 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  16 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  17 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  18 + -DBUILD_SHARED_LIBS=ON \
  19 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  20 + ..
  21 +
  22 + cmake --build . --target install --config Release
  23 + ls -lh lib
  24 + popd
  25 +fi
  26 +
  27 +if [ ! -f ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt ]; then
  28 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  29 +
  30 + tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  31 + rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  32 +fi
  33 +
  34 +fpc \
  35 + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  36 + -Fl$SHERPA_ONNX_DIR/build/install/lib \
  37 + ./paraformer.pas
  38 +
  39 +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
  40 +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
  41 +
  42 +./paraformer
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
  6 +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
  7 +
  8 +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
  9 +
  10 +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  11 + mkdir -p ../../build
  12 + pushd ../../build
  13 + cmake \
  14 + -DCMAKE_INSTALL_PREFIX=./install \
  15 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  16 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  17 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  18 + -DBUILD_SHARED_LIBS=ON \
  19 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  20 + ..
  21 +
  22 + cmake --build . --target install --config Release
  23 + ls -lh lib
  24 + popd
  25 +fi
  26 +
  27 +if [ ! -f ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt ]; then
  28 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  29 + tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  30 + rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  31 +fi
  32 +
  33 +fpc \
  34 + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  35 + -Fl$SHERPA_ONNX_DIR/build/install/lib \
  36 + ./sense_voice.pas
  37 +
  38 +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
  39 +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
  40 +
  41 +./sense_voice
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
  6 +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
  7 +
  8 +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
  9 +
  10 +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  11 + mkdir -p ../../build
  12 + pushd ../../build
  13 + cmake \
  14 + -DCMAKE_INSTALL_PREFIX=./install \
  15 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  16 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  17 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  18 + -DBUILD_SHARED_LIBS=ON \
  19 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  20 + ..
  21 +
  22 + cmake --build . --target install --config Release
  23 + ls -lh lib
  24 + popd
  25 +fi
  26 +
  27 +if [ ! -f ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt ]; then
  28 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
  29 +
  30 + tar xvf sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
  31 + rm sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
  32 +fi
  33 +
  34 +fpc \
  35 + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  36 + -Fl$SHERPA_ONNX_DIR/build/install/lib \
  37 + ./telespeech_ctc.pas
  38 +
  39 +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
  40 +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
  41 +
  42 +./telespeech_ctc
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
  6 +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
  7 +
  8 +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
  9 +
  10 +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  11 + mkdir -p ../../build
  12 + pushd ../../build
  13 + cmake \
  14 + -DCMAKE_INSTALL_PREFIX=./install \
  15 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  16 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  17 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  18 + -DBUILD_SHARED_LIBS=ON \
  19 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  20 + ..
  21 +
  22 + cmake --build . --target install --config Release
  23 + ls -lh lib
  24 + popd
  25 +fi
  26 +
  27 +if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt ]; then
  28 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
  29 +
  30 + tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
  31 + rm sherpa-onnx-whisper-tiny.en.tar.bz2
  32 +fi
  33 +
  34 +fpc \
  35 + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  36 + -Fl$SHERPA_ONNX_DIR/build/install/lib \
  37 + ./whisper.pas
  38 +
  39 +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
  40 +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
  41 +
  42 +./whisper
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
  6 +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
  7 +
  8 +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
  9 +
  10 +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  11 + mkdir -p ../../build
  12 + pushd ../../build
  13 + cmake \
  14 + -DCMAKE_INSTALL_PREFIX=./install \
  15 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  16 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  17 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  18 + -DBUILD_SHARED_LIBS=ON \
  19 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  20 + ..
  21 +
  22 + cmake --build . --target install --config Release
  23 + ls -lh lib
  24 + popd
  25 +fi
  26 +
  27 +if [ ! -f ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/tokens.txt ]; then
  28 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2
  29 +
  30 + tar xvf sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2
  31 + rm sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2
  32 +fi
  33 +
  34 +fpc \
  35 + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  36 + -Fl$SHERPA_ONNX_DIR/build/install/lib \
  37 + ./zipformer_transducer.pas
  38 +
  39 +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
  40 +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
  41 +
  42 +./zipformer_transducer
  1 +{ Copyright (c) 2024 Xiaomi Corporation }
  2 +
  3 +{
  4 +This file shows how to use a non-streaming SenseVoice model
  5 +to decode files.
  6 +
  7 +You can download the model files from
  8 +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  9 +}
  10 +
  11 +program sense_voice;
  12 +
  13 +{$mode objfpc}
  14 +
  15 +uses
  16 + sherpa_onnx,
  17 + DateUtils,
  18 + SysUtils;
  19 +
  20 +var
  21 + Wave: TSherpaOnnxWave;
  22 + WaveFilename: AnsiString;
  23 +
  24 + Config: TSherpaOnnxOfflineRecognizerConfig;
  25 + Recognizer: TSherpaOnnxOfflineRecognizer;
  26 + Stream: TSherpaOnnxOfflineStream;
  27 + RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
  28 +
  29 + Start: TDateTime;
  30 + Stop: TDateTime;
  31 +
  32 + Elapsed: Single;
  33 + Duration: Single;
  34 + RealTimeFactor: Single;
  35 +begin
  36 + Config.ModelConfig.SenseVoice.Model := './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx';
  37 + Config.ModelConfig.SenseVoice.Language := 'auto';
  38 + Config.ModelConfig.SenseVoice.UseItn := False;
  39 + Config.ModelConfig.Tokens := './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt';
  40 + Config.ModelConfig.Provider := 'cpu';
  41 + Config.ModelConfig.NumThreads := 1;
  42 + Config.ModelConfig.Debug := False;
  43 +
  44 + WaveFilename := './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav';
  45 +
  46 + Wave := SherpaOnnxReadWave(WaveFilename);
  47 +
  48 + Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  49 + Stream := Recognizer.CreateStream();
  50 + Start := Now;
  51 +
  52 + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  53 + Recognizer.Decode(Stream);
  54 +
  55 + RecognitionResult := Recognizer.GetResult(Stream);
  56 +
  57 + Stop := Now;
  58 +
  59 + Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  60 + Duration := Length(Wave.Samples) / Wave.SampleRate;
  61 + RealTimeFactor := Elapsed / Duration;
  62 +
  63 + WriteLn(RecognitionResult.ToString);
  64 + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  65 + WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  66 + WriteLn(Format('Wave duration %.3f s', [Duration]));
  67 + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
  68 +
  69 + {Free resources to avoid memory leak.
  70 +
  71 + Note: You don't need to invoke them for this simple script.
  72 + However, you have to invoke them in your own large/complex project.
  73 + }
  74 + FreeAndNil(Stream);
  75 + FreeAndNil(Recognizer);
  76 +end.
  1 +{ Copyright (c) 2024 Xiaomi Corporation }
  2 +
  3 +{
  4 +This file shows how to use a non-streaming TeleSpeech CTC model
  5 +to decode files.
  6 +
  7 +You can download the model files from
  8 +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  9 +}
  10 +
  11 +program telespeech_ctc;
  12 +
  13 +{$mode objfpc}
  14 +
  15 +uses
  16 + sherpa_onnx,
  17 + DateUtils,
  18 + SysUtils;
  19 +
  20 +var
  21 + Wave: TSherpaOnnxWave;
  22 + WaveFilename: AnsiString;
  23 +
  24 + Config: TSherpaOnnxOfflineRecognizerConfig;
  25 + Recognizer: TSherpaOnnxOfflineRecognizer;
  26 + Stream: TSherpaOnnxOfflineStream;
  27 + RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
  28 +
  29 + Start: TDateTime;
  30 + Stop: TDateTime;
  31 +
  32 + Elapsed: Single;
  33 + Duration: Single;
  34 + RealTimeFactor: Single;
  35 +begin
  36 + Config.ModelConfig.TeleSpeechCtc := './sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx';
  37 + Config.ModelConfig.Tokens := './sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt';
  38 + Config.ModelConfig.Provider := 'cpu';
  39 + Config.ModelConfig.NumThreads := 1;
  40 + Config.ModelConfig.Debug := False;
  41 +
  42 + WaveFilename := './sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/test_wavs/3-sichuan.wav';
  43 +
  44 + Wave := SherpaOnnxReadWave(WaveFilename);
  45 +
  46 + Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  47 + Stream := Recognizer.CreateStream();
  48 + Start := Now;
  49 +
  50 + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  51 + Recognizer.Decode(Stream);
  52 +
  53 + RecognitionResult := Recognizer.GetResult(Stream);
  54 +
  55 + Stop := Now;
  56 +
  57 + Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  58 + Duration := Length(Wave.Samples) / Wave.SampleRate;
  59 + RealTimeFactor := Elapsed / Duration;
  60 +
  61 + WriteLn(RecognitionResult.ToString);
  62 + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  63 + WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  64 + WriteLn(Format('Wave duration %.3f s', [Duration]));
  65 + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
  66 +
  67 + {Free resources to avoid memory leak.
  68 +
  69 + Note: You don't need to invoke them for this simple script.
  70 + However, you have to invoke them in your own large/complex project.
  71 + }
  72 + FreeAndNil(Stream);
  73 + FreeAndNil(Recognizer);
  74 +end.
  1 +{ Copyright (c) 2024 Xiaomi Corporation }
  2 +
  3 +{
  4 +This file shows how to use a non-streaming Whisper model
  5 +to decode files.
  6 +
  7 +You can download the model files from
  8 +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  9 +}
  10 +
  11 +program whisper;
  12 +
  13 +{$mode objfpc}
  14 +
  15 +uses
  16 + sherpa_onnx,
  17 + DateUtils,
  18 + SysUtils;
  19 +
  20 +var
  21 + Wave: TSherpaOnnxWave;
  22 + WaveFilename: AnsiString;
  23 +
  24 + Config: TSherpaOnnxOfflineRecognizerConfig;
  25 + Recognizer: TSherpaOnnxOfflineRecognizer;
  26 + Stream: TSherpaOnnxOfflineStream;
  27 + RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
  28 +
  29 + Start: TDateTime;
  30 + Stop: TDateTime;
  31 +
  32 + Elapsed: Single;
  33 + Duration: Single;
  34 + RealTimeFactor: Single;
  35 +begin
  36 + Config.ModelConfig.Whisper.Encoder := './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx';
  37 + Config.ModelConfig.Whisper.Decoder := './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx';
  38 + Config.ModelConfig.Tokens := './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt';
  39 + Config.ModelConfig.Provider := 'cpu';
  40 + Config.ModelConfig.NumThreads := 1;
  41 + Config.ModelConfig.Debug := False;
  42 +
  43 + WaveFilename := './sherpa-onnx-whisper-tiny.en/test_wavs/0.wav';
  44 +
  45 + Wave := SherpaOnnxReadWave(WaveFilename);
  46 +
  47 + Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  48 + Stream := Recognizer.CreateStream();
  49 + Start := Now;
  50 +
  51 + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  52 + Recognizer.Decode(Stream);
  53 +
  54 + RecognitionResult := Recognizer.GetResult(Stream);
  55 +
  56 + Stop := Now;
  57 +
  58 + Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  59 + Duration := Length(Wave.Samples) / Wave.SampleRate;
  60 + RealTimeFactor := Elapsed / Duration;
  61 +
  62 + WriteLn(RecognitionResult.ToString);
  63 + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  64 + WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  65 + WriteLn(Format('Wave duration %.3f s', [Duration]));
  66 + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
  67 +
  68 + {Free resources to avoid memory leak.
  69 +
  70 + Note: You don't need to invoke them for this simple script.
  71 + However, you have to invoke them in your own large/complex project.
  72 + }
  73 + FreeAndNil(Stream);
  74 + FreeAndNil(Recognizer);
  75 +end.
  1 +{ Copyright (c) 2024 Xiaomi Corporation }
  2 +
  3 +{
  4 +This file shows how to use a non-streaming Zipformer transducer
  5 +to decode files.
  6 +
  7 +You can download the model files from
  8 +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  9 +}
  10 +
  11 +program zipformer_transducer;
  12 +
  13 +{$mode objfpc}
  14 +
  15 +uses
  16 + sherpa_onnx,
  17 + DateUtils,
  18 + SysUtils;
  19 +
  20 +var
  21 + Wave: TSherpaOnnxWave;
  22 + WaveFilename: AnsiString;
  23 +
  24 + Config: TSherpaOnnxOfflineRecognizerConfig;
  25 + Recognizer: TSherpaOnnxOfflineRecognizer;
  26 + Stream: TSherpaOnnxOfflineStream;
  27 + RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
  28 +
  29 + Start: TDateTime;
  30 + Stop: TDateTime;
  31 +
  32 + Elapsed: Single;
  33 + Duration: Single;
  34 + RealTimeFactor: Single;
  35 +begin
  36 + Config.ModelConfig.Transducer.Encoder := './sherpa-onnx-zipformer-gigaspeech-2023-12-12/encoder-epoch-30-avg-1.int8.onnx';
  37 + Config.ModelConfig.Transducer.Decoder := './sherpa-onnx-zipformer-gigaspeech-2023-12-12/decoder-epoch-30-avg-1.onnx';
  38 + Config.ModelConfig.Transducer.Joiner := './sherpa-onnx-zipformer-gigaspeech-2023-12-12/joiner-epoch-30-avg-1.onnx';
  39 + Config.ModelConfig.Tokens := './sherpa-onnx-zipformer-gigaspeech-2023-12-12/tokens.txt';
  40 + Config.ModelConfig.Provider := 'cpu';
  41 + Config.ModelConfig.NumThreads := 1;
  42 + Config.ModelConfig.Debug := False;
  43 +
  44 + WaveFilename := './sherpa-onnx-zipformer-gigaspeech-2023-12-12/test_wavs/1089-134686-0001.wav';
  45 +
  46 + Wave := SherpaOnnxReadWave(WaveFilename);
  47 +
  48 + Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  49 + Stream := Recognizer.CreateStream();
  50 + Start := Now;
  51 +
  52 + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  53 + Recognizer.Decode(Stream);
  54 +
  55 + RecognitionResult := Recognizer.GetResult(Stream);
  56 +
  57 + Stop := Now;
  58 +
  59 + Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  60 + Duration := Length(Wave.Samples) / Wave.SampleRate;
  61 + RealTimeFactor := Elapsed / Duration;
  62 +
  63 + WriteLn(RecognitionResult.ToString);
  64 + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  65 + WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  66 + WriteLn(Format('Wave duration %.3f s', [Duration]));
  67 + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
  68 +
  69 + {Free resources to avoid memory leak.
  70 +
  71 + Note: You don't need to invoke them for this simple script.
  72 + However, you have to invoke them in your own large/complex project.
  73 + }
  74 + FreeAndNil(Stream);
  75 + FreeAndNil(Recognizer);
  76 +end.
  1 +!run-*.sh
1 zipformer_transducer 2 zipformer_transducer
2 paraformer 3 paraformer
3 zipformer_ctc 4 zipformer_ctc
4 zipformer_ctc_hlg 5 zipformer_ctc_hlg
  6 +nemo_transducer
@@ -9,3 +9,4 @@ APIs with streaming models for speech recognition. @@ -9,3 +9,4 @@ APIs with streaming models for speech recognition.
9 |[run-zipformer-ctc-hlg.sh](./run-zipformer-ctc-hlg.sh)|Use a streaming Zipformer CTC model with HLG for speech recognition| 9 |[run-zipformer-ctc-hlg.sh](./run-zipformer-ctc-hlg.sh)|Use a streaming Zipformer CTC model with HLG for speech recognition|
10 |[run-zipformer-ctc.sh](./run-zipformer-ctc.sh)|Use a streaming Zipformer CTC model for speech recognition| 10 |[run-zipformer-ctc.sh](./run-zipformer-ctc.sh)|Use a streaming Zipformer CTC model for speech recognition|
11 |[run-zipformer-transducer.sh](./run-zipformer-transducer.sh)|Use a Zipformer transducer model for speech recognition| 11 |[run-zipformer-transducer.sh](./run-zipformer-transducer.sh)|Use a Zipformer transducer model for speech recognition|
  12 +|[run-nemo-transducer.sh](./run-nemo-transducer.sh)|Use a NeMo transducer model for speech recognition|
  1 +{ Copyright (c) 2024 Xiaomi Corporation }
  2 +
  3 +{
  4 +This file shows how to use a streaming NeMo transducer
  5 +to decode files.
  6 +
  7 +You can download the model files from
  8 +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  9 +}
  10 +
  11 +program nemo_transducer;
  12 +
  13 +{$mode objfpc}
  14 +
  15 +uses
  16 + sherpa_onnx,
  17 + DateUtils,
  18 + SysUtils;
  19 +
  20 +var
  21 + Config: TSherpaOnnxOnlineRecognizerConfig;
  22 + Recognizer: TSherpaOnnxOnlineRecognizer;
  23 + Stream: TSherpaOnnxOnlineStream;
  24 + RecognitionResult: TSherpaOnnxOnlineRecognizerResult;
  25 + Wave: TSherpaOnnxWave;
  26 + WaveFilename: AnsiString;
  27 + TailPaddings: array of Single;
  28 +
  29 + Start: TDateTime;
  30 + Stop: TDateTime;
  31 +
  32 + Elapsed: Single;
  33 + Duration: Single;
  34 + RealTimeFactor: Single;
  35 +begin
  36 + Initialize(Config);
  37 +
  38 + {Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  39 + to download model files used in this file.}
  40 + Config.ModelConfig.Transducer.Encoder := './sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/encoder.onnx';
  41 + Config.ModelConfig.Transducer.Decoder := './sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/decoder.onnx';
  42 + Config.ModelConfig.Transducer.Joiner := './sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/joiner.onnx';
  43 + Config.ModelConfig.Tokens := './sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/tokens.txt';
  44 + Config.ModelConfig.Provider := 'cpu';
  45 + Config.ModelConfig.NumThreads := 1;
  46 + Config.ModelConfig.Debug := False;
  47 +
  48 + WaveFilename := './sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/test_wavs/0.wav';
  49 +
  50 + Wave := SherpaOnnxReadWave(WaveFilename);
  51 +
  52 + Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config);
  53 +
  54 + Start := Now;
  55 +
  56 + Stream := Recognizer.CreateStream();
  57 +
  58 + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  59 +
  60 + SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding}
  61 + Stream.AcceptWaveform(TailPaddings, Wave.SampleRate);
  62 +
  63 + Stream.InputFinished();
  64 +
  65 + while Recognizer.IsReady(Stream) do
  66 + Recognizer.Decode(Stream);
  67 +
  68 + RecognitionResult := Recognizer.GetResult(Stream);
  69 +
  70 + Stop := Now;
  71 +
  72 + Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  73 + Duration := Length(Wave.Samples) / Wave.SampleRate;
  74 + RealTimeFactor := Elapsed / Duration;
  75 +
  76 + WriteLn(RecognitionResult.ToString);
  77 + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  78 + WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  79 + WriteLn(Format('Wave duration %.3f s', [Duration]));
  80 + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
  81 +
  82 + {Free resources to avoid memory leak.
  83 +
  84 + Note: You don't need to invoke them for this simple script.
  85 + However, you have to invoke them in your own large/complex project.
  86 + }
  87 + FreeAndNil(Stream);
  88 + FreeAndNil(Recognizer);
  89 +end.
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
  6 +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
  7 +
  8 +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
  9 +
  10 +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  11 + mkdir -p ../../build
  12 + pushd ../../build
  13 + cmake \
  14 + -DCMAKE_INSTALL_PREFIX=./install \
  15 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  16 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  17 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  18 + -DBUILD_SHARED_LIBS=ON \
  19 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  20 + ..
  21 +
  22 + cmake --build . --target install --config Release
  23 + ls -lh lib
  24 + popd
  25 +fi
  26 +
  27 +if [ ! -f ./sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/tokens.txt ]; then
  28 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms.tar.bz2
  29 + tar xvf sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms.tar.bz2
  30 + rm sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms.tar.bz2
  31 +fi
  32 +
  33 +fpc \
  34 + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  35 + -Fl$SHERPA_ONNX_DIR/build/install/lib \
  36 + ./nemo_transducer.pas
  37 +
  38 +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
  39 +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
  40 +
  41 +./nemo_transducer
@@ -110,6 +110,109 @@ type @@ -110,6 +110,109 @@ type
110 function GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult; 110 function GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult;
111 end; 111 end;
112 112
  113 + TSherpaOnnxOfflineTransducerModelConfig = record { Pascal-side transducer settings; the example programs assign .onnx file paths to these fields }
  114 + Encoder: AnsiString;
  115 + Decoder: AnsiString;
  116 + Joiner: AnsiString;
  117 + function ToString: AnsiString; { Human-readable dump of all fields }
  118 + end;
  119 +
  120 + TSherpaOnnxOfflineParaformerModelConfig = record { Pascal-side Paraformer settings }
  121 + Model: AnsiString; { presumably the model file path - confirm against the C API docs }
  122 + function ToString: AnsiString;
  123 + end;
  124 +
  125 + TSherpaOnnxOfflineNemoEncDecCtcModelConfig = record { Pascal-side NeMo CTC settings }
  126 + Model: AnsiString;
  127 + function ToString: AnsiString;
  128 + end;
  129 +
  130 + TSherpaOnnxOfflineWhisperModelConfig = record { Pascal-side Whisper settings; the Whisper example only sets Encoder and Decoder }
  131 + Encoder: AnsiString;
  132 + Decoder: AnsiString;
  133 + Language: AnsiString;
  134 + Task: AnsiString;
  135 + TailPaddings: Integer;
  136 + function ToString: AnsiString;
  137 + end;
  138 +
  139 + TSherpaOnnxOfflineTdnnModelConfig = record { Pascal-side TDNN settings }
  140 + Model: AnsiString;
  141 + function ToString: AnsiString;
  142 + end;
  143 +
  144 + TSherpaOnnxOfflineLMConfig = record { Pascal-side language-model settings, copied to LMConfig of the C config }
  145 + Model: AnsiString;
  146 + Scale: Single;
  147 + function ToString: AnsiString;
  148 + end;
  149 +
  150 + TSherpaOnnxOfflineSenseVoiceModelConfig = record { Pascal-side SenseVoice settings }
  151 + Model: AnsiString;
  152 + Language: AnsiString;
  153 + UseItn: Boolean; { converted with Ord() to a cint32 when the C config is built }
  154 + function ToString: AnsiString;
  155 + end;
  156 +
  157 + TSherpaOnnxOfflineModelConfig = record { Groups the per-model-family settings with options shared by all of them }
  158 + Transducer: TSherpaOnnxOfflineTransducerModelConfig;
  159 + Paraformer: TSherpaOnnxOfflineParaformerModelConfig;
  160 + NeMoCtc: TSherpaOnnxOfflineNemoEncDecCtcModelConfig;
  161 + Whisper: TSherpaOnnxOfflineWhisperModelConfig;
  162 + Tdnn: TSherpaOnnxOfflineTdnnModelConfig;
  163 + Tokens: AnsiString;
  164 + NumThreads: Integer;
  165 + Debug: Boolean; { converted with Ord() to a cint32 when the C config is built }
  166 + Provider: AnsiString;
  167 + ModelType: AnsiString;
  168 + ModelingUnit: AnsiString;
  169 + BpeVocab: AnsiString;
  170 + TeleSpeechCtc: AnsiString;
  171 + SenseVoice: TSherpaOnnxOfflineSenseVoiceModelConfig;
  172 + function ToString: AnsiString;
  173 + end;
  174 +
  175 + TSherpaOnnxOfflineRecognizerConfig = record { Top-level configuration accepted by TSherpaOnnxOfflineRecognizer.Create }
  176 + FeatConfig: TSherpaOnnxFeatureConfig;
  177 + ModelConfig: TSherpaOnnxOfflineModelConfig;
  178 + LMConfig: TSherpaOnnxOfflineLMConfig;
  179 + DecodingMethod: AnsiString;
  180 + MaxActivePaths: Integer;
  181 + HotwordsFile: AnsiString;
  182 + HotwordsScore: Single;
  183 + RuleFsts: AnsiString;
  184 + RuleFars: AnsiString;
  185 + BlankPenalty: Single;
  186 + function ToString: AnsiString;
  187 + end;
  188 +
  189 + TSherpaOnnxOfflineRecognizerResult = record { Filled by GetResult from the "text", "tokens" and "timestamps" keys of the result JSON }
  190 + Text: AnsiString;
  191 + Tokens: array of AnsiString;
  192 + Timestamps: array of Single;
  193 + function ToString: AnsiString;
  194 + end;
  195 +
  196 + TSherpaOnnxOfflineStream = class { Wraps a native offline stream pointer and destroys it in Destroy }
  197 + private
  198 + Handle: Pointer; { pointer handed to the constructor; passed to the C API calls }
  199 + public
  200 + constructor Create(P: Pointer);
  201 + destructor Destroy; override;
  202 + procedure AcceptWaveform(Samples: array of Single; SampleRate: Integer); { forwards Samples and their count to SherpaOnnxAcceptWaveformOffline }
  203 + end;
  204 +
  205 + TSherpaOnnxOfflineRecognizer = class { Wraps a native offline recognizer pointer and destroys it in Destroy }
  206 + private
  207 + Handle: Pointer; { pointer returned by SherpaOnnxCreateOfflineRecognizer }
  208 + public
  209 + constructor Create(Config: TSherpaOnnxOfflineRecognizerConfig);
  210 + destructor Destroy; override;
  211 + function CreateStream: TSherpaOnnxOfflineStream; { returns a new stream object; the examples free it with FreeAndNil }
  212 + procedure Decode(Stream: TSherpaOnnxOfflineStream);
  213 + function GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult;
  214 + end;
  215 +
113 { It supports reading a single channel wave with 16-bit encoded samples. 216 { It supports reading a single channel wave with 16-bit encoded samples.
114 Samples are normalized to the range [-1, 1]. 217 Samples are normalized to the range [-1, 1].
115 } 218 }
@@ -204,6 +307,68 @@ type @@ -204,6 +307,68 @@ type
204 307
205 PSherpaOnnxOnlineRecognizerConfig = ^SherpaOnnxOnlineRecognizerConfig; 308 PSherpaOnnxOnlineRecognizerConfig = ^SherpaOnnxOnlineRecognizerConfig;
206 309
  310 + SherpaOnnxOfflineTransducerModelConfig = record { C-side counterpart of TSherpaOnnxOfflineTransducerModelConfig; strings are PAnsiChar }
  311 + Encoder: PAnsiChar;
  312 + Decoder: PAnsiChar;
  313 + Joiner: PAnsiChar;
  314 + end;
  315 + SherpaOnnxOfflineParaformerModelConfig = record
  316 + Model: PAnsiChar;
  317 + end;
  318 + SherpaOnnxOfflineNemoEncDecCtcModelConfig = record
  319 + Model: PAnsiChar;
  320 + end;
  321 + SherpaOnnxOfflineWhisperModelConfig = record
  322 + Encoder: PAnsiChar;
  323 + Decoder: PAnsiChar;
  324 + Language: PAnsiChar;
  325 + Task: PAnsiChar;
  326 + TailPaddings: cint32;
  327 + end;
  328 + SherpaOnnxOfflineTdnnModelConfig = record
  329 + Model: PAnsiChar;
  330 + end;
  331 + SherpaOnnxOfflineLMConfig = record
  332 + Model: PAnsiChar;
  333 + Scale: Single;
  334 + end;
  335 + SherpaOnnxOfflineSenseVoiceModelConfig = record
  336 + Model: PAnsiChar;
  337 + Language: PAnsiChar;
  338 + UseItn: cint32; { receives Ord() of the Pascal-side Boolean }
  339 + end;
  340 + SherpaOnnxOfflineModelConfig = record { NOTE(review): field order and types presumably have to match the struct in the C header - confirm before reordering }
  341 + Transducer: SherpaOnnxOfflineTransducerModelConfig;
  342 + Paraformer: SherpaOnnxOfflineParaformerModelConfig;
  343 + NeMoCtc: SherpaOnnxOfflineNemoEncDecCtcModelConfig;
  344 + Whisper: SherpaOnnxOfflineWhisperModelConfig;
  345 + Tdnn: SherpaOnnxOfflineTdnnModelConfig;
  346 + Tokens: PAnsiChar;
  347 + NumThreads: cint32;
  348 + Debug: cint32; { receives Ord() of the Pascal-side Boolean }
  349 + Provider: PAnsiChar;
  350 + ModelType: PAnsiChar;
  351 + ModelingUnit: PAnsiChar;
  352 + BpeVocab: PAnsiChar;
  353 + TeleSpeechCtc: PAnsiChar;
  354 + SenseVoice: SherpaOnnxOfflineSenseVoiceModelConfig;
  355 + end;
  356 +
  357 + SherpaOnnxOfflineRecognizerConfig = record { Record passed by pointer to SherpaOnnxCreateOfflineRecognizer }
  358 + FeatConfig: SherpaOnnxFeatureConfig;
  359 + ModelConfig: SherpaOnnxOfflineModelConfig;
  360 + LMConfig: SherpaOnnxOfflineLMConfig;
  361 + DecodingMethod: PAnsiChar;
  362 + MaxActivePaths: cint32;
  363 + HotwordsFile: PAnsiChar;
  364 + HotwordsScore: Single;
  365 + RuleFsts: PAnsiChar;
  366 + RuleFars: PAnsiChar;
  367 + BlankPenalty: Single;
  368 + end;
  369 +
  370 + PSherpaOnnxOfflineRecognizerConfig = ^SherpaOnnxOfflineRecognizerConfig; { pointer type used in the C API signature }
  371 +
207 function SherpaOnnxCreateOnlineRecognizer(Config: PSherpaOnnxOnlineRecognizerConfig): Pointer; cdecl; 372 function SherpaOnnxCreateOnlineRecognizer(Config: PSherpaOnnxOnlineRecognizerConfig): Pointer; cdecl;
208 external SherpaOnnxLibName; 373 external SherpaOnnxLibName;
209 374
@@ -244,6 +409,31 @@ function SherpaOnnxGetOnlineStreamResultAsJson(Recognizer: Pointer; Stream: Poin @@ -244,6 +409,31 @@ function SherpaOnnxGetOnlineStreamResultAsJson(Recognizer: Pointer; Stream: Poin
244 procedure SherpaOnnxDestroyOnlineStreamResultJson(PJson: PAnsiChar); cdecl; 409 procedure SherpaOnnxDestroyOnlineStreamResultJson(PJson: PAnsiChar); cdecl;
245 external SherpaOnnxLibName; 410 external SherpaOnnxLibName;
246 411
  412 +function SherpaOnnxCreateOfflineRecognizer(Config: PSherpaOnnxOfflineRecognizerConfig): Pointer; cdecl; { result is stored as TSherpaOnnxOfflineRecognizer.Handle }
  413 + external SherpaOnnxLibName;
  414 +
  415 +procedure SherpaOnnxDestroyOfflineRecognizer(Recognizer: Pointer); cdecl; { called from TSherpaOnnxOfflineRecognizer.Destroy }
  416 + external SherpaOnnxLibName;
  417 +
  418 +function SherpaOnnxCreateOfflineStream(Recognizer: Pointer): Pointer; cdecl; { result is wrapped in a TSherpaOnnxOfflineStream }
  419 + external SherpaOnnxLibName;
  420 +
  421 +procedure SherpaOnnxDestroyOfflineStream(Stream: Pointer); cdecl; { called from TSherpaOnnxOfflineStream.Destroy }
  422 + external SherpaOnnxLibName;
  423 +
  424 +procedure SherpaOnnxAcceptWaveformOffline(Stream: Pointer; { N is the number of samples that Samples points to }
  425 + SampleRate: cint32; Samples: pcfloat; N: cint32); cdecl;
  426 + external SherpaOnnxLibName;
  427 +
  428 +procedure SherpaOnnxDecodeOfflineStream(Recognizer: Pointer; Stream: Pointer); cdecl;
  429 + external SherpaOnnxLibName;
  430 +
  431 +function SherpaOnnxGetOfflineStreamResultAsJson(Stream: Pointer): PAnsiChar; cdecl; { the returned string is released with SherpaOnnxDestroyOfflineStreamResultJson }
  432 + external SherpaOnnxLibName;
  433 +
  434 +procedure SherpaOnnxDestroyOfflineStreamResultJson(Json: PAnsiChar); cdecl;
  435 + external SherpaOnnxLibName;
  436 +
247 function SherpaOnnxReadWaveWrapper(Filename: PAnsiChar): PSherpaOnnxWave; cdecl; 437 function SherpaOnnxReadWaveWrapper(Filename: PAnsiChar): PSherpaOnnxWave; cdecl;
248 external SherpaOnnxLibName name 'SherpaOnnxReadWave'; 438 external SherpaOnnxLibName name 'SherpaOnnxReadWave';
249 439
@@ -322,7 +512,7 @@ end; @@ -322,7 +512,7 @@ end;
322 512
323 function TSherpaOnnxOnlineRecognizerConfig.ToString: AnsiString; 513 function TSherpaOnnxOnlineRecognizerConfig.ToString: AnsiString;
324 begin 514 begin
325 - Result := Format('TSherpaOnnxOnlineRecognizerConfig(FeatConfg := %s, ' + 515 + Result := Format('TSherpaOnnxOnlineRecognizerConfig(FeatConfig := %s, ' +
326 'ModelConfig := %s, ' + 516 'ModelConfig := %s, ' +
327 'DecodingMethod := %s, ' + 517 'DecodingMethod := %s, ' +
328 'MaxActivePaths := %d, ' + 518 'MaxActivePaths := %d, ' +
@@ -375,7 +565,7 @@ begin @@ -375,7 +565,7 @@ begin
375 565
376 Result := Format('TSherpaOnnxOnlineRecognizerResult(Text := %s, ' + 566 Result := Format('TSherpaOnnxOnlineRecognizerResult(Text := %s, ' +
377 'Tokens := %s, ' + 567 'Tokens := %s, ' +
378 - 'Timestamps := %s, ' + 568 + 'Timestamps := %s' +
379 ')', 569 ')',
380 [Self.Text, TokensStr, TimestampStr]); 570 [Self.Text, TokensStr, TimestampStr]);
381 end; 571 end;
@@ -531,4 +721,268 @@ begin @@ -531,4 +721,268 @@ begin
531 SherpaOnnxOnlineStreamInputFinished(Self.Handle); 721 SherpaOnnxOnlineStreamInputFinished(Self.Handle);
532 end; 722 end;
533 723
  724 +function TSherpaOnnxOfflineTransducerModelConfig.ToString: AnsiString;
  725 +begin
  726 + Result := Format('TSherpaOnnxOfflineTransducerModelConfig(' +
  727 + 'Encoder := %s, ' +
  728 + 'Decoder := %s, ' +
  729 + 'Joiner := %s' +
  730 + ')',
  731 + [Self.Encoder, Self.Decoder, Self.Joiner]);
  732 +end;
  733 +
  734 +function TSherpaOnnxOfflineParaformerModelConfig.ToString: AnsiString;
  735 +begin
  736 + Result := Format('TSherpaOnnxOfflineParaformerModelConfig(Model := %s)',
  737 + [Self.Model]);
  738 +end;
  739 +
  740 +function TSherpaOnnxOfflineNemoEncDecCtcModelConfig.ToString: AnsiString;
  741 +begin
  742 + Result := Format('TSherpaOnnxOfflineNemoEncDecCtcModelConfig(Model := %s)',
  743 + [Self.Model]);
  744 +end;
  745 +
  746 +function TSherpaOnnxOfflineWhisperModelConfig.ToString: AnsiString;
  747 +begin
  748 + Result := Format('TSherpaOnnxOfflineWhisperModelConfig(' +
  749 + 'Encoder := %s, ' +
  750 + 'Decoder := %s, ' +
  751 + 'Language := %s, ' +
  752 + 'Task := %s, ' +
  753 + 'TailPaddings := %d' +
  754 + ')',
  755 + [Self.Encoder, Self.Decoder, Self.Language, Self.Task, Self.TailPaddings]);
  756 +end;
  757 +
  758 +function TSherpaOnnxOfflineTdnnModelConfig.ToString: AnsiString;
  759 +begin
  760 + Result := Format('TSherpaOnnxOfflineTdnnModelConfig(Model := %s)',
  761 + [Self.Model]);
  762 +end;
  763 +
  764 +function TSherpaOnnxOfflineLMConfig.ToString: AnsiString;
  765 +begin
  766 + Result := Format('TSherpaOnnxOfflineLMConfig(' +
  767 + 'Model := %s, ' +
  768 + 'Scale := %.1f' +
  769 + ')',
  770 + [Self.Model, Self.Scale]);
  771 +end;
  772 +
  773 +function TSherpaOnnxOfflineSenseVoiceModelConfig.ToString: AnsiString;
  774 +begin
  775 + Result := Format('TSherpaOnnxOfflineSenseVoiceModelConfig(' +
  776 + 'Model := %s, ' +
  777 + 'Language := %s, ' +
  778 + 'UseItn := %s' +
  779 + ')',
  780 + [Self.Model, Self.Language, Self.UseItn.ToString]);
  781 +end;
  782 +
  783 +function TSherpaOnnxOfflineModelConfig.ToString: AnsiString;
  784 +begin
  785 + Result := Format('TSherpaOnnxOfflineModelConfig(' +
  786 + 'Transducer := %s, ' +
  787 + 'Paraformer := %s, ' +
  788 + 'NeMoCtc := %s, ' +
  789 + 'Whisper := %s, ' +
  790 + 'Tdnn := %s, ' +
  791 + 'Tokens := %s, ' +
  792 + 'NumThreads := %d, ' +
  793 + 'Debug := %s, ' +
  794 + 'Provider := %s, ' +
  795 + 'ModelType := %s, ' +
  796 + 'ModelingUnit := %s, ' +
  797 + 'BpeVocab := %s, ' +
  798 + 'TeleSpeechCtc := %s, ' +
  799 + 'SenseVoice := %s' +
  800 + ')',
  801 + [Self.Transducer.ToString, Self.Paraformer.ToString,
  802 + Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString,
  803 + Self.Tokens, Self.NumThreads, Self.Debug.ToString, Self.Provider,
  804 + Self.ModelType, Self.ModelingUnit, Self.BpeVocab,
  805 + Self.TeleSpeechCtc, Self.SenseVoice.ToString
  806 + ]);
  807 +end;
  808 +
  809 +function TSherpaOnnxOfflineRecognizerConfig.ToString: AnsiString;
  810 +begin
  811 + Result := Format('TSherpaOnnxOfflineRecognizerConfig(' +
  812 + 'FeatConfig := %s, ' +
  813 + 'ModelConfig := %s, ' +
  814 + 'LMConfig := %s, ' +
  815 + 'DecodingMethod := %s, ' +
  816 + 'MaxActivePaths := %d, ' +
  817 + 'HotwordsFile := %s, ' +
  818 + 'HotwordsScore := %.1f, ' +
  819 + 'RuleFsts := %s, ' +
  820 + 'RuleFars := %s, ' +
  821 + 'BlankPenalty := %1.f' +
  822 + ')',
  823 + [Self.FeatConfig.ToString, Self.ModelConfig.ToString,
  824 + Self.LMConfig.ToString, Self.DecodingMethod, Self.MaxActivePaths,
  825 + Self.HotwordsFile, Self.HotwordsScore, Self.RuleFsts, Self.RuleFars,
  826 + Self.BlankPenalty
  827 + ]);
  828 +end;
  829 +
  830 +constructor TSherpaOnnxOfflineRecognizer.Create(Config: TSherpaOnnxOfflineRecognizerConfig);
  831 +var
  832 + C: SherpaOnnxOfflineRecognizerConfig;
  833 +begin
  834 + Initialize(C);
  835 +
  836 + C.FeatConfig.SampleRate := Config.FeatConfig.SampleRate;
  837 + C.FeatConfig.FeatureDim := Config.FeatConfig.FeatureDim;
  838 +
  839 + C.ModelConfig.Transducer.Encoder := PAnsiChar(Config.ModelConfig.Transducer.Encoder);
  840 + C.ModelConfig.Transducer.Decoder := PAnsiChar(Config.ModelConfig.Transducer.Decoder);
  841 + C.ModelConfig.Transducer.Joiner := PAnsiChar(Config.ModelConfig.Transducer.Joiner);
  842 +
  843 + C.ModelConfig.Paraformer.Model := PAnsiChar(Config.ModelConfig.Paraformer.Model);
  844 + C.ModelConfig.NeMoCtc.Model := PAnsiChar(Config.ModelConfig.NeMoCtc.Model);
  845 +
  846 + C.ModelConfig.Whisper.Encoder := PAnsiChar(Config.ModelConfig.Whisper.Encoder);
  847 + C.ModelConfig.Whisper.Decoder := PAnsiChar(Config.ModelConfig.Whisper.Decoder);
  848 + C.ModelConfig.Whisper.Language := PAnsiChar(Config.ModelConfig.Whisper.Language);
  849 + C.ModelConfig.Whisper.Task := PAnsiChar(Config.ModelConfig.Whisper.Task);
  850 + C.ModelConfig.Whisper.TailPaddings := Config.ModelConfig.Whisper.TailPaddings;
  851 +
  852 + C.ModelConfig.Tdnn.Model := PAnsiChar(Config.ModelConfig.Tdnn.Model);
  853 +
  854 +
  855 + C.ModelConfig.Tokens := PAnsiChar(Config.ModelConfig.Tokens);
  856 + C.ModelConfig.NumThreads := Config.ModelConfig.NumThreads;
  857 + C.ModelConfig.Debug := Ord(Config.ModelConfig.Debug);
  858 + C.ModelConfig.Provider := PAnsiChar(Config.ModelConfig.Provider);
  859 + C.ModelConfig.ModelType := PAnsiChar(Config.ModelConfig.ModelType);
  860 + C.ModelConfig.ModelingUnit := PAnsiChar(Config.ModelConfig.ModelingUnit);
  861 + C.ModelConfig.BpeVocab := PAnsiChar(Config.ModelConfig.BpeVocab);
  862 + C.ModelConfig.TeleSpeechCtc := PAnsiChar(Config.ModelConfig.TeleSpeechCtc);
  863 +
  864 + C.ModelConfig.SenseVoice.Model := PAnsiChar(Config.ModelConfig.SenseVoice.Model);
  865 + C.ModelConfig.SenseVoice.Language := PAnsiChar(Config.ModelConfig.SenseVoice.Language);
  866 + C.ModelConfig.SenseVoice.UseItn := Ord(Config.ModelConfig.SenseVoice.UseItn);
  867 +
  868 + C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model);
  869 + C.LMConfig.Scale := Config.LMConfig.Scale;
  870 +
  871 + C.DecodingMethod := PAnsiChar(Config.DecodingMethod);
  872 + C.MaxActivePaths := Config.MaxActivePaths;
  873 + C.HotwordsFile := PAnsiChar(Config.HotwordsFile);
  874 + C.HotwordsScore := Config.HotwordsScore;
  875 + C.RuleFsts := PAnsiChar(Config.RuleFsts);
  876 + C.RuleFars := PAnsiChar(Config.RuleFars);
  877 + C.BlankPenalty := Config.BlankPenalty;
  878 +
  879 + Self.Handle := SherpaOnnxCreateOfflineRecognizer(@C);
  880 +end;
  881 +
  882 +destructor TSherpaOnnxOfflineRecognizer.Destroy;
  883 +begin
  884 + SherpaOnnxDestroyOfflineRecognizer(Self.Handle);
  885 + Self.Handle := nil;
  886 +end;
  887 +
  888 +function TSherpaOnnxOfflineRecognizer.CreateStream: TSherpaOnnxOfflineStream;
  889 +var
  890 + Stream: Pointer;
  891 +begin
  892 + Stream := SherpaOnnxCreateOfflineStream(Self.Handle);
  893 + Result := TSherpaOnnxOfflineStream.Create(Stream);
  894 +end;
  895 +
  896 +procedure TSherpaOnnxOfflineRecognizer.Decode(Stream: TSherpaOnnxOfflineStream);
  897 +begin
  898 + SherpaOnnxDecodeOfflineStream(Self.Handle, Stream.Handle);
  899 +end;
  900 +
  901 +function TSherpaOnnxOfflineRecognizer.GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult;
  902 +var
  903 + pJson: PAnsiChar;
  904 + JsonData: TJSONData;
  905 + JsonObject : TJSONObject;
  906 + JsonEnum: TJSONEnum;
  907 + I: Integer;
  908 +begin
  909 + pJson := SherpaOnnxGetOfflineStreamResultAsJson(Stream.Handle);
  910 +
  911 + JsonData := GetJSON(AnsiString(pJson), False);
  912 +
  913 + JsonObject := JsonData as TJSONObject;
  914 +
  915 + Result.Text := JsonObject.Strings['text'];
  916 +
  917 + SetLength(Result.Tokens, JsonObject.Arrays['tokens'].Count);
  918 +
  919 + I := 0;
  920 + for JsonEnum in JsonObject.Arrays['tokens'] do
  921 + begin
  922 + Result.Tokens[I] := JsonEnum.Value.AsString;
  923 + Inc(I);
  924 + end;
  925 +
  926 + SetLength(Result.Timestamps, JsonObject.Arrays['timestamps'].Count);
  927 + I := 0;
  928 + for JsonEnum in JsonObject.Arrays['timestamps'] do
  929 + begin
  930 + Result.Timestamps[I] := JsonEnum.Value.AsFloat;
  931 + Inc(I);
  932 + end;
  933 +
  934 + SherpaOnnxDestroyOfflineStreamResultJson(pJson);
  935 +end;
  936 +
  937 +constructor TSherpaOnnxOfflineStream.Create(P: Pointer);
  938 +begin
  939 + Self.Handle := P;
  940 +end;
  941 +
  942 +destructor TSherpaOnnxOfflineStream.Destroy;
  943 +begin
  944 + SherpaOnnxDestroyOfflineStream(Self.Handle);
  945 + Self.Handle := nil;
  946 +end;
  947 +
  948 +procedure TSherpaOnnxOfflineStream.AcceptWaveform(Samples: array of Single; SampleRate: Integer);
  949 +begin
  950 + SherpaOnnxAcceptWaveformOffline(Self.Handle, SampleRate, pcfloat(Samples),
  951 + Length(Samples));
  952 +end;
  953 +
  954 +function TSherpaOnnxOfflineRecognizerResult.ToString: AnsiString;
  955 +var
  956 + TokensStr: AnsiString;
  957 + S: AnsiString;
  958 + TimestampStr: AnsiString;
  959 + T: Single;
  960 + Sep: AnsiString;
  961 +begin
  962 + TokensStr := '[';
  963 + Sep := '';
  964 + for S in Self.Tokens do
  965 + begin
  966 + TokensStr := TokensStr + Sep + S;
  967 + Sep := ', ';
  968 + end;
  969 + TokensStr := TokensStr + ']';
  970 +
  971 + TimestampStr := '[';
  972 + Sep := '';
  973 + for T in Self.Timestamps do
  974 + begin
  975 + TimestampStr := TimestampStr + Sep + Format('%.2f', [T]);
  976 + Sep := ', ';
  977 + end;
  978 + TimestampStr := TimestampStr + ']';
  979 +
  980 + Result := Format('TSherpaOnnxOfflineRecognizerResult(Text := %s, ' +
  981 + 'Tokens := %s, ' +
  982 + 'Timestamps := %s' +
  983 + ')',
  984 + [Self.Text, TokensStr, TimestampStr]);
  985 +end;
  986 +
534 end. 987 end.
  988 +