Fangjun Kuang
Committed by GitHub

Pascal API for streaming ASR (#1246)

@@ -39,7 +39,7 @@ jobs: @@ -39,7 +39,7 @@ jobs:
39 strategy: 39 strategy:
40 fail-fast: false 40 fail-fast: false
41 matrix: 41 matrix:
42 - os: [ubuntu-latest, macos-latest, macos-13] 42 + os: [ubuntu-latest, macos-latest, macos-13, windows-latest]
43 43
44 steps: 44 steps:
45 - uses: actions/checkout@v4 45 - uses: actions/checkout@v4
@@ -64,10 +64,19 @@ jobs: @@ -64,10 +64,19 @@ jobs:
64 run: | 64 run: |
65 brew install fpc 65 brew install fpc
66 # brew install --cask lazarus 66 # brew install --cask lazarus
  67 + #
  68 + - name: Install Free pascal compiler (windows)
  69 + if: matrix.os == 'windows-latest'
  70 + shell: bash
  71 + run: |
  72 + choco install lazarus
  73 +
  74 + ls -lh /c/lazarus/fpc/3.2.2/bin/x86_64-win64/
67 75
68 - name: FPC info 76 - name: FPC info
69 shell: bash 77 shell: bash
70 run: | 78 run: |
  79 + export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
71 which fpc 80 which fpc
72 fpc -i 81 fpc -i
73 82
@@ -87,6 +96,7 @@ jobs: @@ -87,6 +96,7 @@ jobs:
87 cd build 96 cd build
88 97
89 cmake \ 98 cmake \
  99 + -DCMAKE_INSTALL_PREFIX=./install \
90 -D BUILD_SHARED_LIBS=ON \ 100 -D BUILD_SHARED_LIBS=ON \
91 -D SHERPA_ONNX_ENABLE_BINARY=OFF \ 101 -D SHERPA_ONNX_ENABLE_BINARY=OFF \
92 -D CMAKE_BUILD_TYPE=Release \ 102 -D CMAKE_BUILD_TYPE=Release \
@@ -98,15 +108,55 @@ jobs: @@ -98,15 +108,55 @@ jobs:
98 export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" 108 export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
99 109
100 cd build 110 cd build
101 - make -j2 sherpa-onnx-c-api 111 + cmake --build . --target install --config Release
  112 +
  113 + ls -lh install/lib/
  114 +
  115 + if [[ ${{ matrix.os }} == 'windows-latest' ]]; then
  116 + cp -v install/lib/*.dll ../pascal-api-examples/read-wav
  117 + cp -v install/lib/*.dll ../pascal-api-examples/streaming-asr
102 118
103 - - name: Run Pascal test 119 + cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/read-wav
  120 + cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/streaming-asr
  121 + fi
  122 +
  123 + - name: Run Pascal test (Read wav test)
104 shell: bash 124 shell: bash
105 run: | 125 run: |
  126 + export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
  127 +
106 cd ./pascal-api-examples 128 cd ./pascal-api-examples
107 129
108 - echo "----read-wav test-----"  
109 pushd read-wav 130 pushd read-wav
110 ./run.sh 131 ./run.sh
  132 + echo "---"
  133 + ls -lh
  134 + popd
  135 +
  136 + - name: Run Pascal test (Streaming ASR)
  137 + shell: bash
  138 + run: |
  139 + export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
  140 +
  141 + cd ./pascal-api-examples
  142 +
  143 + pushd streaming-asr
  144 + ./run-zipformer-transducer.sh
  145 + rm -rf sherpa-onnx-*
  146 + echo "---"
  147 +
  148 + if [[ ${{ matrix.os }} != 'windows-latest' ]]; then
  149 + ./run-paraformer.sh
  150 + rm -rf sherpa-onnx-*
  151 + echo "---"
  152 +
  153 + ./run-zipformer-ctc.sh
  154 + echo "---"
  155 +
  156 + ./run-zipformer-ctc-hlg.sh
  157 + rm -rf sherpa-onnx-*
  158 + echo "---"
  159 + fi
  160 +
111 ls -lh 161 ls -lh
112 popd 162 popd
@@ -29,7 +29,7 @@ public class StreamingDecodeFileCtcHLG { @@ -29,7 +29,7 @@ public class StreamingDecodeFileCtcHLG {
29 .build(); 29 .build();
30 30
31 OnlineCtcFstDecoderConfig ctcFstDecoderConfig = 31 OnlineCtcFstDecoderConfig ctcFstDecoderConfig =
32 - OnlineCtcFstDecoderConfig.builder().setGraph("hlg").build(); 32 + OnlineCtcFstDecoderConfig.builder().setGraph(hlg).build();
33 33
34 OnlineRecognizerConfig config = 34 OnlineRecognizerConfig config =
35 OnlineRecognizerConfig.builder() 35 OnlineRecognizerConfig.builder()
  1 +# Introduction
  2 +
  3 +This directory contains examples of how to use the [Object Pascal](https://en.wikipedia.org/wiki/Object_Pascal)
  4 +APIs of [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).
  5 +
  6 +|Directory| Description|
  7 +|---------|------------|
  8 +|[read-wav](./read-wav)|It shows how to read a wave file.|
  9 +|[streaming-asr](./streaming-asr)| It shows how to use streaming models for speech recognition.|
@@ -7,10 +7,11 @@ SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) @@ -7,10 +7,11 @@ SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
7 7
8 echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" 8 echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
9 9
10 -if [[ ! -f ../../build/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/lib/libsherpa-onnx-c-api.so ]]; then 10 +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
11 mkdir -p ../../build 11 mkdir -p ../../build
12 pushd ../../build 12 pushd ../../build
13 cmake \ 13 cmake \
  14 + -DCMAKE_INSTALL_PREFIX=./install \
14 -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ 15 -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
15 -DSHERPA_ONNX_ENABLE_TESTS=OFF \ 16 -DSHERPA_ONNX_ENABLE_TESTS=OFF \
16 -DSHERPA_ONNX_ENABLE_CHECK=OFF \ 17 -DSHERPA_ONNX_ENABLE_CHECK=OFF \
@@ -18,8 +19,7 @@ if [[ ! -f ../../build/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/lib/l @@ -18,8 +19,7 @@ if [[ ! -f ../../build/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/lib/l
18 -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ 19 -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
19 .. 20 ..
20 21
21 - make -j4 sherpa-onnx-c-api  
22 - ls -lh lib 22 + cmake --build . --target install --config Release
23 popd 23 popd
24 fi 24 fi
25 25
@@ -29,10 +29,10 @@ fi @@ -29,10 +29,10 @@ fi
29 29
30 fpc \ 30 fpc \
31 -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ 31 -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
32 - -Fl$SHERPA_ONNX_DIR/build/lib \ 32 + -Fl$SHERPA_ONNX_DIR/build/install/lib \
33 ./main.pas 33 ./main.pas
34 34
35 -export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/lib:$LD_LIBRARY_PATH  
36 -export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/lib:$DYLD_LIBRARY_PATH 35 +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
  36 +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
37 37
38 ./main 38 ./main
  1 +zipformer_transducer
  2 +paraformer
  3 +zipformer_ctc
  4 +zipformer_ctc_hlg
  1 +# Introduction
  2 +
  3 +This folder contains examples of using the Object Pascal APIs of sherpa-onnx
  4 +with streaming models for speech recognition.
  5 +
  6 +|File|Description|
  7 +|----|-----------|
  8 +|[run-paraformer.sh](./run-paraformer.sh)|Use a streaming Paraformer model for speech recognition|
  9 +|[run-zipformer-ctc-hlg.sh](./run-zipformer-ctc-hlg.sh)|Use a streaming Zipformer CTC model with HLG for speech recognition|
  10 +|[run-zipformer-ctc.sh](./run-zipformer-ctc.sh)|Use a streaming Zipformer CTC model for speech recognition|
  11 +|[run-zipformer-transducer.sh](./run-zipformer-transducer.sh)|Use a streaming Zipformer transducer model for speech recognition|
  1 +{ Copyright (c) 2024 Xiaomi Corporation }
  2 +
  3 +{
  4 +This file shows how to use a streaming Paraformer model to decode files.
  5 +
  6 +You can download the model files from
  7 +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  8 +}
  9 +
  10 +program paraformer;
  11 +
  12 +{$mode objfpc}
  13 +
  14 +uses
  15 + sherpa_onnx,
  16 + DateUtils,
  17 + SysUtils;
  18 +
  19 +var
  20 + Config: TSherpaOnnxOnlineRecognizerConfig;
  21 + Recognizer: TSherpaOnnxOnlineRecognizer;
  22 + Stream: TSherpaOnnxOnlineStream;
  23 + RecognitionResult: TSherpaOnnxOnlineRecognizerResult;
  24 + Wave: TSherpaOnnxWave;
  25 + WaveFilename: AnsiString;
  26 + TailPaddings: array of Single;
  27 +
  28 + Start: TDateTime;
  29 + Stop: TDateTime;
  30 +
  31 + Elapsed: Single;
  32 + Duration: Single;
  33 + RealTimeFactor: Single;
  34 +begin
  35 + Initialize(Config);
  36 +
  37 + {Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  38 + to download model files used in this file.}
  39 + Config.ModelConfig.Paraformer.Encoder := './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx';
  40 + Config.ModelConfig.Paraformer.Decoder := './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx';
  41 + Config.ModelConfig.Tokens := './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt';
  42 +
  43 + Config.ModelConfig.Provider := 'cpu';
  44 + Config.ModelConfig.NumThreads := 1;
  45 + Config.ModelConfig.Debug := False;
  46 +
  47 + WaveFilename := './sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/2.wav';
  48 +
  49 + Wave := SherpaOnnxReadWave(WaveFilename);
  50 +
  51 + Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config);
  52 +
  53 + Start := Now;
  54 +
  55 + Stream := Recognizer.CreateStream();
  56 +
  57 + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  58 +
  59 + SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding}
  60 + Stream.AcceptWaveform(TailPaddings, Wave.SampleRate);
  61 +
  62 + Stream.InputFinished();
  63 +
  64 + while Recognizer.IsReady(Stream) do
  65 + Recognizer.Decode(Stream);
  66 +
  67 + RecognitionResult := Recognizer.GetResult(Stream);
  68 +
  69 + Stop := Now;
  70 +
  71 + Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  72 + Duration := Length(Wave.Samples) / Wave.SampleRate;
  73 + RealTimeFactor := Elapsed / Duration;
  74 +
  75 + WriteLn(RecognitionResult.ToString);
  76 + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  77 + WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  78 + WriteLn(Format('Wave duration %.3f s', [Duration]));
  79 + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
  80 +
  81 + {Free resources to avoid memory leak.
  82 +
  83 + Note: You don't need to invoke them for this simple script.
  84 + However, you have to invoke them in your own large/complex project.
  85 + }
  86 + FreeAndNil(Stream);
  87 + FreeAndNil(Recognizer);
  88 +end.
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
  6 +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
  7 +
  8 +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
  9 +
  10 +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  11 + mkdir -p ../../build
  12 + pushd ../../build
  13 + cmake \
  14 + -DCMAKE_INSTALL_PREFIX=./install \
  15 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  16 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  17 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  18 + -DBUILD_SHARED_LIBS=ON \
  19 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  20 + ..
  21 +
  22 + cmake --build . --target install --config Release
  23 + ls -lh lib
  24 + popd
  25 +fi
  26 +
  27 +
  28 +if [ ! -f ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt ]; then
  29 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  30 + tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  31 + rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  32 +fi
  33 +
  34 +fpc \
  35 + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  36 + -Fl$SHERPA_ONNX_DIR/build/install/lib \
  37 + ./paraformer.pas
  38 +
  39 +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
  40 +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
  41 +
  42 +./paraformer
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
  6 +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
  7 +
  8 +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
  9 +
  10 +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  11 + mkdir -p ../../build
  12 + pushd ../../build
  13 + cmake \
  14 + -DCMAKE_INSTALL_PREFIX=./install \
  15 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  16 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  17 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  18 + -DBUILD_SHARED_LIBS=ON \
  19 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  20 + ..
  21 +
  22 + cmake --build . --target install --config Release
  23 + ls -lh lib
  24 + popd
  25 +fi
  26 +
  27 +if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt ]; then
  28 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  29 + tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  30 + rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  31 +fi
  32 +
  33 +fpc \
  34 + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  35 + -Fl$SHERPA_ONNX_DIR/build/install/lib \
  36 + ./zipformer_ctc_hlg.pas
  37 +
  38 +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
  39 +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
  40 +
  41 +./zipformer_ctc_hlg
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
  6 +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
  7 +
  8 +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
  9 +
  10 +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  11 + mkdir -p ../../build
  12 + pushd ../../build
  13 + cmake \
  14 + -DCMAKE_INSTALL_PREFIX=./install \
  15 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  16 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  17 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  18 + -DBUILD_SHARED_LIBS=ON \
  19 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  20 + ..
  21 +
  22 + cmake --build . --target install --config Release
  23 + ls -lh lib
  24 + popd
  25 +fi
  26 +
  27 +if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt ]; then
  28 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  29 + tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  30 + rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  31 +fi
  32 +
  33 +fpc \
  34 + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  35 + -Fl$SHERPA_ONNX_DIR/build/install/lib \
  36 + ./zipformer_ctc.pas
  37 +
  38 +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
  39 +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
  40 +
  41 +./zipformer_ctc
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
  6 +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
  7 +
  8 +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
  9 +
  10 +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  11 + mkdir -p ../../build
  12 + pushd ../../build
  13 + cmake \
  14 + -DCMAKE_INSTALL_PREFIX=./install \
  15 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  16 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  17 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  18 + -DBUILD_SHARED_LIBS=ON \
  19 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  20 + ..
  21 +
  22 + cmake --build . --target install --config Release
  23 + ls -lh lib
  24 + popd
  25 +fi
  26 +
  27 +if [ ! -f ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ]; then
  28 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  29 + tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  30 + rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  31 +fi
  32 +
  33 +
  34 +fpc \
  35 + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  36 + -Fl$SHERPA_ONNX_DIR/build/install/lib \
  37 + ./zipformer_transducer.pas
  38 +
  39 +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
  40 +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
  41 +
  42 +./zipformer_transducer
  1 +{ Copyright (c) 2024 Xiaomi Corporation }
  2 +
  3 +{
  4 +This file shows how to use a streaming Zipformer CTC model
  5 +to decode files.
  6 +
  7 +You can download the model files from
  8 +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  9 +}
  10 +
  11 +program zipformer_ctc;
  12 +
  13 +{$mode objfpc}
  14 +
  15 +uses
  16 + sherpa_onnx,
  17 + DateUtils,
  18 + SysUtils;
  19 +
  20 +var
  21 + Config: TSherpaOnnxOnlineRecognizerConfig;
  22 + Recognizer: TSherpaOnnxOnlineRecognizer;
  23 + Stream: TSherpaOnnxOnlineStream;
  24 + RecognitionResult: TSherpaOnnxOnlineRecognizerResult;
  25 + Wave: TSherpaOnnxWave;
  26 + WaveFilename: AnsiString;
  27 + TailPaddings: array of Single;
  28 +
  29 + Start: TDateTime;
  30 + Stop: TDateTime;
  31 +
  32 + Elapsed: Single;
  33 + Duration: Single;
  34 + RealTimeFactor: Single;
  35 +begin
  36 + Initialize(Config);
  37 +
  38 + {Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  39 + to download model files used in this file.}
  40 + Config.ModelConfig.Zipformer2Ctc.Model := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx';
  41 + Config.ModelConfig.Tokens := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt';
  42 + Config.ModelConfig.Provider := 'cpu';
  43 + Config.ModelConfig.NumThreads := 1;
  44 + Config.ModelConfig.Debug := False;
  45 +
  46 + WaveFilename := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav';
  47 +
  48 + Wave := SherpaOnnxReadWave(WaveFilename);
  49 +
  50 + Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config);
  51 +
  52 + Start := Now;
  53 +
  54 + Stream := Recognizer.CreateStream();
  55 +
  56 + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  57 +
  58 + SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding}
  59 + Stream.AcceptWaveform(TailPaddings, Wave.SampleRate);
  60 +
  61 + Stream.InputFinished();
  62 +
  63 + while Recognizer.IsReady(Stream) do
  64 + Recognizer.Decode(Stream);
  65 +
  66 + RecognitionResult := Recognizer.GetResult(Stream);
  67 +
  68 + Stop := Now;
  69 +
  70 + Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  71 + Duration := Length(Wave.Samples) / Wave.SampleRate;
  72 + RealTimeFactor := Elapsed / Duration;
  73 +
  74 + WriteLn(RecognitionResult.ToString);
  75 + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  76 + WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  77 + WriteLn(Format('Wave duration %.3f s', [Duration]));
  78 + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
  79 +
  80 + {Free resources to avoid memory leak.
  81 +
  82 + Note: You don't need to invoke them for this simple script.
  83 + However, you have to invoke them in your own large/complex project.
  84 + }
  85 + FreeAndNil(Stream);
  86 + FreeAndNil(Recognizer);
  87 +end.
  1 +{ Copyright (c) 2024 Xiaomi Corporation }
  2 +
  3 +{
  4 +This file shows how to use a streaming Zipformer CTC model
  5 +with HLG to decode files.
  6 +
  7 +You can download the model files from
  8 +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  9 +}
  10 +
  11 +program zipformer_ctc_hlg;
  12 +
  13 +{$mode objfpc}
  14 +
  15 +uses
  16 + sherpa_onnx,
  17 + DateUtils,
  18 + SysUtils;
  19 +
  20 +var
  21 + Config: TSherpaOnnxOnlineRecognizerConfig;
  22 + Recognizer: TSherpaOnnxOnlineRecognizer;
  23 + Stream: TSherpaOnnxOnlineStream;
  24 + RecognitionResult: TSherpaOnnxOnlineRecognizerResult;
  25 + Wave: TSherpaOnnxWave;
  26 + WaveFilename: AnsiString;
  27 + TailPaddings: array of Single;
  28 +
  29 + Start: TDateTime;
  30 + Stop: TDateTime;
  31 +
  32 + Elapsed: Single;
  33 + Duration: Single;
  34 + RealTimeFactor: Single;
  35 +begin
  36 + Initialize(Config);
  37 +
  38 + {Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  39 + to download model files used in this file.}
  40 + Config.ModelConfig.Zipformer2Ctc.Model := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx';
  41 + Config.ModelConfig.Tokens := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt';
  42 + Config.ModelConfig.Provider := 'cpu';
  43 + Config.ModelConfig.NumThreads := 1;
  44 + Config.ModelConfig.Debug := True;
  45 + Config.CtcFstDecoderConfig.Graph := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst';
  46 +
  47 + WaveFilename := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav';
  48 +
  49 + Wave := SherpaOnnxReadWave(WaveFilename);
  50 +
  51 + Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config);
  52 +
  53 + Start := Now;
  54 +
  55 + Stream := Recognizer.CreateStream();
  56 +
  57 + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  58 +
  59 + SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding}
  60 + Stream.AcceptWaveform(TailPaddings, Wave.SampleRate);
  61 +
  62 + Stream.InputFinished();
  63 +
  64 + while Recognizer.IsReady(Stream) do
  65 + Recognizer.Decode(Stream);
  66 +
  67 + RecognitionResult := Recognizer.GetResult(Stream);
  68 +
  69 + Stop := Now;
  70 +
  71 + Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  72 + Duration := Length(Wave.Samples) / Wave.SampleRate;
  73 + RealTimeFactor := Elapsed / Duration;
  74 +
  75 + WriteLn(RecognitionResult.ToString);
  76 + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  77 + WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  78 + WriteLn(Format('Wave duration %.3f s', [Duration]));
  79 + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
  80 +
  81 + {Free resources to avoid memory leak.
  82 +
  83 + Note: You don't need to invoke them for this simple script.
  84 + However, you have to invoke them in your own large/complex project.
  85 + }
  86 + FreeAndNil(Stream);
  87 + FreeAndNil(Recognizer);
  88 +end.
  1 +{ Copyright (c) 2024 Xiaomi Corporation }
  2 +
  3 +{
  4 +This file shows how to use a streaming Zipformer transducer
  5 +to decode files.
  6 +
  7 +You can download the model files from
  8 +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  9 +}
  10 +
  11 +program zipformer_transducer;
  12 +
  13 +{$mode objfpc}
  14 +
  15 +uses
  16 + sherpa_onnx,
  17 + DateUtils,
  18 + SysUtils;
  19 +
  20 +var
  21 + Config: TSherpaOnnxOnlineRecognizerConfig;
  22 + Recognizer: TSherpaOnnxOnlineRecognizer;
  23 + Stream: TSherpaOnnxOnlineStream;
  24 + RecognitionResult: TSherpaOnnxOnlineRecognizerResult;
  25 + Wave: TSherpaOnnxWave;
  26 + WaveFilename: AnsiString;
  27 + TailPaddings: array of Single;
  28 +
  29 + Start: TDateTime;
  30 + Stop: TDateTime;
  31 +
  32 + Elapsed: Single;
  33 + Duration: Single;
  34 + RealTimeFactor: Single;
  35 +begin
  36 + Initialize(Config);
  37 +
  38 + {Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  39 + to download model files used in this file.}
  40 + Config.ModelConfig.Transducer.Encoder := './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx';
  41 + Config.ModelConfig.Transducer.Decoder := './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx';
  42 + Config.ModelConfig.Transducer.Joiner := './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx';
  43 + Config.ModelConfig.Tokens := './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt';
  44 + Config.ModelConfig.Provider := 'cpu';
  45 + Config.ModelConfig.NumThreads := 1;
  46 + Config.ModelConfig.Debug := False;
  47 +
  48 + WaveFilename := './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav';
  49 +
  50 + Wave := SherpaOnnxReadWave(WaveFilename);
  51 +
  52 + Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config);
  53 +
  54 + Start := Now;
  55 +
  56 + Stream := Recognizer.CreateStream();
  57 +
  58 + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  59 +
  60 + SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding}
  61 + Stream.AcceptWaveform(TailPaddings, Wave.SampleRate);
  62 +
  63 + Stream.InputFinished();
  64 +
  65 + while Recognizer.IsReady(Stream) do
  66 + Recognizer.Decode(Stream);
  67 +
  68 + RecognitionResult := Recognizer.GetResult(Stream);
  69 +
  70 + Stop := Now;
  71 +
  72 + Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  73 + Duration := Length(Wave.Samples) / Wave.SampleRate;
  74 + RealTimeFactor := Elapsed / Duration;
  75 +
  76 + WriteLn(RecognitionResult.ToString);
  77 + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  78 + WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  79 + WriteLn(Format('Wave duration %.3f s', [Duration]));
  80 + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
  81 +
  82 + {Free resources to avoid memory leak.
  83 +
  84 + Note: You don't need to invoke them for this simple script.
  85 + However, you have to invoke them in your own large/complex project.
  86 + }
  87 + FreeAndNil(Stream);
  88 + FreeAndNil(Recognizer);
  89 +end.
@@ -75,17 +75,31 @@ SherpaOnnxOnlineRecognizer *SherpaOnnxCreateOnlineRecognizer( @@ -75,17 +75,31 @@ SherpaOnnxOnlineRecognizer *SherpaOnnxCreateOnlineRecognizer(
75 SHERPA_ONNX_OR(config->model_config.num_threads, 1); 75 SHERPA_ONNX_OR(config->model_config.num_threads, 1);
76 recognizer_config.model_config.provider_config.provider = 76 recognizer_config.model_config.provider_config.provider =
77 SHERPA_ONNX_OR(config->model_config.provider, "cpu"); 77 SHERPA_ONNX_OR(config->model_config.provider, "cpu");
  78 +
  79 + if (recognizer_config.model_config.provider_config.provider.empty()) {
  80 + recognizer_config.model_config.provider_config.provider = "cpu";
  81 + }
  82 +
78 recognizer_config.model_config.model_type = 83 recognizer_config.model_config.model_type =
79 SHERPA_ONNX_OR(config->model_config.model_type, ""); 84 SHERPA_ONNX_OR(config->model_config.model_type, "");
80 recognizer_config.model_config.debug = 85 recognizer_config.model_config.debug =
81 SHERPA_ONNX_OR(config->model_config.debug, 0); 86 SHERPA_ONNX_OR(config->model_config.debug, 0);
82 recognizer_config.model_config.modeling_unit = 87 recognizer_config.model_config.modeling_unit =
83 SHERPA_ONNX_OR(config->model_config.modeling_unit, "cjkchar"); 88 SHERPA_ONNX_OR(config->model_config.modeling_unit, "cjkchar");
  89 +
  90 + if (recognizer_config.model_config.modeling_unit.empty()) {
  91 + recognizer_config.model_config.modeling_unit = "cjkchar";
  92 + }
  93 +
84 recognizer_config.model_config.bpe_vocab = 94 recognizer_config.model_config.bpe_vocab =
85 SHERPA_ONNX_OR(config->model_config.bpe_vocab, ""); 95 SHERPA_ONNX_OR(config->model_config.bpe_vocab, "");
86 96
87 recognizer_config.decoding_method = 97 recognizer_config.decoding_method =
88 SHERPA_ONNX_OR(config->decoding_method, "greedy_search"); 98 SHERPA_ONNX_OR(config->decoding_method, "greedy_search");
  99 + if (recognizer_config.decoding_method.empty()) {
  100 + recognizer_config.decoding_method = "greedy_search";
  101 + }
  102 +
89 recognizer_config.max_active_paths = 103 recognizer_config.max_active_paths =
90 SHERPA_ONNX_OR(config->max_active_paths, 4); 104 SHERPA_ONNX_OR(config->max_active_paths, 4);
91 105
@@ -391,10 +405,19 @@ sherpa_onnx::OfflineRecognizerConfig convertConfig( @@ -391,10 +405,19 @@ sherpa_onnx::OfflineRecognizerConfig convertConfig(
391 SHERPA_ONNX_OR(config->model_config.debug, 0); 405 SHERPA_ONNX_OR(config->model_config.debug, 0);
392 recognizer_config.model_config.provider = 406 recognizer_config.model_config.provider =
393 SHERPA_ONNX_OR(config->model_config.provider, "cpu"); 407 SHERPA_ONNX_OR(config->model_config.provider, "cpu");
  408 + if (recognizer_config.model_config.provider.empty()) {
  409 + recognizer_config.model_config.provider = "cpu";
  410 + }
  411 +
394 recognizer_config.model_config.model_type = 412 recognizer_config.model_config.model_type =
395 SHERPA_ONNX_OR(config->model_config.model_type, ""); 413 SHERPA_ONNX_OR(config->model_config.model_type, "");
396 recognizer_config.model_config.modeling_unit = 414 recognizer_config.model_config.modeling_unit =
397 SHERPA_ONNX_OR(config->model_config.modeling_unit, "cjkchar"); 415 SHERPA_ONNX_OR(config->model_config.modeling_unit, "cjkchar");
  416 +
  417 + if (recognizer_config.model_config.modeling_unit.empty()) {
  418 + recognizer_config.model_config.modeling_unit = "cjkchar";
  419 + }
  420 +
398 recognizer_config.model_config.bpe_vocab = 421 recognizer_config.model_config.bpe_vocab =
399 SHERPA_ONNX_OR(config->model_config.bpe_vocab, ""); 422 SHERPA_ONNX_OR(config->model_config.bpe_vocab, "");
400 423
@@ -620,6 +643,10 @@ SherpaOnnxKeywordSpotter *SherpaOnnxCreateKeywordSpotter( @@ -620,6 +643,10 @@ SherpaOnnxKeywordSpotter *SherpaOnnxCreateKeywordSpotter(
620 SHERPA_ONNX_OR(config->model_config.num_threads, 1); 643 SHERPA_ONNX_OR(config->model_config.num_threads, 1);
621 spotter_config.model_config.provider_config.provider = 644 spotter_config.model_config.provider_config.provider =
622 SHERPA_ONNX_OR(config->model_config.provider, "cpu"); 645 SHERPA_ONNX_OR(config->model_config.provider, "cpu");
  646 + if (spotter_config.model_config.provider_config.provider.empty()) {
  647 + spotter_config.model_config.provider_config.provider = "cpu";
  648 + }
  649 +
623 spotter_config.model_config.model_type = 650 spotter_config.model_config.model_type =
624 SHERPA_ONNX_OR(config->model_config.model_type, ""); 651 SHERPA_ONNX_OR(config->model_config.model_type, "");
625 spotter_config.model_config.debug = 652 spotter_config.model_config.debug =
@@ -855,6 +882,10 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector( @@ -855,6 +882,10 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector(
855 vad_config.sample_rate = SHERPA_ONNX_OR(config->sample_rate, 16000); 882 vad_config.sample_rate = SHERPA_ONNX_OR(config->sample_rate, 16000);
856 vad_config.num_threads = SHERPA_ONNX_OR(config->num_threads, 1); 883 vad_config.num_threads = SHERPA_ONNX_OR(config->num_threads, 1);
857 vad_config.provider = SHERPA_ONNX_OR(config->provider, "cpu"); 884 vad_config.provider = SHERPA_ONNX_OR(config->provider, "cpu");
  885 + if (vad_config.provider.empty()) {
  886 + vad_config.provider = "cpu";
  887 + }
  888 +
858 vad_config.debug = SHERPA_ONNX_OR(config->debug, false); 889 vad_config.debug = SHERPA_ONNX_OR(config->debug, false);
859 890
860 if (vad_config.debug) { 891 if (vad_config.debug) {
@@ -956,6 +987,10 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts( @@ -956,6 +987,10 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
956 tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1); 987 tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
957 tts_config.model.debug = config->model.debug; 988 tts_config.model.debug = config->model.debug;
958 tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu"); 989 tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
  990 + if (tts_config.model.provider.empty()) {
  991 + tts_config.model.provider = "cpu";
  992 + }
  993 +
959 tts_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, ""); 994 tts_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, "");
960 tts_config.rule_fars = SHERPA_ONNX_OR(config->rule_fars, ""); 995 tts_config.rule_fars = SHERPA_ONNX_OR(config->rule_fars, "");
961 tts_config.max_num_sentences = SHERPA_ONNX_OR(config->max_num_sentences, 2); 996 tts_config.max_num_sentences = SHERPA_ONNX_OR(config->max_num_sentences, 2);
@@ -1101,6 +1136,9 @@ SherpaOnnxCreateSpokenLanguageIdentification( @@ -1101,6 +1136,9 @@ SherpaOnnxCreateSpokenLanguageIdentification(
1101 slid_config.num_threads = SHERPA_ONNX_OR(config->num_threads, 1); 1136 slid_config.num_threads = SHERPA_ONNX_OR(config->num_threads, 1);
1102 slid_config.debug = config->debug; 1137 slid_config.debug = config->debug;
1103 slid_config.provider = SHERPA_ONNX_OR(config->provider, "cpu"); 1138 slid_config.provider = SHERPA_ONNX_OR(config->provider, "cpu");
  1139 + if (slid_config.provider.empty()) {
  1140 + slid_config.provider = "cpu";
  1141 + }
1104 1142
1105 if (slid_config.debug) { 1143 if (slid_config.debug) {
1106 SHERPA_ONNX_LOGE("%s\n", slid_config.ToString().c_str()); 1144 SHERPA_ONNX_LOGE("%s\n", slid_config.ToString().c_str());
@@ -1167,6 +1205,9 @@ SherpaOnnxCreateSpeakerEmbeddingExtractor( @@ -1167,6 +1205,9 @@ SherpaOnnxCreateSpeakerEmbeddingExtractor(
1167 c.num_threads = SHERPA_ONNX_OR(config->num_threads, 1); 1205 c.num_threads = SHERPA_ONNX_OR(config->num_threads, 1);
1168 c.debug = SHERPA_ONNX_OR(config->debug, 0); 1206 c.debug = SHERPA_ONNX_OR(config->debug, 0);
1169 c.provider = SHERPA_ONNX_OR(config->provider, "cpu"); 1207 c.provider = SHERPA_ONNX_OR(config->provider, "cpu");
  1208 + if (c.provider.empty()) {
  1209 + c.provider = "cpu";
  1210 + }
1170 1211
1171 if (config->debug) { 1212 if (config->debug) {
1172 SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str()); 1213 SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str());
@@ -1401,6 +1442,10 @@ const SherpaOnnxAudioTagging *SherpaOnnxCreateAudioTagging( @@ -1401,6 +1442,10 @@ const SherpaOnnxAudioTagging *SherpaOnnxCreateAudioTagging(
1401 ac.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1); 1442 ac.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
1402 ac.model.debug = config->model.debug; 1443 ac.model.debug = config->model.debug;
1403 ac.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu"); 1444 ac.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
  1445 + if (ac.model.provider.empty()) {
  1446 + ac.model.provider = "cpu";
  1447 + }
  1448 +
1404 ac.labels = SHERPA_ONNX_OR(config->labels, ""); 1449 ac.labels = SHERPA_ONNX_OR(config->labels, "");
1405 ac.top_k = SHERPA_ONNX_OR(config->top_k, 5); 1450 ac.top_k = SHERPA_ONNX_OR(config->top_k, 5);
1406 1451
@@ -1487,6 +1532,9 @@ const SherpaOnnxOfflinePunctuation *SherpaOnnxCreateOfflinePunctuation( @@ -1487,6 +1532,9 @@ const SherpaOnnxOfflinePunctuation *SherpaOnnxCreateOfflinePunctuation(
1487 c.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1); 1532 c.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
1488 c.model.debug = config->model.debug; 1533 c.model.debug = config->model.debug;
1489 c.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu"); 1534 c.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
  1535 + if (c.model.provider.empty()) {
  1536 + c.model.provider = "cpu";
  1537 + }
1490 1538
1491 if (c.model.debug) { 1539 if (c.model.debug) {
1492 SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str()); 1540 SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str());
@@ -4,6 +4,9 @@ unit sherpa_onnx; @@ -4,6 +4,9 @@ unit sherpa_onnx;
4 4
5 {$mode objfpc} 5 {$mode objfpc}
6 6
  7 +{$modeSwitch advancedRecords} { to support records with methods }
  8 +(* {$LongStrings ON} *)
  9 +
7 interface 10 interface
8 11
9 type 12 type
@@ -12,15 +15,117 @@ type @@ -12,15 +15,117 @@ type
12 SampleRate: Integer; 15 SampleRate: Integer;
13 end; 16 end;
14 17
  18 + TSherpaOnnxOnlineTransducerModelConfig = record
  19 + Encoder: AnsiString;
  20 + Decoder: AnsiString;
  21 + Joiner: AnsiString;
  22 + function ToString: AnsiString;
  23 + end;
  24 +
  25 + TSherpaOnnxOnlineParaformerModelConfig = record
  26 + Encoder: AnsiString;
  27 + Decoder: AnsiString;
  28 + function ToString: AnsiString;
  29 + end;
  30 +
  31 + TSherpaOnnxOnlineZipformer2CtcModelConfig = record
  32 + Model: AnsiString;
  33 + function ToString: AnsiString;
  34 + end;
  35 +
  36 + TSherpaOnnxOnlineModelConfig = record
  37 + Transducer: TSherpaOnnxOnlineTransducerModelConfig;
  38 + Paraformer: TSherpaOnnxOnlineParaformerModelConfig;
  39 + Zipformer2Ctc: TSherpaOnnxOnlineZipformer2CtcModelConfig;
  40 + Tokens: AnsiString;
  41 + NumThreads: Integer;
  42 + Provider: AnsiString;
  43 + Debug: Boolean;
  44 + ModelType: AnsiString;
  45 + ModelingUnit: AnsiString;
  46 + BpeVocab: AnsiString;
  47 + function ToString: AnsiString;
  48 + end;
  49 +
  50 + TSherpaOnnxFeatureConfig = record
  51 + SampleRate: Integer;
  52 + FeatureDim: Integer;
  53 + function ToString: AnsiString;
  54 + end;
  55 +
  56 + TSherpaOnnxOnlineCtcFstDecoderConfig = record
  57 + Graph: AnsiString;
  58 + MaxActive: Integer;
  59 + function ToString: AnsiString;
  60 + end;
  61 +
  62 + TSherpaOnnxOnlineRecognizerConfig = record
  63 + FeatConfig: TSherpaOnnxFeatureConfig;
  64 + ModelConfig: TSherpaOnnxOnlineModelConfig;
  65 + DecodingMethod: AnsiString;
  66 + MaxActivePaths: Integer;
  67 + EnableEndpoint: Boolean;
  68 + Rule1MinTrailingSilence: Single;
  69 + Rule2MinTrailingSilence: Single;
  70 + Rule3MinUtteranceLength: Single;
  71 + HotwordsFile: AnsiString;
  72 + HotwordsScore: Single;
  73 + CtcFstDecoderConfig: TSherpaOnnxOnlineCtcFstDecoderConfig;
  74 + RuleFsts: AnsiString;
  75 + RuleFars: AnsiString;
  76 + BlankPenalty: Single;
  77 + function ToString: AnsiString;
  78 + end;
  79 +
  80 + TSherpaOnnxOnlineRecognizerResult = record
  81 + Text: AnsiString;
  82 + Tokens: array of AnsiString;
  83 + Timestamps: array of Single;
  84 + function ToString: AnsiString;
  85 + end;
  86 +
  87 + TSherpaOnnxOnlineStream = class
  88 + private
  89 + Handle: Pointer;
  90 + public
  91 + constructor Create(P: Pointer);
  92 + destructor Destroy; override;
  93 + procedure AcceptWaveform(Samples: array of Single; SampleRate: Integer);
  94 + procedure InputFinished;
  95 + end;
  96 +
  97 + TSherpaOnnxOnlineRecognizer = class
  98 + private
  99 + Handle: Pointer;
  100 + public
  101 + constructor Create(Config: TSherpaOnnxOnlineRecognizerConfig);
  102 + destructor Destroy; override;
  103 +
  104 + function CreateStream: TSherpaOnnxOnlineStream; overload;
  105 + function CreateStream(Hotwords: AnsiString): TSherpaOnnxOnlineStream; overload;
  106 + function IsReady(Stream: TSherpaOnnxOnlineStream): Boolean;
  107 + procedure Decode(Stream: TSherpaOnnxOnlineStream);
  108 + procedure Reset(Stream: TSherpaOnnxOnlineStream);
  109 + function IsEndpoint(Stream: TSherpaOnnxOnlineStream): Boolean;
  110 + function GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult;
  111 + end;
  112 +
15 { It supports reading a single channel wave with 16-bit encoded samples. 113 { It supports reading a single channel wave with 16-bit encoded samples.
16 Samples are normalized to the range [-1, 1]. 114 Samples are normalized to the range [-1, 1].
17 } 115 }
18 -function SherpaOnnxReadWave(Filename: string): TSherpaOnnxWave; 116 +function SherpaOnnxReadWave(Filename: AnsiString): TSherpaOnnxWave;
19 117
20 implementation 118 implementation
21 119
22 uses 120 uses
23 - ctypes; 121 + ctypes,
  122 + fpjson,
  123 + { See
  124 + - https://wiki.freepascal.org/fcl-json
  125 + - https://www.freepascal.org/daily/doc/fcl/fpjson/getjson.html
  126 + }
  127 + jsonparser,
  128 + SysUtils;
24 129
25 const 130 const
26 {See https://www.freepascal.org/docs-html/prog/progap7.html} 131 {See https://www.freepascal.org/docs-html/prog/progap7.html}
@@ -47,31 +152,383 @@ type @@ -47,31 +152,383 @@ type
47 152
48 PSherpaOnnxWave = ^SherpaOnnxWave; 153 PSherpaOnnxWave = ^SherpaOnnxWave;
49 154
  155 + SherpaOnnxOnlineTransducerModelConfig = record
  156 + Encoder: PAnsiChar;
  157 + Decoder: PAnsiChar;
  158 + Joiner: PAnsiChar;
  159 + end;
  160 + SherpaOnnxOnlineParaformerModelConfig = record
  161 + Encoder: PAnsiChar;
  162 + Decoder: PAnsiChar;
  163 + end;
  164 + SherpaOnnxOnlineZipformer2CtcModelConfig = record
  165 + Model: PAnsiChar;
  166 + end;
  167 +
  168 + SherpaOnnxOnlineModelConfig= record
  169 + Transducer: SherpaOnnxOnlineTransducerModelConfig;
  170 + Paraformer: SherpaOnnxOnlineParaformerModelConfig;
  171 + Zipformer2Ctc: SherpaOnnxOnlineZipformer2CtcModelConfig;
  172 + Tokens: PAnsiChar;
  173 + NumThreads: cint32;
  174 + Provider: PAnsiChar;
  175 + Debug: cint32;
  176 + ModelType: PAnsiChar;
  177 + ModelingUnit: PAnsiChar;
  178 + BpeVocab: PAnsiChar;
  179 + end;
  180 + SherpaOnnxFeatureConfig = record
  181 + SampleRate: cint32;
  182 + FeatureDim: cint32;
  183 + end;
  184 + SherpaOnnxOnlineCtcFstDecoderConfig = record
  185 + Graph: PAnsiChar;
  186 + MaxActive: cint32;
  187 + end;
  188 + SherpaOnnxOnlineRecognizerConfig = record
  189 + FeatConfig: SherpaOnnxFeatureConfig;
  190 + ModelConfig: SherpaOnnxOnlineModelConfig;
  191 + DecodingMethod: PAnsiChar;
  192 + MaxActivePaths: cint32;
  193 + EnableEndpoint: cint32;
  194 + Rule1MinTrailingSilence: Single;
  195 + Rule2MinTrailingSilence: Single;
  196 + Rule3MinUtteranceLength: Single;
  197 + HotwordsFile: PAnsiChar;
  198 + HotwordsScore: Single;
  199 + CtcFstDecoderConfig: SherpaOnnxOnlineCtcFstDecoderConfig;
  200 + RuleFsts: PAnsiChar;
  201 + RuleFars: PAnsiChar;
  202 + BlankPenalty: Single;
  203 + end;
  204 +
  205 + PSherpaOnnxOnlineRecognizerConfig = ^SherpaOnnxOnlineRecognizerConfig;
  206 +
  207 +function SherpaOnnxCreateOnlineRecognizer(Config: PSherpaOnnxOnlineRecognizerConfig): Pointer; cdecl;
  208 + external SherpaOnnxLibName;
  209 +
  210 +procedure SherpaOnnxDestroyOnlineRecognizer(Recognizer: Pointer); cdecl;
  211 + external SherpaOnnxLibName;
  212 +
  213 +function SherpaOnnxCreateOnlineStream(Recognizer: Pointer): Pointer; cdecl;
  214 + external SherpaOnnxLibName;
  215 +
  216 +function SherpaOnnxCreateOnlineStreamWithHotwords(Recognizer: Pointer; Hotwords: PAnsiChar): Pointer; cdecl;
  217 + external SherpaOnnxLibName;
  218 +
  219 +procedure SherpaOnnxDestroyOnlineStream(Recognizer: Pointer); cdecl;
  220 + external SherpaOnnxLibName;
  221 +
  222 +procedure SherpaOnnxOnlineStreamAcceptWaveform(Stream: Pointer;
  223 + SampleRate: cint32; Samples: pcfloat; N: cint32 ); cdecl;
  224 + external SherpaOnnxLibName;
  225 +
  226 +procedure SherpaOnnxOnlineStreamInputFinished(Stream: Pointer); cdecl;
  227 + external SherpaOnnxLibName;
  228 +
  229 +function SherpaOnnxIsOnlineStreamReady(Recognizer: Pointer; Stream: Pointer): cint32; cdecl;
  230 + external SherpaOnnxLibName;
  231 +
  232 +procedure SherpaOnnxDecodeOnlineStream(Recognizer: Pointer; Stream: Pointer); cdecl;
  233 + external SherpaOnnxLibName;
  234 +
  235 +procedure SherpaOnnxOnlineStreamReset(Recognizer: Pointer; Stream: Pointer); cdecl;
  236 + external SherpaOnnxLibName;
  237 +
  238 +function SherpaOnnxOnlineStreamIsEndpoint(Recognizer: Pointer; Stream: Pointer): cint32; cdecl;
  239 + external SherpaOnnxLibName;
  240 +
  241 +function SherpaOnnxGetOnlineStreamResultAsJson(Recognizer: Pointer; Stream: Pointer): PAnsiChar; cdecl;
  242 + external SherpaOnnxLibName;
  243 +
  244 +procedure SherpaOnnxDestroyOnlineStreamResultJson(PJson: PAnsiChar); cdecl;
  245 + external SherpaOnnxLibName;
  246 +
50 function SherpaOnnxReadWaveWrapper(Filename: PAnsiChar): PSherpaOnnxWave; cdecl; 247 function SherpaOnnxReadWaveWrapper(Filename: PAnsiChar): PSherpaOnnxWave; cdecl;
51 external SherpaOnnxLibName name 'SherpaOnnxReadWave'; 248 external SherpaOnnxLibName name 'SherpaOnnxReadWave';
52 249
53 procedure SherpaOnnxFreeWaveWrapper(P: PSherpaOnnxWave); cdecl; 250 procedure SherpaOnnxFreeWaveWrapper(P: PSherpaOnnxWave); cdecl;
54 external SherpaOnnxLibName name 'SherpaOnnxFreeWave'; 251 external SherpaOnnxLibName name 'SherpaOnnxFreeWave';
55 252
{ Reads the wave file Filename through the C API and copies its samples and
  sample rate into a Pascal-managed record.

  If the C API returns nil, the result has no samples and SampleRate = 0
  instead of the previous nil dereference.
  NOTE(review): nil is assumed to be the C API's failure result; confirm in
  the C header. }
function SherpaOnnxReadWave(Filename: AnsiString): TSherpaOnnxWave;
var
  PWave: PSherpaOnnxWave;
  I: Integer;
begin
  Result.Samples := nil;
  Result.SampleRate := 0;

  PWave := SherpaOnnxReadWaveWrapper(PAnsiChar(Filename));
  if PWave = nil then
    Exit;

  try
    SetLength(Result.Samples, PWave^.NumSamples);
    Result.SampleRate := PWave^.SampleRate;

    for I := Low(Result.Samples) to High(Result.Samples) do
      Result.Samples[I] := PWave^.Samples[I];
  finally
    { Always hand the C-side buffer back, even if the copy raised. }
    SherpaOnnxFreeWaveWrapper(PWave);
  end;
end;
76 272
  273 +function TSherpaOnnxOnlineTransducerModelConfig.ToString: AnsiString;
  274 +begin
  275 + Result := Format('TSherpaOnnxOnlineTransducerModelConfig(Encoder := %s, Decoder := %s, Joiner := %s)',
  276 + [Self.Encoder, Self.Decoder, Self.Joiner]);
  277 +end;
  278 +
  279 +function TSherpaOnnxOnlineParaformerModelConfig.ToString: AnsiString;
  280 +begin
  281 + Result := Format('TSherpaOnnxOnlineParaformerModelConfig(Encoder := %s, Decoder := %s)',
  282 + [Self.Encoder, Self.Decoder]);
  283 +end;
  284 +
  285 +function TSherpaOnnxOnlineZipformer2CtcModelConfig.ToString: AnsiString;
  286 +begin
  287 + Result := Format('TSherpaOnnxOnlineZipformer2CtcModelConfig(Model := %s)',
  288 + [Self.Model]);
  289 +end;
  290 +
  291 +function TSherpaOnnxOnlineModelConfig.ToString: AnsiString;
  292 +begin
  293 + Result := Format('TSherpaOnnxOnlineModelConfig(Transducer := %s, ' +
  294 + 'Paraformer := %s,' +
  295 + 'Zipformer2Ctc := %s, ' +
  296 + 'Tokens := %s, ' +
  297 + 'NumThreads := %d, ' +
  298 + 'Provider := %s, ' +
  299 + 'Debug := %s, ' +
  300 + 'ModelType := %s, ' +
  301 + 'ModelingUnit := %s, ' +
  302 + 'BpeVocab := %s)'
  303 + ,
  304 + [Self.Transducer.ToString, Self.Paraformer.ToString,
  305 + Self.Zipformer2Ctc.ToString, Self.Tokens,
  306 + Self.NumThreads, Self.Provider, Self.Debug.ToString,
  307 + Self.ModelType, Self.ModelingUnit, Self.BpeVocab
  308 + ]);
  309 +end;
  310 +
  311 +function TSherpaOnnxFeatureConfig.ToString: AnsiString;
  312 +begin
  313 + Result := Format('TSherpaOnnxFeatureConfig(SampleRate := %d, FeatureDim := %d)',
  314 + [Self.SampleRate, Self.FeatureDim]);
  315 +end;
  316 +
  317 +function TSherpaOnnxOnlineCtcFstDecoderConfig.ToString: AnsiString;
  318 +begin
  319 + Result := Format('TSherpaOnnxOnlineCtcFstDecoderConfig(Graph := %s, MaxActive := %d)',
  320 + [Self.Graph, Self.MaxActive]);
  321 +end;
  322 +
  323 +function TSherpaOnnxOnlineRecognizerConfig.ToString: AnsiString;
  324 +begin
  325 + Result := Format('TSherpaOnnxOnlineRecognizerConfig(FeatConfg := %s, ' +
  326 + 'ModelConfig := %s, ' +
  327 + 'DecodingMethod := %s, ' +
  328 + 'MaxActivePaths := %d, ' +
  329 + 'EnableEndpoint := %s, ' +
  330 + 'Rule1MinTrailingSilence := %.1f, ' +
  331 + 'Rule2MinTrailingSilence := %.1f, ' +
  332 + 'Rule3MinUtteranceLength := %.1f, ' +
  333 + 'HotwordsFile := %s, ' +
  334 + 'HotwordsScore := %.1f, ' +
  335 + 'CtcFstDecoderConfig := %s, ' +
  336 + 'RuleFsts := %s, ' +
  337 + 'RuleFars := %s, ' +
  338 + 'BlankPenalty := %.1f' +
  339 + ')'
  340 + ,
  341 + [Self.FeatConfig.ToString, Self.ModelConfig.ToString,
  342 + Self.DecodingMethod, Self.MaxActivePaths, Self.EnableEndpoint.ToString,
  343 + Self.Rule1MinTrailingSilence, Self.Rule2MinTrailingSilence,
  344 + Self.Rule3MinUtteranceLength, Self.HotwordsFile, Self.HotwordsScore,
  345 + Self.CtcFstDecoderConfig.ToString, Self.RuleFsts, Self.RuleFars,
  346 + Self.BlankPenalty
  347 + ]);
  348 +end;
  349 +
  350 +function TSherpaOnnxOnlineRecognizerResult.ToString: AnsiString;
  351 +var
  352 + TokensStr: AnsiString;
  353 + S: AnsiString;
  354 + TimestampStr: AnsiString;
  355 + T: Single;
  356 + Sep: AnsiString;
  357 +begin
  358 + TokensStr := '[';
  359 + Sep := '';
  360 + for S in Self.Tokens do
  361 + begin
  362 + TokensStr := TokensStr + Sep + S;
  363 + Sep := ', ';
  364 + end;
  365 + TokensStr := TokensStr + ']';
  366 +
  367 + TimestampStr := '[';
  368 + Sep := '';
  369 + for T in Self.Timestamps do
  370 + begin
  371 + TimestampStr := TimestampStr + Sep + Format('%.2f', [T]);
  372 + Sep := ', ';
  373 + end;
  374 + TimestampStr := TimestampStr + ']';
  375 +
  376 + Result := Format('TSherpaOnnxOnlineRecognizerResult(Text := %s, ' +
  377 + 'Tokens := %s, ' +
  378 + 'Timestamps := %s, ' +
  379 + ')',
  380 + [Self.Text, TokensStr, TimestampStr]);
  381 +end;
  382 +
  383 +constructor TSherpaOnnxOnlineRecognizer.Create(Config: TSherpaOnnxOnlineRecognizerConfig);
  384 +var
  385 + C: SherpaOnnxOnlineRecognizerConfig;
  386 +begin
  387 + Initialize(C);
  388 +
  389 + C.FeatConfig.SampleRate := Config.FeatConfig.SampleRate;
  390 + C.FeatConfig.FeatureDim := Config.FeatConfig.FeatureDim;
  391 +
  392 + C.ModelConfig.Transducer.Encoder := PAnsiChar(Config.ModelConfig.Transducer.Encoder);
  393 + C.ModelConfig.Transducer.Decoder := PAnsiChar(Config.ModelConfig.Transducer.Decoder);
  394 + C.ModelConfig.Transducer.Joiner := PAnsiChar(Config.ModelConfig.Transducer.Joiner);
  395 +
  396 + C.ModelConfig.Paraformer.Encoder := PAnsiChar(Config.ModelConfig.Paraformer.Encoder);
  397 + C.ModelConfig.Paraformer.Decoder := PAnsiChar(Config.ModelConfig.Paraformer.Decoder);
  398 +
  399 + C.ModelConfig.Zipformer2Ctc.Model := PAnsiChar(Config.ModelConfig.Zipformer2Ctc.Model);
  400 +
  401 + C.ModelConfig.Tokens := PAnsiChar(Config.ModelConfig.Tokens);
  402 + C.ModelConfig.NumThreads := Config.ModelConfig.NumThreads;
  403 + C.ModelConfig.Provider := PAnsiChar(Config.ModelConfig.Provider);
  404 + C.ModelConfig.Debug := Ord(Config.ModelConfig.Debug);
  405 + C.ModelConfig.ModelType := PAnsiChar(Config.ModelConfig.ModelType);
  406 + C.ModelConfig.ModelingUnit := PAnsiChar(Config.ModelConfig.ModelingUnit);
  407 + C.ModelConfig.BpeVocab := PAnsiChar(Config.ModelConfig.BpeVocab);
  408 +
  409 + C.DecodingMethod := PAnsiChar(Config.DecodingMethod);
  410 + C.MaxActivePaths := Config.MaxActivePaths;
  411 + C.EnableEndpoint := Ord(Config.EnableEndpoint);
  412 + C.Rule1MinTrailingSilence := Config.Rule1MinTrailingSilence;
  413 + C.Rule2MinTrailingSilence := Config.Rule2MinTrailingSilence;
  414 + C.Rule3MinUtteranceLength := Config.Rule3MinUtteranceLength;
  415 + C.HotwordsFile := PAnsiChar(Config.HotwordsFile);
  416 + C.HotwordsScore := Config.HotwordsScore;
  417 + C.CtcFstDecoderConfig.Graph := PAnsiChar(Config.CtcFstDecoderConfig.Graph);
  418 + C.CtcFstDecoderConfig.MaxActive := Config.CtcFstDecoderConfig.MaxActive;
  419 + C.RuleFsts := PAnsiChar(Config.RuleFsts);
  420 + C.RuleFars := PAnsiChar(Config.RuleFars);
  421 + C.BlankPenalty := Config.BlankPenalty;
  422 +
  423 + Self.Handle := SherpaOnnxCreateOnlineRecognizer(@C);
  424 +end;
  425 +
  426 +destructor TSherpaOnnxOnlineRecognizer.Destroy;
  427 +begin
  428 + SherpaOnnxDestroyOnlineRecognizer(Self.Handle);
  429 + Self.Handle := nil;
  430 +end;
  431 +
  432 +function TSherpaOnnxOnlineRecognizer.CreateStream: TSherpaOnnxOnlineStream;
  433 +var
  434 + Stream: Pointer;
  435 +begin
  436 + Stream := SherpaOnnxCreateOnlineStream(Self.Handle);
  437 + Result := TSherpaOnnxOnlineStream.Create(Stream);
  438 +end;
  439 +
  440 +function TSherpaOnnxOnlineRecognizer.CreateStream(Hotwords: AnsiString): TSherpaOnnxOnlineStream;
  441 +var
  442 + Stream: Pointer;
  443 +begin
  444 + Stream := SherpaOnnxCreateOnlineStreamWithHotwords(Self.Handle, PAnsiChar(Hotwords));
  445 + Result := TSherpaOnnxOnlineStream.Create(Stream);
  446 +end;
  447 +
  448 +function TSherpaOnnxOnlineRecognizer.IsReady(Stream: TSherpaOnnxOnlineStream): Boolean;
  449 +begin
  450 + Result := SherpaOnnxIsOnlineStreamReady(Self.Handle, Stream.Handle) = 1;
  451 +end;
  452 +
  453 +procedure TSherpaOnnxOnlineRecognizer.Decode(Stream: TSherpaOnnxOnlineStream);
  454 +begin
  455 + SherpaOnnxDecodeOnlineStream(Self.Handle, Stream.Handle);
  456 +end;
  457 +
  458 +procedure TSherpaOnnxOnlineRecognizer.Reset(Stream: TSherpaOnnxOnlineStream);
  459 +begin
  460 + SherpaOnnxOnlineStreamReset(Self.Handle, Stream.Handle);
  461 +end;
  462 +
  463 +function TSherpaOnnxOnlineRecognizer.IsEndpoint(Stream: TSherpaOnnxOnlineStream): Boolean;
  464 +begin
  465 + Result := SherpaOnnxOnlineStreamIsEndpoint(Self.Handle, Stream.Handle) = 1;
  466 +end;
  467 +
  468 +function TSherpaOnnxOnlineRecognizer.GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult;
  469 +var
  470 + pJson: PAnsiChar;
  471 + JsonData: TJSONData;
  472 + JsonObject : TJSONObject;
  473 + JsonEnum: TJSONEnum;
  474 + I: Integer;
  475 +begin
  476 + pJson := SherpaOnnxGetOnlineStreamResultAsJson(Self.Handle, Stream.Handle);
  477 +
  478 + {
  479 + - https://www.freepascal.org/daily/doc/fcl/fpjson/getjson.html
  480 + - https://www.freepascal.org/daily/doc/fcl/fpjson/tjsondata.html
  481 + - https://www.freepascal.org/daily/doc/fcl/fpjson/tjsonobject.html
  482 + - https://www.freepascal.org/daily/doc/fcl/fpjson/tjsonenum.html
  483 + }
  484 +
  485 + JsonData := GetJSON(AnsiString(pJson), False);
  486 +
  487 + JsonObject := JsonData as TJSONObject;
  488 +
  489 + Result.Text := JsonObject.Strings['text'];
  490 +
  491 + SetLength(Result.Tokens, JsonObject.Arrays['tokens'].Count);
  492 +
  493 + I := 0;
  494 + for JsonEnum in JsonObject.Arrays['tokens'] do
  495 + begin
  496 + Result.Tokens[I] := JsonEnum.Value.AsString;
  497 + Inc(I);
  498 + end;
  499 +
  500 + SetLength(Result.Timestamps, JsonObject.Arrays['timestamps'].Count);
  501 + I := 0;
  502 + for JsonEnum in JsonObject.Arrays['timestamps'] do
  503 + begin
  504 + Result.Timestamps[I] := JsonEnum.Value.AsFloat;
  505 + Inc(I);
  506 + end;
  507 +
  508 + SherpaOnnxDestroyOnlineStreamResultJson(pJson);
  509 +end;
  510 +
  511 +
  512 +constructor TSherpaOnnxOnlineStream.Create(P: Pointer);
  513 +begin
  514 + Self.Handle := P;
  515 +end;
  516 +
  517 +destructor TSherpaOnnxOnlineStream.Destroy;
  518 +begin
  519 + SherpaOnnxDestroyOnlineStream(Self.Handle);
  520 + Self.Handle := nil;
  521 +end;
  522 +
  523 +procedure TSherpaOnnxOnlineStream.AcceptWaveform(Samples: array of Single; SampleRate: Integer);
  524 +begin
  525 + SherpaOnnxOnlineStreamAcceptWaveform(Self.Handle, SampleRate,
  526 + pcfloat(Samples), Length(Samples));
  527 +end;
  528 +
  529 +procedure TSherpaOnnxOnlineStream.InputFinished;
  530 +begin
  531 + SherpaOnnxOnlineStreamInputFinished(Self.Handle);
  532 +end;
  533 +
77 end. 534 end.