Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-08-12 19:55:51 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-08-12 19:55:51 +0800
Commit
5791b695eaf3de1874da475ee2cbf6e4925c7663
5791b695
1 parent
65f1c0fa
Pascal API for streaming ASR (#1246)
隐藏空白字符变更
内嵌
并排对比
正在显示
16 个修改的文件
包含
1115 行增加
和
18 行删除
.github/workflows/pascal.yaml
java-api-examples/StreamingDecodeFileCtcHLG.java
pascal-api-examples/README.md
pascal-api-examples/read-wav/run.sh
pascal-api-examples/streaming-asr/.gitignore
pascal-api-examples/streaming-asr/README.md
pascal-api-examples/streaming-asr/paraformer.pas
pascal-api-examples/streaming-asr/run-paraformer.sh
pascal-api-examples/streaming-asr/run-zipformer-ctc-hlg.sh
pascal-api-examples/streaming-asr/run-zipformer-ctc.sh
pascal-api-examples/streaming-asr/run-zipformer-transducer.sh
pascal-api-examples/streaming-asr/zipformer_ctc.pas
pascal-api-examples/streaming-asr/zipformer_ctc_hlg.pas
pascal-api-examples/streaming-asr/zipformer_transducer.pas
sherpa-onnx/c-api/c-api.cc
sherpa-onnx/pascal-api/sherpa_onnx.pas
.github/workflows/pascal.yaml
查看文件 @
5791b69
...
...
@@ -39,7 +39,7 @@ jobs:
strategy
:
fail-fast
:
false
matrix
:
os
:
[
ubuntu-latest
,
macos-latest
,
macos-13
]
os
:
[
ubuntu-latest
,
macos-latest
,
macos-13
,
windows-latest
]
steps
:
-
uses
:
actions/checkout@v4
...
...
@@ -64,10 +64,19 @@ jobs:
run
:
|
brew install fpc
# brew install --cask lazarus
#
-
name
:
Install Free pascal compiler (windows)
if
:
matrix.os == 'windows-latest'
shell
:
bash
run
:
|
choco install lazarus
ls -lh /c/lazarus/fpc/3.2.2/bin/x86_64-win64/
-
name
:
FPC info
shell
:
bash
run
:
|
export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
which fpc
fpc -i
...
...
@@ -87,6 +96,7 @@ jobs:
cd build
cmake \
-DCMAKE_INSTALL_PREFIX=./install \
-D BUILD_SHARED_LIBS=ON \
-D SHERPA_ONNX_ENABLE_BINARY=OFF \
-D CMAKE_BUILD_TYPE=Release \
...
...
@@ -98,15 +108,55 @@ jobs:
export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
cd build
make -j2 sherpa-onnx-c-api
cmake --build . --target install --config Release
ls -lh install/lib/
if [[ ${{ matrix.os }} == 'windows-latest' ]]; then
cp -v install/lib/*.dll ../pascal-api-examples/read-wav
cp -v install/lib/*.dll ../pascal-api-examples/streaming-asr
-
name
:
Run Pascal test
cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/read-wav
cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/streaming-asr
fi
-
name
:
Run Pascal test (Read wav test)
shell
:
bash
run
:
|
export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
cd ./pascal-api-examples
echo "----read-wav test-----"
pushd read-wav
./run.sh
echo "---"
ls -lh
popd
-
name
:
Run Pascal test (Streaming ASR)
shell
:
bash
run
:
|
export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
cd ./pascal-api-examples
pushd streaming-asr
./run-zipformer-transducer.sh
rm -rf sherpa-onnx-*
echo "---"
if [[ ${{ matrix.os }} != 'windows-latest' ]]; then
./run-paraformer.sh
rm -rf sherpa-onnx-*
echo "---"
./run-zipformer-ctc.sh
echo "---"
./run-zipformer-ctc-hlg.sh
rm -rf sherpa-onnx-*
echo "---"
fi
ls -lh
popd
...
...
java-api-examples/StreamingDecodeFileCtcHLG.java
查看文件 @
5791b69
...
...
@@ -29,7 +29,7 @@ public class StreamingDecodeFileCtcHLG {
.
build
();
OnlineCtcFstDecoderConfig
ctcFstDecoderConfig
=
OnlineCtcFstDecoderConfig
.
builder
().
setGraph
(
"hlg"
).
build
();
OnlineCtcFstDecoderConfig
.
builder
().
setGraph
(
hlg
).
build
();
OnlineRecognizerConfig
config
=
OnlineRecognizerConfig
.
builder
()
...
...
pascal-api-examples/README.md
0 → 100644
查看文件 @
5791b69
# Introduction
This directory contains examples of how to use the
[
Object Pascal
](
https://en.wikipedia.org/wiki/Object_Pascal
)
APIs of
[
sherpa-onnx
](
https://github.com/k2-fsa/sherpa-onnx
)
.
|Directory| Description|
|---------|------------|
|
[
read-wav
](
./read-wav
)
|It shows how to read a wave file.|
|
[
streaming-asr
](
./streaming-asr
)
| It shows how to use streaming models for speech recognition.|
...
...
pascal-api-examples/read-wav/run.sh
查看文件 @
5791b69
...
...
@@ -7,10 +7,11 @@ SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
echo
"SHERPA_ONNX_DIR:
$SHERPA_ONNX_DIR
"
if
[[
! -f ../../build/
lib/libsherpa-onnx-c-api.dylib
&&
! -f ../../build/lib/libsherpa-onnx-c-api.so
]]
;
then
if
[[
! -f ../../build/
install/lib/libsherpa-onnx-c-api.dylib
&&
! -f ../../build/install/lib/libsherpa-onnx-c-api.so
&&
! -f ../../build/install/lib/sherpa-onnx-c-api.dll
]]
;
then
mkdir -p ../../build
pushd
../../build
cmake
\
-DCMAKE_INSTALL_PREFIX
=
./install
\
-DSHERPA_ONNX_ENABLE_PYTHON
=
OFF
\
-DSHERPA_ONNX_ENABLE_TESTS
=
OFF
\
-DSHERPA_ONNX_ENABLE_CHECK
=
OFF
\
...
...
@@ -18,8 +19,7 @@ if [[ ! -f ../../build/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/lib/l
-DSHERPA_ONNX_ENABLE_PORTAUDIO
=
OFF
\
..
make -j4 sherpa-onnx-c-api
ls -lh lib
cmake --build . --target install --config Release
popd
fi
...
...
@@ -29,10 +29,10 @@ fi
fpc
\
-Fu
$SHERPA_ONNX_DIR
/sherpa-onnx/pascal-api
\
-Fl
$SHERPA_ONNX_DIR
/build/lib
\
-Fl
$SHERPA_ONNX_DIR
/build/
install/
lib
\
./main.pas
export
LD_LIBRARY_PATH
=
$SHERPA_ONNX_DIR
/build/lib:
$LD_LIBRARY_PATH
export
DYLD_LIBRARY_PATH
=
$SHERPA_ONNX_DIR
/build/lib:
$DYLD_LIBRARY_PATH
export
LD_LIBRARY_PATH
=
$SHERPA_ONNX_DIR
/build/install/lib:
$LD_LIBRARY_PATH
export
DYLD_LIBRARY_PATH
=
$SHERPA_ONNX_DIR
/build/install/lib:
$DYLD_LIBRARY_PATH
./main
...
...
pascal-api-examples/streaming-asr/.gitignore
0 → 100644
查看文件 @
5791b69
zipformer_transducer
paraformer
zipformer_ctc
zipformer_ctc_hlg
...
...
pascal-api-examples/streaming-asr/README.md
0 → 100644
查看文件 @
5791b69
# Introduction
This folder contains examples of using sherpa-onnx's Object Pascal
APIs with streaming models for speech recognition.
|File|Description|
|----|-----------|
|
[
run-paraformer.sh
](
./run-paraformer.sh
)
|Use a streaming Paraformer model for speech recognition|
|
[
run-zipformer-ctc-hlg.sh
](
./run-zipformer-ctc-hlg.sh
)
|Use a streaming Zipformer CTC model with HLG for speech recognition|
|
[
run-zipformer-ctc.sh
](
./run-zipformer-ctc.sh
)
|Use a streaming Zipformer CTC model for speech recognition|
|
[
run-zipformer-transducer.sh
](
./run-zipformer-transducer.sh
)
|Use a streaming Zipformer transducer model for speech recognition|
...
...
pascal-api-examples/streaming-asr/paraformer.pas
0 → 100644
查看文件 @
5791b69
{
Copyright
(c)
2024
Xiaomi
Corporation
}
{
This
file
shows
how
to
use
a
streaming
Paraformer
model
to
decode
files.
You
can
download
the
model
files
from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}
program
paraformer;
{
$mode
objfpc
}
uses
sherpa_onnx,
DateUtils,
SysUtils;
var
Config:
TSherpaOnnxOnlineRecognizerConfig;
Recognizer:
TSherpaOnnxOnlineRecognizer;
Stream:
TSherpaOnnxOnlineStream;
RecognitionResult:
TSherpaOnnxOnlineRecognizerResult;
Wave:
TSherpaOnnxWave;
WaveFilename:
AnsiString;
TailPaddings:
array
of
Single;
Start:
TDateTime;
Stop:
TDateTime;
Elapsed:
Single;
Duration:
Single;
RealTimeFactor:
Single;
begin
Initialize(Config);
{
Please
visit
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
to
download
model
files
used
in
this
file.
}
Config.ModelConfig.Paraformer.Encoder
:=
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int
8
.onnx';
Config.ModelConfig.Paraformer.Decoder
:=
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int
8
.onnx';
Config.ModelConfig.Tokens
:=
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt';
Config.ModelConfig.Provider
:=
'cpu';
Config.ModelConfig.NumThreads
:=
1
;
Config.ModelConfig.Debug
:=
False;
WaveFilename
:=
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/
2
.wav';
Wave
:=
SherpaOnnxReadWave(WaveFilename);
Recognizer
:=
TSherpaOnnxOnlineRecognizer.Create(Config);
Start
:=
Now;
Stream
:=
Recognizer.CreateStream();
Stream.AcceptWaveform(Wave.Samples,
Wave.SampleRate);
SetLength(TailPaddings,
Round(Wave.SampleRate
*
0.5
));
{
0.5
seconds
of
padding
}
Stream.AcceptWaveform(TailPaddings,
Wave.SampleRate);
Stream.InputFinished();
while
Recognizer.IsReady(Stream)
do
Recognizer.Decode(Stream);
RecognitionResult
:=
Recognizer.GetResult(Stream);
Stop
:=
Now;
Elapsed
:=
MilliSecondsBetween(Stop,
Start)
/
1000
;
Duration
:=
Length(Wave.Samples)
/
Wave.SampleRate;
RealTimeFactor
:=
Elapsed
/
Duration;
WriteLn(RecognitionResult.ToString);
WriteLn(Format('NumThreads
%d',
[
Config.ModelConfig.NumThreads
]
));
WriteLn(Format('Elapsed
%.
3
f
s',
[
Elapsed
]
));
WriteLn(Format('Wave
duration
%.
3
f
s',
[
Duration
]
));
WriteLn(Format('RTF
=
%.
3
f/%.
3
f
=
%.
3
f',
[
Elapsed
,
Duration
,
RealTimeFactor
]
));
{
Free
resources
to
avoid
memory
leak.
Note:
You
don't
need
to
invoke
them
for
this
simple
script.
However,
you
have
to
invoke
them
in
your
own
large/complex
project.
}
FreeAndNil(Stream);
FreeAndNil(Recognizer);
end.
...
...
pascal-api-examples/streaming-asr/run-paraformer.sh
0 → 100755
查看文件 @
5791b69
#!/usr/bin/env bash
set
-ex
SCRIPT_DIR
=
$(
cd
--
"
$(
dirname --
"
${
BASH_SOURCE
[0]
}
"
)
"
&> /dev/null
&&
pwd
)
SHERPA_ONNX_DIR
=
$(
cd
$SCRIPT_DIR
/../..
&&
pwd
)
echo
"SHERPA_ONNX_DIR:
$SHERPA_ONNX_DIR
"
if
[[
! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib
&&
! -f ../../build/install/lib/libsherpa-onnx-c-api.so
&&
! -f ../../build/install/lib/sherpa-onnx-c-api.dll
]]
;
then
mkdir -p ../../build
pushd
../../build
cmake
\
-DCMAKE_INSTALL_PREFIX
=
./install
\
-DSHERPA_ONNX_ENABLE_PYTHON
=
OFF
\
-DSHERPA_ONNX_ENABLE_TESTS
=
OFF
\
-DSHERPA_ONNX_ENABLE_CHECK
=
OFF
\
-DBUILD_SHARED_LIBS
=
ON
\
-DSHERPA_ONNX_ENABLE_PORTAUDIO
=
OFF
\
..
cmake --build . --target install --config Release
ls -lh lib
popd
fi
if
[
! -f ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt
]
;
then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
fi
fpc
\
-Fu
$SHERPA_ONNX_DIR
/sherpa-onnx/pascal-api
\
-Fl
$SHERPA_ONNX_DIR
/build/install/lib
\
./paraformer.pas
export
LD_LIBRARY_PATH
=
$SHERPA_ONNX_DIR
/build/install/lib:
$LD_LIBRARY_PATH
export
DYLD_LIBRARY_PATH
=
$SHERPA_ONNX_DIR
/build/install/lib:
$DYLD_LIBRARY_PATH
./paraformer
...
...
pascal-api-examples/streaming-asr/run-zipformer-ctc-hlg.sh
0 → 100755
查看文件 @
5791b69
#!/usr/bin/env bash
set
-ex
SCRIPT_DIR
=
$(
cd
--
"
$(
dirname --
"
${
BASH_SOURCE
[0]
}
"
)
"
&> /dev/null
&&
pwd
)
SHERPA_ONNX_DIR
=
$(
cd
$SCRIPT_DIR
/../..
&&
pwd
)
echo
"SHERPA_ONNX_DIR:
$SHERPA_ONNX_DIR
"
if
[[
! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib
&&
! -f ../../build/install/lib/libsherpa-onnx-c-api.so
&&
! -f ../../build/install/lib/sherpa-onnx-c-api.dll
]]
;
then
mkdir -p ../../build
pushd
../../build
cmake
\
-DCMAKE_INSTALL_PREFIX
=
./install
\
-DSHERPA_ONNX_ENABLE_PYTHON
=
OFF
\
-DSHERPA_ONNX_ENABLE_TESTS
=
OFF
\
-DSHERPA_ONNX_ENABLE_CHECK
=
OFF
\
-DBUILD_SHARED_LIBS
=
ON
\
-DSHERPA_ONNX_ENABLE_PORTAUDIO
=
OFF
\
..
cmake --build . --target install --config Release
ls -lh lib
popd
fi
if
[
! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt
]
;
then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
fi
fpc
\
-Fu
$SHERPA_ONNX_DIR
/sherpa-onnx/pascal-api
\
-Fl
$SHERPA_ONNX_DIR
/build/install/lib
\
./zipformer_ctc_hlg.pas
export
LD_LIBRARY_PATH
=
$SHERPA_ONNX_DIR
/build/install/lib:
$LD_LIBRARY_PATH
export
DYLD_LIBRARY_PATH
=
$SHERPA_ONNX_DIR
/build/install/lib:
$DYLD_LIBRARY_PATH
./zipformer_ctc_hlg
...
...
pascal-api-examples/streaming-asr/run-zipformer-ctc.sh
0 → 100755
查看文件 @
5791b69
#!/usr/bin/env bash
set
-ex
SCRIPT_DIR
=
$(
cd
--
"
$(
dirname --
"
${
BASH_SOURCE
[0]
}
"
)
"
&> /dev/null
&&
pwd
)
SHERPA_ONNX_DIR
=
$(
cd
$SCRIPT_DIR
/../..
&&
pwd
)
echo
"SHERPA_ONNX_DIR:
$SHERPA_ONNX_DIR
"
if
[[
! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib
&&
! -f ../../build/install/lib/libsherpa-onnx-c-api.so
&&
! -f ../../build/install/lib/sherpa-onnx-c-api.dll
]]
;
then
mkdir -p ../../build
pushd
../../build
cmake
\
-DCMAKE_INSTALL_PREFIX
=
./install
\
-DSHERPA_ONNX_ENABLE_PYTHON
=
OFF
\
-DSHERPA_ONNX_ENABLE_TESTS
=
OFF
\
-DSHERPA_ONNX_ENABLE_CHECK
=
OFF
\
-DBUILD_SHARED_LIBS
=
ON
\
-DSHERPA_ONNX_ENABLE_PORTAUDIO
=
OFF
\
..
cmake --build . --target install --config Release
ls -lh lib
popd
fi
if
[
! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt
]
;
then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
fi
fpc
\
-Fu
$SHERPA_ONNX_DIR
/sherpa-onnx/pascal-api
\
-Fl
$SHERPA_ONNX_DIR
/build/install/lib
\
./zipformer_ctc.pas
export
LD_LIBRARY_PATH
=
$SHERPA_ONNX_DIR
/build/install/lib:
$LD_LIBRARY_PATH
export
DYLD_LIBRARY_PATH
=
$SHERPA_ONNX_DIR
/build/install/lib:
$DYLD_LIBRARY_PATH
./zipformer_ctc
...
...
pascal-api-examples/streaming-asr/run-zipformer-transducer.sh
0 → 100755
查看文件 @
5791b69
#!/usr/bin/env bash
set
-ex
SCRIPT_DIR
=
$(
cd
--
"
$(
dirname --
"
${
BASH_SOURCE
[0]
}
"
)
"
&> /dev/null
&&
pwd
)
SHERPA_ONNX_DIR
=
$(
cd
$SCRIPT_DIR
/../..
&&
pwd
)
echo
"SHERPA_ONNX_DIR:
$SHERPA_ONNX_DIR
"
if
[[
! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib
&&
! -f ../../build/install/lib/libsherpa-onnx-c-api.so
&&
! -f ../../build/install/lib/sherpa-onnx-c-api.dll
]]
;
then
mkdir -p ../../build
pushd
../../build
cmake
\
-DCMAKE_INSTALL_PREFIX
=
./install
\
-DSHERPA_ONNX_ENABLE_PYTHON
=
OFF
\
-DSHERPA_ONNX_ENABLE_TESTS
=
OFF
\
-DSHERPA_ONNX_ENABLE_CHECK
=
OFF
\
-DBUILD_SHARED_LIBS
=
ON
\
-DSHERPA_ONNX_ENABLE_PORTAUDIO
=
OFF
\
..
cmake --build . --target install --config Release
ls -lh lib
popd
fi
if
[
! -f ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt
]
;
then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
fi
fpc
\
-Fu
$SHERPA_ONNX_DIR
/sherpa-onnx/pascal-api
\
-Fl
$SHERPA_ONNX_DIR
/build/install/lib
\
./zipformer_transducer.pas
export
LD_LIBRARY_PATH
=
$SHERPA_ONNX_DIR
/build/install/lib:
$LD_LIBRARY_PATH
export
DYLD_LIBRARY_PATH
=
$SHERPA_ONNX_DIR
/build/install/lib:
$DYLD_LIBRARY_PATH
./zipformer_transducer
...
...
pascal-api-examples/streaming-asr/zipformer_ctc.pas
0 → 100644
查看文件 @
5791b69
{
Copyright
(c)
2024
Xiaomi
Corporation
}
{
This
file
shows
how
to
use
a
streaming
Zipformer
CTC
model
to
decode
files.
You
can
download
the
model
files
from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}
program
zipformer_ctc;
{
$mode
objfpc
}
uses
sherpa_onnx,
DateUtils,
SysUtils;
var
Config:
TSherpaOnnxOnlineRecognizerConfig;
Recognizer:
TSherpaOnnxOnlineRecognizer;
Stream:
TSherpaOnnxOnlineStream;
RecognitionResult:
TSherpaOnnxOnlineRecognizerResult;
Wave:
TSherpaOnnxWave;
WaveFilename:
AnsiString;
TailPaddings:
array
of
Single;
Start:
TDateTime;
Stop:
TDateTime;
Elapsed:
Single;
Duration:
Single;
RealTimeFactor:
Single;
begin
Initialize(Config);
{
Please
visit
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
to
download
model
files
used
in
this
file.
}
Config.ModelConfig.Zipformer
2
Ctc.Model
:=
'./sherpa-onnx-streaming-zipformer-ctc-small
-2024-03-18
/ctc-epoch
-30
-avg
-3
-chunk
-16
-left
-128
.int
8
.onnx';
Config.ModelConfig.Tokens
:=
'./sherpa-onnx-streaming-zipformer-ctc-small
-2024-03-18
/tokens.txt';
Config.ModelConfig.Provider
:=
'cpu';
Config.ModelConfig.NumThreads
:=
1
;
Config.ModelConfig.Debug
:=
False;
WaveFilename
:=
'./sherpa-onnx-streaming-zipformer-ctc-small
-2024-03-18
/test_wavs/
8
k.wav';
Wave
:=
SherpaOnnxReadWave(WaveFilename);
Recognizer
:=
TSherpaOnnxOnlineRecognizer.Create(Config);
Start
:=
Now;
Stream
:=
Recognizer.CreateStream();
Stream.AcceptWaveform(Wave.Samples,
Wave.SampleRate);
SetLength(TailPaddings,
Round(Wave.SampleRate
*
0.5
));
{
0.5
seconds
of
padding
}
Stream.AcceptWaveform(TailPaddings,
Wave.SampleRate);
Stream.InputFinished();
while
Recognizer.IsReady(Stream)
do
Recognizer.Decode(Stream);
RecognitionResult
:=
Recognizer.GetResult(Stream);
Stop
:=
Now;
Elapsed
:=
MilliSecondsBetween(Stop,
Start)
/
1000
;
Duration
:=
Length(Wave.Samples)
/
Wave.SampleRate;
RealTimeFactor
:=
Elapsed
/
Duration;
WriteLn(RecognitionResult.ToString);
WriteLn(Format('NumThreads
%d',
[
Config.ModelConfig.NumThreads
]
));
WriteLn(Format('Elapsed
%.
3
f
s',
[
Elapsed
]
));
WriteLn(Format('Wave
duration
%.
3
f
s',
[
Duration
]
));
WriteLn(Format('RTF
=
%.
3
f/%.
3
f
=
%.
3
f',
[
Elapsed
,
Duration
,
RealTimeFactor
]
));
{
Free
resources
to
avoid
memory
leak.
Note:
You
don't
need
to
invoke
them
for
this
simple
script.
However,
you
have
to
invoke
them
in
your
own
large/complex
project.
}
FreeAndNil(Stream);
FreeAndNil(Recognizer);
end.
...
...
pascal-api-examples/streaming-asr/zipformer_ctc_hlg.pas
0 → 100644
查看文件 @
5791b69
{
Copyright
(c)
2024
Xiaomi
Corporation
}
{
This
file
shows
how
to
use
a
streaming
Zipformer
CTC
model
with
HLG
to
decode
files.
You
can
download
the
model
files
from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}
program
zipformer_ctc_hlg;
{
$mode
objfpc
}
uses
sherpa_onnx,
DateUtils,
SysUtils;
var
Config:
TSherpaOnnxOnlineRecognizerConfig;
Recognizer:
TSherpaOnnxOnlineRecognizer;
Stream:
TSherpaOnnxOnlineStream;
RecognitionResult:
TSherpaOnnxOnlineRecognizerResult;
Wave:
TSherpaOnnxWave;
WaveFilename:
AnsiString;
TailPaddings:
array
of
Single;
Start:
TDateTime;
Stop:
TDateTime;
Elapsed:
Single;
Duration:
Single;
RealTimeFactor:
Single;
begin
Initialize(Config);
{
Please
visit
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
to
download
model
files
used
in
this
file.
}
Config.ModelConfig.Zipformer
2
Ctc.Model
:=
'./sherpa-onnx-streaming-zipformer-ctc-small
-2024-03-18
/ctc-epoch
-30
-avg
-3
-chunk
-16
-left
-128
.int
8
.onnx';
Config.ModelConfig.Tokens
:=
'./sherpa-onnx-streaming-zipformer-ctc-small
-2024-03-18
/tokens.txt';
Config.ModelConfig.Provider
:=
'cpu';
Config.ModelConfig.NumThreads
:=
1
;
Config.ModelConfig.Debug
:=
True;
Config.CtcFstDecoderConfig.Graph
:=
'./sherpa-onnx-streaming-zipformer-ctc-small
-2024-03-18
/HLG.fst';
WaveFilename
:=
'./sherpa-onnx-streaming-zipformer-ctc-small
-2024-03-18
/test_wavs/
8
k.wav';
Wave
:=
SherpaOnnxReadWave(WaveFilename);
Recognizer
:=
TSherpaOnnxOnlineRecognizer.Create(Config);
Start
:=
Now;
Stream
:=
Recognizer.CreateStream();
Stream.AcceptWaveform(Wave.Samples,
Wave.SampleRate);
SetLength(TailPaddings,
Round(Wave.SampleRate
*
0.5
));
{
0.5
seconds
of
padding
}
Stream.AcceptWaveform(TailPaddings,
Wave.SampleRate);
Stream.InputFinished();
while
Recognizer.IsReady(Stream)
do
Recognizer.Decode(Stream);
RecognitionResult
:=
Recognizer.GetResult(Stream);
Stop
:=
Now;
Elapsed
:=
MilliSecondsBetween(Stop,
Start)
/
1000
;
Duration
:=
Length(Wave.Samples)
/
Wave.SampleRate;
RealTimeFactor
:=
Elapsed
/
Duration;
WriteLn(RecognitionResult.ToString);
WriteLn(Format('NumThreads
%d',
[
Config.ModelConfig.NumThreads
]
));
WriteLn(Format('Elapsed
%.
3
f
s',
[
Elapsed
]
));
WriteLn(Format('Wave
duration
%.
3
f
s',
[
Duration
]
));
WriteLn(Format('RTF
=
%.
3
f/%.
3
f
=
%.
3
f',
[
Elapsed
,
Duration
,
RealTimeFactor
]
));
{
Free
resources
to
avoid
memory
leak.
Note:
You
don't
need
to
invoke
them
for
this
simple
script.
However,
you
have
to
invoke
them
in
your
own
large/complex
project.
}
FreeAndNil(Stream);
FreeAndNil(Recognizer);
end.
...
...
pascal-api-examples/streaming-asr/zipformer_transducer.pas
0 → 100644
查看文件 @
5791b69
{
Copyright
(c)
2024
Xiaomi
Corporation
}
{
This
file
shows
how
to
use
a
streaming
Zipformer
transducer
to
decode
files.
You
can
download
the
model
files
from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}
program
zipformer_transducer;
{
$mode
objfpc
}
uses
sherpa_onnx,
DateUtils,
SysUtils;
var
Config:
TSherpaOnnxOnlineRecognizerConfig;
Recognizer:
TSherpaOnnxOnlineRecognizer;
Stream:
TSherpaOnnxOnlineStream;
RecognitionResult:
TSherpaOnnxOnlineRecognizerResult;
Wave:
TSherpaOnnxWave;
WaveFilename:
AnsiString;
TailPaddings:
array
of
Single;
Start:
TDateTime;
Stop:
TDateTime;
Elapsed:
Single;
Duration:
Single;
RealTimeFactor:
Single;
begin
Initialize(Config);
{
Please
visit
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
to
download
model
files
used
in
this
file.
}
Config.ModelConfig.Transducer.Encoder
:=
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en
-2023-02-20
/encoder-epoch
-99
-avg
-1
.int
8
.onnx';
Config.ModelConfig.Transducer.Decoder
:=
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en
-2023-02-20
/decoder-epoch
-99
-avg
-1
.onnx';
Config.ModelConfig.Transducer.Joiner
:=
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en
-2023-02-20
/joiner-epoch
-99
-avg
-1
.int
8
.onnx';
Config.ModelConfig.Tokens
:=
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en
-2023-02-20
/tokens.txt';
Config.ModelConfig.Provider
:=
'cpu';
Config.ModelConfig.NumThreads
:=
1
;
Config.ModelConfig.Debug
:=
False;
WaveFilename
:=
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en
-2023-02-20
/test_wavs/
0
.wav';
Wave
:=
SherpaOnnxReadWave(WaveFilename);
Recognizer
:=
TSherpaOnnxOnlineRecognizer.Create(Config);
Start
:=
Now;
Stream
:=
Recognizer.CreateStream();
Stream.AcceptWaveform(Wave.Samples,
Wave.SampleRate);
SetLength(TailPaddings,
Round(Wave.SampleRate
*
0.5
));
{
0.5
seconds
of
padding
}
Stream.AcceptWaveform(TailPaddings,
Wave.SampleRate);
Stream.InputFinished();
while
Recognizer.IsReady(Stream)
do
Recognizer.Decode(Stream);
RecognitionResult
:=
Recognizer.GetResult(Stream);
Stop
:=
Now;
Elapsed
:=
MilliSecondsBetween(Stop,
Start)
/
1000
;
Duration
:=
Length(Wave.Samples)
/
Wave.SampleRate;
RealTimeFactor
:=
Elapsed
/
Duration;
WriteLn(RecognitionResult.ToString);
WriteLn(Format('NumThreads
%d',
[
Config.ModelConfig.NumThreads
]
));
WriteLn(Format('Elapsed
%.
3
f
s',
[
Elapsed
]
));
WriteLn(Format('Wave
duration
%.
3
f
s',
[
Duration
]
));
WriteLn(Format('RTF
=
%.
3
f/%.
3
f
=
%.
3
f',
[
Elapsed
,
Duration
,
RealTimeFactor
]
));
{
Free
resources
to
avoid
memory
leak.
Note:
You
don't
need
to
invoke
them
for
this
simple
script.
However,
you
have
to
invoke
them
in
your
own
large/complex
project.
}
FreeAndNil(Stream);
FreeAndNil(Recognizer);
end.
...
...
sherpa-onnx/c-api/c-api.cc
查看文件 @
5791b69
...
...
@@ -75,17 +75,31 @@ SherpaOnnxOnlineRecognizer *SherpaOnnxCreateOnlineRecognizer(
SHERPA_ONNX_OR
(
config
->
model_config
.
num_threads
,
1
);
recognizer_config
.
model_config
.
provider_config
.
provider
=
SHERPA_ONNX_OR
(
config
->
model_config
.
provider
,
"cpu"
);
if
(
recognizer_config
.
model_config
.
provider_config
.
provider
.
empty
())
{
recognizer_config
.
model_config
.
provider_config
.
provider
=
"cpu"
;
}
recognizer_config
.
model_config
.
model_type
=
SHERPA_ONNX_OR
(
config
->
model_config
.
model_type
,
""
);
recognizer_config
.
model_config
.
debug
=
SHERPA_ONNX_OR
(
config
->
model_config
.
debug
,
0
);
recognizer_config
.
model_config
.
modeling_unit
=
SHERPA_ONNX_OR
(
config
->
model_config
.
modeling_unit
,
"cjkchar"
);
if
(
recognizer_config
.
model_config
.
modeling_unit
.
empty
())
{
recognizer_config
.
model_config
.
modeling_unit
=
"cjkchar"
;
}
recognizer_config
.
model_config
.
bpe_vocab
=
SHERPA_ONNX_OR
(
config
->
model_config
.
bpe_vocab
,
""
);
recognizer_config
.
decoding_method
=
SHERPA_ONNX_OR
(
config
->
decoding_method
,
"greedy_search"
);
if
(
recognizer_config
.
decoding_method
.
empty
())
{
recognizer_config
.
decoding_method
=
"greedy_search"
;
}
recognizer_config
.
max_active_paths
=
SHERPA_ONNX_OR
(
config
->
max_active_paths
,
4
);
...
...
@@ -391,10 +405,19 @@ sherpa_onnx::OfflineRecognizerConfig convertConfig(
SHERPA_ONNX_OR
(
config
->
model_config
.
debug
,
0
);
recognizer_config
.
model_config
.
provider
=
SHERPA_ONNX_OR
(
config
->
model_config
.
provider
,
"cpu"
);
if
(
recognizer_config
.
model_config
.
provider
.
empty
())
{
recognizer_config
.
model_config
.
provider
=
"cpu"
;
}
recognizer_config
.
model_config
.
model_type
=
SHERPA_ONNX_OR
(
config
->
model_config
.
model_type
,
""
);
recognizer_config
.
model_config
.
modeling_unit
=
SHERPA_ONNX_OR
(
config
->
model_config
.
modeling_unit
,
"cjkchar"
);
if
(
recognizer_config
.
model_config
.
modeling_unit
.
empty
())
{
recognizer_config
.
model_config
.
modeling_unit
=
"cjkchar"
;
}
recognizer_config
.
model_config
.
bpe_vocab
=
SHERPA_ONNX_OR
(
config
->
model_config
.
bpe_vocab
,
""
);
...
...
@@ -620,6 +643,10 @@ SherpaOnnxKeywordSpotter *SherpaOnnxCreateKeywordSpotter(
SHERPA_ONNX_OR
(
config
->
model_config
.
num_threads
,
1
);
spotter_config
.
model_config
.
provider_config
.
provider
=
SHERPA_ONNX_OR
(
config
->
model_config
.
provider
,
"cpu"
);
if
(
spotter_config
.
model_config
.
provider_config
.
provider
.
empty
())
{
spotter_config
.
model_config
.
provider_config
.
provider
=
"cpu"
;
}
spotter_config
.
model_config
.
model_type
=
SHERPA_ONNX_OR
(
config
->
model_config
.
model_type
,
""
);
spotter_config
.
model_config
.
debug
=
...
...
@@ -855,6 +882,10 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector(
vad_config
.
sample_rate
=
SHERPA_ONNX_OR
(
config
->
sample_rate
,
16000
);
vad_config
.
num_threads
=
SHERPA_ONNX_OR
(
config
->
num_threads
,
1
);
vad_config
.
provider
=
SHERPA_ONNX_OR
(
config
->
provider
,
"cpu"
);
if
(
vad_config
.
provider
.
empty
())
{
vad_config
.
provider
=
"cpu"
;
}
vad_config
.
debug
=
SHERPA_ONNX_OR
(
config
->
debug
,
false
);
if
(
vad_config
.
debug
)
{
...
...
@@ -956,6 +987,10 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
tts_config
.
model
.
num_threads
=
SHERPA_ONNX_OR
(
config
->
model
.
num_threads
,
1
);
tts_config
.
model
.
debug
=
config
->
model
.
debug
;
tts_config
.
model
.
provider
=
SHERPA_ONNX_OR
(
config
->
model
.
provider
,
"cpu"
);
if
(
tts_config
.
model
.
provider
.
empty
())
{
tts_config
.
model
.
provider
=
"cpu"
;
}
tts_config
.
rule_fsts
=
SHERPA_ONNX_OR
(
config
->
rule_fsts
,
""
);
tts_config
.
rule_fars
=
SHERPA_ONNX_OR
(
config
->
rule_fars
,
""
);
tts_config
.
max_num_sentences
=
SHERPA_ONNX_OR
(
config
->
max_num_sentences
,
2
);
...
...
@@ -1101,6 +1136,9 @@ SherpaOnnxCreateSpokenLanguageIdentification(
slid_config
.
num_threads
=
SHERPA_ONNX_OR
(
config
->
num_threads
,
1
);
slid_config
.
debug
=
config
->
debug
;
slid_config
.
provider
=
SHERPA_ONNX_OR
(
config
->
provider
,
"cpu"
);
if
(
slid_config
.
provider
.
empty
())
{
slid_config
.
provider
=
"cpu"
;
}
if
(
slid_config
.
debug
)
{
SHERPA_ONNX_LOGE
(
"%s
\n
"
,
slid_config
.
ToString
().
c_str
());
...
...
@@ -1167,6 +1205,9 @@ SherpaOnnxCreateSpeakerEmbeddingExtractor(
c
.
num_threads
=
SHERPA_ONNX_OR
(
config
->
num_threads
,
1
);
c
.
debug
=
SHERPA_ONNX_OR
(
config
->
debug
,
0
);
c
.
provider
=
SHERPA_ONNX_OR
(
config
->
provider
,
"cpu"
);
if
(
c
.
provider
.
empty
())
{
c
.
provider
=
"cpu"
;
}
if
(
config
->
debug
)
{
SHERPA_ONNX_LOGE
(
"%s
\n
"
,
c
.
ToString
().
c_str
());
...
...
@@ -1401,6 +1442,10 @@ const SherpaOnnxAudioTagging *SherpaOnnxCreateAudioTagging(
ac
.
model
.
num_threads
=
SHERPA_ONNX_OR
(
config
->
model
.
num_threads
,
1
);
ac
.
model
.
debug
=
config
->
model
.
debug
;
ac
.
model
.
provider
=
SHERPA_ONNX_OR
(
config
->
model
.
provider
,
"cpu"
);
if
(
ac
.
model
.
provider
.
empty
())
{
ac
.
model
.
provider
=
"cpu"
;
}
ac
.
labels
=
SHERPA_ONNX_OR
(
config
->
labels
,
""
);
ac
.
top_k
=
SHERPA_ONNX_OR
(
config
->
top_k
,
5
);
...
...
@@ -1487,6 +1532,9 @@ const SherpaOnnxOfflinePunctuation *SherpaOnnxCreateOfflinePunctuation(
c
.
model
.
num_threads
=
SHERPA_ONNX_OR
(
config
->
model
.
num_threads
,
1
);
c
.
model
.
debug
=
config
->
model
.
debug
;
c
.
model
.
provider
=
SHERPA_ONNX_OR
(
config
->
model
.
provider
,
"cpu"
);
if
(
c
.
model
.
provider
.
empty
())
{
c
.
model
.
provider
=
"cpu"
;
}
if
(
c
.
model
.
debug
)
{
SHERPA_ONNX_LOGE
(
"%s
\n
"
,
c
.
ToString
().
c_str
());
...
...
sherpa-onnx/pascal-api/sherpa_onnx.pas
查看文件 @
5791b69
...
...
@@ -4,6 +4,9 @@ unit sherpa_onnx;
{
$mode
objfpc
}
{
$modeSwitch
advancedRecords
}
{
to
support
records
with
methods
}
(*
{
$LongStrings
ON
}
*)
interface
type
...
...
@@ -12,15 +15,117 @@ type
SampleRate:
Integer;
end;
TSherpaOnnxOnlineTransducerModelConfig
=
record
Encoder:
AnsiString;
Decoder:
AnsiString;
Joiner:
AnsiString;
function
ToString:
AnsiString;
end;
TSherpaOnnxOnlineParaformerModelConfig
=
record
Encoder:
AnsiString;
Decoder:
AnsiString;
function
ToString:
AnsiString;
end;
TSherpaOnnxOnlineZipformer
2
CtcModelConfig
=
record
Model:
AnsiString;
function
ToString:
AnsiString;
end;
TSherpaOnnxOnlineModelConfig
=
record
Transducer:
TSherpaOnnxOnlineTransducerModelConfig;
Paraformer:
TSherpaOnnxOnlineParaformerModelConfig;
Zipformer
2
Ctc:
TSherpaOnnxOnlineZipformer
2
CtcModelConfig;
Tokens:
AnsiString;
NumThreads:
Integer;
Provider:
AnsiString;
Debug:
Boolean;
ModelType:
AnsiString;
ModelingUnit:
AnsiString;
BpeVocab:
AnsiString;
function
ToString:
AnsiString;
end;
TSherpaOnnxFeatureConfig
=
record
SampleRate:
Integer;
FeatureDim:
Integer;
function
ToString:
AnsiString;
end;
TSherpaOnnxOnlineCtcFstDecoderConfig
=
record
Graph:
AnsiString;
MaxActive:
Integer;
function
ToString:
AnsiString;
end;
TSherpaOnnxOnlineRecognizerConfig
=
record
FeatConfig:
TSherpaOnnxFeatureConfig;
ModelConfig:
TSherpaOnnxOnlineModelConfig;
DecodingMethod:
AnsiString;
MaxActivePaths:
Integer;
EnableEndpoint:
Boolean;
Rule
1
MinTrailingSilence:
Single;
Rule
2
MinTrailingSilence:
Single;
Rule
3
MinUtteranceLength:
Single;
HotwordsFile:
AnsiString;
HotwordsScore:
Single;
CtcFstDecoderConfig:
TSherpaOnnxOnlineCtcFstDecoderConfig;
RuleFsts:
AnsiString;
RuleFars:
AnsiString;
BlankPenalty:
Single;
function
ToString:
AnsiString;
end;
TSherpaOnnxOnlineRecognizerResult
=
record
Text:
AnsiString;
Tokens:
array
of
AnsiString;
Timestamps:
array
of
Single;
function
ToString:
AnsiString;
end;
TSherpaOnnxOnlineStream
=
class
private
Handle:
Pointer;
public
constructor
Create(P:
Pointer);
destructor
Destroy;
override;
procedure
AcceptWaveform(Samples:
array
of
Single;
SampleRate:
Integer);
procedure
InputFinished;
end;
TSherpaOnnxOnlineRecognizer
=
class
private
Handle:
Pointer;
public
constructor
Create(Config:
TSherpaOnnxOnlineRecognizerConfig);
destructor
Destroy;
override;
function
CreateStream:
TSherpaOnnxOnlineStream;
overload;
function
CreateStream(Hotwords:
AnsiString):
TSherpaOnnxOnlineStream;
overload;
function
IsReady(Stream:
TSherpaOnnxOnlineStream):
Boolean;
procedure
Decode(Stream:
TSherpaOnnxOnlineStream);
procedure
Reset(Stream:
TSherpaOnnxOnlineStream);
function
IsEndpoint(Stream:
TSherpaOnnxOnlineStream):
Boolean;
function
GetResult(Stream:
TSherpaOnnxOnlineStream):
TSherpaOnnxOnlineRecognizerResult;
end;
{
It
supports
reading
a
single
channel
wave
with
16-bit
encoded
samples.
Samples
are
normalized
to
the
range
[-1,
1].
}
function
SherpaOnnxReadWave(Filename:
s
tring):
TSherpaOnnxWave;
function
SherpaOnnxReadWave(Filename:
AnsiS
tring):
TSherpaOnnxWave;
implementation
uses
ctypes;
ctypes,
fpjson,
{
See
-
https://wiki.freepascal.org/fcl-json
-
https://www.freepascal.org/daily/doc/fcl/fpjson/getjson.html
}
jsonparser,
SysUtils;
const
{
See
https://www.freepascal.org/docs-html/prog/progap7.html
}
...
...
@@ -47,31 +152,383 @@ type
{ Pointer to a SherpaOnnxWave record. This is the type returned by
  SherpaOnnxReadWaveWrapper and accepted by SherpaOnnxFreeWaveWrapper. }
PSherpaOnnxWave
=
^SherpaOnnxWave;
{ C-layout counterpart of TSherpaOnnxOnlineTransducerModelConfig.
  NOTE(review): field order presumably has to match the C API struct -
  confirm against c-api.h before reordering. }
SherpaOnnxOnlineTransducerModelConfig = record
  Encoder,
  Decoder,
  Joiner: PAnsiChar;
end;
{ C-layout counterpart of TSherpaOnnxOnlineParaformerModelConfig.
  NOTE(review): field order presumably has to match the C API struct -
  confirm against c-api.h before reordering. }
SherpaOnnxOnlineParaformerModelConfig = record
  Encoder,
  Decoder: PAnsiChar;
end;
{ C-layout counterpart of TSherpaOnnxOnlineZipformer2CtcModelConfig;
  it carries only the model path. }
SherpaOnnxOnlineZipformer2CtcModelConfig = record
  Model: PAnsiChar;
end;
{ C-layout counterpart of TSherpaOnnxOnlineModelConfig.
  Booleans are carried as cint32 (see Debug) and strings as PAnsiChar.
  NOTE(review): field order presumably has to match the C API struct -
  confirm against c-api.h before reordering. }
SherpaOnnxOnlineModelConfig = record
  Transducer: SherpaOnnxOnlineTransducerModelConfig;
  Paraformer: SherpaOnnxOnlineParaformerModelConfig;
  Zipformer2Ctc: SherpaOnnxOnlineZipformer2CtcModelConfig;
  Tokens: PAnsiChar;
  NumThreads: cint32;
  Provider: PAnsiChar;
  Debug: cint32;
  ModelType,
  ModelingUnit,
  BpeVocab: PAnsiChar;
end;
{ C-layout counterpart of TSherpaOnnxFeatureConfig. }
SherpaOnnxFeatureConfig = record
  SampleRate,
  FeatureDim: cint32;
end;
{ C-layout counterpart of TSherpaOnnxOnlineCtcFstDecoderConfig. }
SherpaOnnxOnlineCtcFstDecoderConfig = record
  Graph: PAnsiChar;
  MaxActive: cint32;
end;
{ C-layout counterpart of TSherpaOnnxOnlineRecognizerConfig. A pointer to
  this record is what SherpaOnnxCreateOnlineRecognizer receives.
  NOTE(review): field order presumably has to match the C API struct -
  confirm against c-api.h before reordering. }
SherpaOnnxOnlineRecognizerConfig = record
  FeatConfig: SherpaOnnxFeatureConfig;
  ModelConfig: SherpaOnnxOnlineModelConfig;
  DecodingMethod: PAnsiChar;
  MaxActivePaths,
  EnableEndpoint: cint32;
  Rule1MinTrailingSilence,
  Rule2MinTrailingSilence,
  Rule3MinUtteranceLength: Single;
  HotwordsFile: PAnsiChar;
  HotwordsScore: Single;
  CtcFstDecoderConfig: SherpaOnnxOnlineCtcFstDecoderConfig;
  RuleFsts,
  RuleFars: PAnsiChar;
  BlankPenalty: Single;
end;

PSherpaOnnxOnlineRecognizerConfig = ^SherpaOnnxOnlineRecognizerConfig;
{ Bindings to the native library named by SherpaOnnxLibName.
  All handles are opaque pointers; integer results are cint32. }

{ Recognizer lifetime. }
function SherpaOnnxCreateOnlineRecognizer(
  Config: PSherpaOnnxOnlineRecognizerConfig): Pointer; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDestroyOnlineRecognizer(Recognizer: Pointer); cdecl;
  external SherpaOnnxLibName;

{ Stream lifetime. }
function SherpaOnnxCreateOnlineStream(Recognizer: Pointer): Pointer; cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxCreateOnlineStreamWithHotwords(Recognizer: Pointer;
  Hotwords: PAnsiChar): Pointer; cdecl;
  external SherpaOnnxLibName;

{ The argument is a stream handle (TSherpaOnnxOnlineStream.Destroy passes
  its own handle), so it is named Stream rather than Recognizer. }
procedure SherpaOnnxDestroyOnlineStream(Stream: Pointer); cdecl;
  external SherpaOnnxLibName;

{ Audio input. Samples points to N floats. }
procedure SherpaOnnxOnlineStreamAcceptWaveform(Stream: Pointer;
  SampleRate: cint32; Samples: pcfloat; N: cint32); cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxOnlineStreamInputFinished(Stream: Pointer); cdecl;
  external SherpaOnnxLibName;

{ Decoding. }
function SherpaOnnxIsOnlineStreamReady(Recognizer: Pointer;
  Stream: Pointer): cint32; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDecodeOnlineStream(Recognizer: Pointer;
  Stream: Pointer); cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxOnlineStreamReset(Recognizer: Pointer;
  Stream: Pointer); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxOnlineStreamIsEndpoint(Recognizer: Pointer;
  Stream: Pointer): cint32; cdecl;
  external SherpaOnnxLibName;

{ Results. The returned JSON buffer must be released with
  SherpaOnnxDestroyOnlineStreamResultJson. }
function SherpaOnnxGetOnlineStreamResultAsJson(Recognizer: Pointer;
  Stream: Pointer): PAnsiChar; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDestroyOnlineStreamResultJson(PJson: PAnsiChar); cdecl;
  external SherpaOnnxLibName;

{ Wave file helpers. They are imported under a different Pascal name
  because this unit exposes its own SherpaOnnxReadWave function. }
function SherpaOnnxReadWaveWrapper(Filename: PAnsiChar): PSherpaOnnxWave; cdecl;
  external SherpaOnnxLibName name 'SherpaOnnxReadWave';

procedure SherpaOnnxFreeWaveWrapper(P: PSherpaOnnxWave); cdecl;
  external SherpaOnnxLibName name 'SherpaOnnxFreeWave';
{ Reads the wave file Filename through the native library and copies the
  samples and the sample rate into a Pascal-managed TSherpaOnnxWave.

  If the native reader returns nil, the result has no samples and a sample
  rate of 0 instead of dereferencing a nil pointer. }
function SherpaOnnxReadWave(Filename: AnsiString): TSherpaOnnxWave;
var
  PWave: PSherpaOnnxWave;
  I: Integer;
begin
  Result.Samples := nil;
  Result.SampleRate := 0;

  PWave := SherpaOnnxReadWaveWrapper(PAnsiChar(Filename));
  if PWave = nil then
    Exit;

  { Release the native buffer even if the copy below raises. }
  try
    SetLength(Result.Samples, PWave^.NumSamples);
    Result.SampleRate := PWave^.SampleRate;

    for I := Low(Result.Samples) to High(Result.Samples) do
      Result.Samples[I] := PWave^.Samples[I];
  finally
    SherpaOnnxFreeWaveWrapper(PWave);
  end;
end;
{ Returns a printable dump of the encoder, decoder and joiner fields. }
function TSherpaOnnxOnlineTransducerModelConfig.ToString: AnsiString;
const
  Template =
    'TSherpaOnnxOnlineTransducerModelConfig(Encoder := %s, Decoder := %s, Joiner := %s)';
begin
  Result := Format(Template, [Encoder, Decoder, Joiner]);
end;
{ Returns a printable dump of the encoder and decoder fields. }
function TSherpaOnnxOnlineParaformerModelConfig.ToString: AnsiString;
const
  Template =
    'TSherpaOnnxOnlineParaformerModelConfig(Encoder := %s, Decoder := %s)';
begin
  Result := Format(Template, [Encoder, Decoder]);
end;
{ Returns a printable dump of the model field. }
function TSherpaOnnxOnlineZipformer2CtcModelConfig.ToString: AnsiString;
const
  Template = 'TSherpaOnnxOnlineZipformer2CtcModelConfig(Model := %s)';
begin
  Result := Format(Template, [Model]);
end;
{ Returns a printable dump of every field, including the nested model
  configurations. Fields are separated by ', ' throughout; the Paraformer
  entry previously lacked the space after its comma. }
function TSherpaOnnxOnlineModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOnlineModelConfig(Transducer := %s, ' +
    'Paraformer := %s, ' +
    'Zipformer2Ctc := %s, ' +
    'Tokens := %s, ' +
    'NumThreads := %d, ' +
    'Provider := %s, ' +
    'Debug := %s, ' +
    'ModelType := %s, ' +
    'ModelingUnit := %s, ' +
    'BpeVocab := %s)',
    [Self.Transducer.ToString, Self.Paraformer.ToString,
     Self.Zipformer2Ctc.ToString, Self.Tokens, Self.NumThreads,
     Self.Provider, Self.Debug.ToString, Self.ModelType,
     Self.ModelingUnit, Self.BpeVocab]);
end;
{ Returns a printable dump of the sample rate and feature dimension. }
function TSherpaOnnxFeatureConfig.ToString: AnsiString;
const
  Template = 'TSherpaOnnxFeatureConfig(SampleRate := %d, FeatureDim := %d)';
begin
  Result := Format(Template, [SampleRate, FeatureDim]);
end;
{ Returns a printable dump of the graph path and MaxActive value. }
function TSherpaOnnxOnlineCtcFstDecoderConfig.ToString: AnsiString;
const
  Template =
    'TSherpaOnnxOnlineCtcFstDecoderConfig(Graph := %s, MaxActive := %d)';
begin
  Result := Format(Template, [Graph, MaxActive]);
end;
{ Returns a printable dump of every field, including the nested feature,
  model and CTC FST decoder configurations. Floating point fields are
  printed with one decimal. The first label is spelled FeatConfig to match
  the field name (it was previously printed as 'FeatConfg'). }
function TSherpaOnnxOnlineRecognizerConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOnlineRecognizerConfig(FeatConfig := %s, ' +
    'ModelConfig := %s, ' +
    'DecodingMethod := %s, ' +
    'MaxActivePaths := %d, ' +
    'EnableEndpoint := %s, ' +
    'Rule1MinTrailingSilence := %.1f, ' +
    'Rule2MinTrailingSilence := %.1f, ' +
    'Rule3MinUtteranceLength := %.1f, ' +
    'HotwordsFile := %s, ' +
    'HotwordsScore := %.1f, ' +
    'CtcFstDecoderConfig := %s, ' +
    'RuleFsts := %s, ' +
    'RuleFars := %s, ' +
    'BlankPenalty := %.1f' +
    ')',
    [Self.FeatConfig.ToString, Self.ModelConfig.ToString,
     Self.DecodingMethod, Self.MaxActivePaths,
     Self.EnableEndpoint.ToString,
     Self.Rule1MinTrailingSilence, Self.Rule2MinTrailingSilence,
     Self.Rule3MinUtteranceLength,
     Self.HotwordsFile, Self.HotwordsScore,
     Self.CtcFstDecoderConfig.ToString,
     Self.RuleFsts, Self.RuleFars, Self.BlankPenalty]);
end;
{ Returns a printable dump of the result. Tokens and timestamps are
  rendered as bracketed, comma-separated lists; timestamps use two
  decimals. The closing parenthesis directly follows the timestamp list
  (the previous format string left a dangling ', ' before it). }
function TSherpaOnnxOnlineRecognizerResult.ToString: AnsiString;
var
  TokensStr: AnsiString;
  S: AnsiString;
  TimestampStr: AnsiString;
  T: Single;
  Sep: AnsiString;
begin
  TokensStr := '[';
  Sep := '';
  for S in Self.Tokens do
  begin
    TokensStr := TokensStr + Sep + S;
    { Only entries after the first one are preceded by a separator. }
    Sep := ', ';
  end;
  TokensStr := TokensStr + ']';

  TimestampStr := '[';
  Sep := '';
  for T in Self.Timestamps do
  begin
    TimestampStr := TimestampStr + Sep + Format('%.2f', [T]);
    Sep := ', ';
  end;
  TimestampStr := TimestampStr + ']';

  Result := Format('TSherpaOnnxOnlineRecognizerResult(Text := %s, ' +
    'Tokens := %s, ' +
    'Timestamps := %s' +
    ')',
    [Self.Text, TokensStr, TimestampStr]);
end;
{ Creates the native recognizer.
  Config is copied field by field into the C-layout record: strings are
  passed as PAnsiChar, Booleans as their ordinal value. The resulting
  native handle is stored in Handle. }
constructor TSherpaOnnxOnlineRecognizer.Create(Config: TSherpaOnnxOnlineRecognizerConfig);
var
  Native: SherpaOnnxOnlineRecognizerConfig;
begin
  Initialize(Native);

  { Feature extraction. }
  Native.FeatConfig.SampleRate := Config.FeatConfig.SampleRate;
  Native.FeatConfig.FeatureDim := Config.FeatConfig.FeatureDim;

  { Model files. }
  Native.ModelConfig.Transducer.Encoder := PAnsiChar(Config.ModelConfig.Transducer.Encoder);
  Native.ModelConfig.Transducer.Decoder := PAnsiChar(Config.ModelConfig.Transducer.Decoder);
  Native.ModelConfig.Transducer.Joiner := PAnsiChar(Config.ModelConfig.Transducer.Joiner);

  Native.ModelConfig.Paraformer.Encoder := PAnsiChar(Config.ModelConfig.Paraformer.Encoder);
  Native.ModelConfig.Paraformer.Decoder := PAnsiChar(Config.ModelConfig.Paraformer.Decoder);

  Native.ModelConfig.Zipformer2Ctc.Model := PAnsiChar(Config.ModelConfig.Zipformer2Ctc.Model);

  { Remaining model options. }
  Native.ModelConfig.Tokens := PAnsiChar(Config.ModelConfig.Tokens);
  Native.ModelConfig.NumThreads := Config.ModelConfig.NumThreads;
  Native.ModelConfig.Provider := PAnsiChar(Config.ModelConfig.Provider);
  Native.ModelConfig.Debug := Ord(Config.ModelConfig.Debug);
  Native.ModelConfig.ModelType := PAnsiChar(Config.ModelConfig.ModelType);
  Native.ModelConfig.ModelingUnit := PAnsiChar(Config.ModelConfig.ModelingUnit);
  Native.ModelConfig.BpeVocab := PAnsiChar(Config.ModelConfig.BpeVocab);

  { Decoding and endpointing. }
  Native.DecodingMethod := PAnsiChar(Config.DecodingMethod);
  Native.MaxActivePaths := Config.MaxActivePaths;
  Native.EnableEndpoint := Ord(Config.EnableEndpoint);
  Native.Rule1MinTrailingSilence := Config.Rule1MinTrailingSilence;
  Native.Rule2MinTrailingSilence := Config.Rule2MinTrailingSilence;
  Native.Rule3MinUtteranceLength := Config.Rule3MinUtteranceLength;

  { Hotwords, CTC FST decoder, rule FSTs/FARs and blank penalty. }
  Native.HotwordsFile := PAnsiChar(Config.HotwordsFile);
  Native.HotwordsScore := Config.HotwordsScore;
  Native.CtcFstDecoderConfig.Graph := PAnsiChar(Config.CtcFstDecoderConfig.Graph);
  Native.CtcFstDecoderConfig.MaxActive := Config.CtcFstDecoderConfig.MaxActive;
  Native.RuleFsts := PAnsiChar(Config.RuleFsts);
  Native.RuleFars := PAnsiChar(Config.RuleFars);
  Native.BlankPenalty := Config.BlankPenalty;

  Handle := SherpaOnnxCreateOnlineRecognizer(@Native);
end;
{ Releases the native recognizer and then runs the inherited destructor.
  A nil handle is skipped so the native library is never handed a nil
  pointer. }
destructor TSherpaOnnxOnlineRecognizer.Destroy;
begin
  if Self.Handle <> nil then
  begin
    SherpaOnnxDestroyOnlineRecognizer(Self.Handle);
    Self.Handle := nil;
  end;

  inherited Destroy;
end;
{ Asks the native recognizer for a new stream and wraps the returned
  handle in a TSherpaOnnxOnlineStream. }
function TSherpaOnnxOnlineRecognizer.CreateStream: TSherpaOnnxOnlineStream;
begin
  Result := TSherpaOnnxOnlineStream.Create(
    SherpaOnnxCreateOnlineStream(Handle));
end;
{ Same as the parameterless overload, but passes Hotwords to the native
  library when the stream is created. }
function TSherpaOnnxOnlineRecognizer.CreateStream(Hotwords: AnsiString): TSherpaOnnxOnlineStream;
begin
  Result := TSherpaOnnxOnlineStream.Create(
    SherpaOnnxCreateOnlineStreamWithHotwords(Handle, PAnsiChar(Hotwords)));
end;
{ Returns True when the native ready check for Stream yields exactly 1. }
function TSherpaOnnxOnlineRecognizer.IsReady(Stream: TSherpaOnnxOnlineStream): Boolean;
var
  Status: cint32;
begin
  Status := SherpaOnnxIsOnlineStreamReady(Handle, Stream.Handle);
  Result := Status = 1;
end;
{ Forwards Stream to the native SherpaOnnxDecodeOnlineStream call. }
procedure TSherpaOnnxOnlineRecognizer.Decode(Stream: TSherpaOnnxOnlineStream);
begin
  SherpaOnnxDecodeOnlineStream(Handle, Stream.Handle);
end;
{ Forwards Stream to the native SherpaOnnxOnlineStreamReset call. }
procedure TSherpaOnnxOnlineRecognizer.Reset(Stream: TSherpaOnnxOnlineStream);
begin
  SherpaOnnxOnlineStreamReset(Handle, Stream.Handle);
end;
{ Returns True when the native endpoint check for Stream yields exactly 1. }
function TSherpaOnnxOnlineRecognizer.IsEndpoint(Stream: TSherpaOnnxOnlineStream): Boolean;
var
  Status: cint32;
begin
  Status := SherpaOnnxOnlineStreamIsEndpoint(Handle, Stream.Handle);
  Result := Status = 1;
end;
{ Fetches the current recognition result of Stream.

  The native library returns the result as a JSON string with the keys
  'text', 'tokens' and 'timestamps'. The string is copied into a Pascal
  AnsiString and the native buffer is released right away, so it cannot
  leak if parsing raises. The parsed JSON tree is owned by this function
  and is freed in a finally block (it was previously never freed).

  See
   - https://www.freepascal.org/daily/doc/fcl/fpjson/getjson.html
   - https://www.freepascal.org/daily/doc/fcl/fpjson/tjsondata.html
   - https://www.freepascal.org/daily/doc/fcl/fpjson/tjsonobject.html
   - https://www.freepascal.org/daily/doc/fcl/fpjson/tjsonenum.html }
function TSherpaOnnxOnlineRecognizer.GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult;
var
  pJson: PAnsiChar;
  JsonStr: AnsiString;
  JsonData: TJSONData;
  JsonObject: TJSONObject;
  JsonEnum: TJSONEnum;
  I: Integer;
begin
  pJson := SherpaOnnxGetOnlineStreamResultAsJson(Self.Handle, Stream.Handle);

  { The conversion copies the characters; the native buffer is no longer
    needed afterwards. }
  JsonStr := AnsiString(pJson);
  SherpaOnnxDestroyOnlineStreamResultJson(pJson);

  JsonData := GetJSON(JsonStr, False);
  try
    JsonObject := JsonData as TJSONObject;

    Result.Text := JsonObject.Strings['text'];

    SetLength(Result.Tokens, JsonObject.Arrays['tokens'].Count);
    I := 0;
    for JsonEnum in JsonObject.Arrays['tokens'] do
    begin
      Result.Tokens[I] := JsonEnum.Value.AsString;
      Inc(I);
    end;

    SetLength(Result.Timestamps, JsonObject.Arrays['timestamps'].Count);
    I := 0;
    for JsonEnum in JsonObject.Arrays['timestamps'] do
    begin
      Result.Timestamps[I] := JsonEnum.Value.AsFloat;
      Inc(I);
    end;
  finally
    { GetJSON hands ownership of the tree to the caller. }
    JsonData.Free;
  end;
end;
{ Stores the native stream handle P; no native call is made here. }
constructor TSherpaOnnxOnlineStream.Create(P: Pointer);
begin
  Handle := P;
end;
{ Releases the native stream and then runs the inherited destructor.
  A nil handle is skipped so the native library is never handed a nil
  pointer. }
destructor TSherpaOnnxOnlineStream.Destroy;
begin
  if Self.Handle <> nil then
  begin
    SherpaOnnxDestroyOnlineStream(Self.Handle);
    Self.Handle := nil;
  end;

  inherited Destroy;
end;
{ Passes Samples (Length(Samples) values recorded at SampleRate) to the
  native stream. }
procedure TSherpaOnnxOnlineStream.AcceptWaveform(Samples: array of Single; SampleRate: Integer);
var
  Count: Integer;
begin
  Count := Length(Samples);
  SherpaOnnxOnlineStreamAcceptWaveform(Handle, SampleRate,
    pcfloat(Samples), Count);
end;
{ Forwards to the native SherpaOnnxOnlineStreamInputFinished call. }
procedure TSherpaOnnxOnlineStream.InputFinished;
begin
  SherpaOnnxOnlineStreamInputFinished(Handle);
end;
end.
...
...
请
注册
或
登录
后发表评论