Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2025-07-12 19:49:52 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2025-07-12 19:49:52 +0800
Commit
fc2fc3d95dfd9b89d3f884d2e4c0590198fb37e6
fc2fc3d9
1 parent
0514aeeb
Add Pascal API for ten-vad (#2388)
隐藏空白字符变更
内嵌
并排对比
正在显示
6 个修改的文件
包含
238 行增加
和
21 行删除
.github/workflows/pascal.yaml
pascal-api-examples/vad/.gitignore
pascal-api-examples/vad/remove_silence.pas
pascal-api-examples/vad/remove_silence_ten_vad.pas
pascal-api-examples/vad/run-remove-silence-ten-vad.sh
sherpa-onnx/pascal-api/sherpa_onnx.pas
.github/workflows/pascal.yaml
查看文件 @
fc2fc3d
...
...
@@ -136,6 +136,27 @@ jobs:
cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/vad-with-non-streaming-asr
fi
# Runs the Pascal VAD example scripts from pascal-api-examples/vad in order:
# the circular-buffer example, the ten-vad silence-removal example, and the
# original silence-removal example (presumably the silero-vad one -- confirm
# against run-remove-silence.sh). The two silence-removal runs are wrapped in
# `time`, and the directory is listed afterwards so generated files show up
# in the job log.
# The PATH export puts a Lazarus-bundled FPC 3.2.2 (x86_64-win64) first.
- name: Run Pascal test (VAD test)
  shell: bash
  run: |
    export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
    cd ./pascal-api-examples
    pushd vad
    ./run-circular-buffer.sh
    echo "---"
    time ./run-remove-silence-ten-vad.sh
    echo "---"
    time ./run-remove-silence.sh
    echo "---"
    ls -lh
    popd
-
name
:
Run Speech Enhancement test (GTCRN)
shell
:
bash
run
:
|
...
...
@@ -298,24 +319,6 @@ jobs:
popd
-
name
:
Run Pascal test (VAD test)
shell
:
bash
run
:
|
export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
cd ./pascal-api-examples
pushd vad
./run-circular-buffer.sh
echo "---"
time ./run-remove-silence.sh
echo "---"
ls -lh
popd
-
name
:
Run Pascal test (Read wav test)
shell
:
bash
run
:
|
...
...
pascal-api-examples/vad/.gitignore
查看文件 @
fc2fc3d
!run*.sh
circular_buffer
remove_silence
remove_silence_ten_vad
...
...
pascal-api-examples/vad/remove_silence.pas
查看文件 @
fc2fc3d
{
Copyright
(c)
2024
Xiaomi
Corporation
}
{
This
file
shows
how
to
use
the
VAD
API
from
sherpa-onnx
to
remove
silences
from
a
wave
file.
to
remove
silences
from
a
wave
file
with
silero-vad
.
}
program
main;
...
...
pascal-api-examples/vad/remove_silence_ten_vad.pas
0 → 100644
查看文件 @
fc2fc3d
{
Copyright
(c)
2025
Xiaomi
Corporation
}
{
This
file
shows
how
to
use
the
VAD
API
from
sherpa-onnx
to
remove
silences
from
a
wave
file
with
ten-vad.
}
program main;

{$mode delphi}

uses
  sherpa_onnx,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  Config: TSherpaOnnxVadModelConfig;
  Vad: TSherpaOnnxVoiceActivityDetector;
  Offset: Integer;
  WindowSize: Integer;
  SpeechSegment: TSherpaOnnxSpeechSegment;
  SampleRate: Integer;
  AllSpeechSegment: array of TSherpaOnnxSpeechSegment;
  AllSamples: array of Single;
  N: Integer;
  I: Integer;

{ Pops every segment currently queued in Vad, appends it to AllSpeechSegment
  and prints its start/end time in seconds (sample index / SampleRate).

  Used both while streaming the wave through the detector and once more
  after Vad.Flush, so the two call sites cannot drift apart. }
procedure CollectDetectedSegments;
var
  Segment: TSherpaOnnxSpeechSegment;
  Start: Single;
  Duration: Single;
begin
  while not Vad.IsEmpty do
  begin
    SetLength(AllSpeechSegment, Length(AllSpeechSegment) + 1);

    Segment := Vad.Front();
    Vad.Pop();
    AllSpeechSegment[Length(AllSpeechSegment) - 1] := Segment;

    Start := Segment.Start / SampleRate;
    Duration := Length(Segment.Samples) / SampleRate;
    WriteLn(Format('%.3f -- %.3f', [Start, Start + Duration]));
  end;
end;

begin
  SampleRate := 16000; {Please don't change it unless you know the details}

  Wave := SherpaOnnxReadWave('./lei-jun-test.wav');
  if Wave.SampleRate <> SampleRate then
  begin
    WriteLn(Format('Expected sample rate: %d. Given: %d',
      [SampleRate, Wave.SampleRate]));
    Exit;
  end;

  WindowSize := 256; {Please don't change it unless you know the details}
  Initialize(Config);

  Config.TenVad.Model := './ten-vad.onnx';
  Config.TenVad.MinSpeechDuration := 0.25;
  Config.TenVad.MinSilenceDuration := 0.5;
  Config.TenVad.Threshold := 0.25;
  Config.TenVad.WindowSize := WindowSize;
  Config.NumThreads := 1;
  Config.Debug := True;
  Config.Provider := 'cpu';
  Config.SampleRate := SampleRate;

  { NOTE(review): the second argument (20) is presumably the detector's
    internal buffer size in seconds -- confirm against sherpa_onnx.pas. }
  Vad := TSherpaOnnxVoiceActivityDetector.Create(Config, 20);
  try
    AllSpeechSegment := nil;
    AllSamples := nil;

    { Feed the wave one full window at a time. A trailing chunk shorter than
      WindowSize is not passed to AcceptWaveform. }
    Offset := 0;
    while Offset + WindowSize <= Length(Wave.Samples) do
    begin
      Vad.AcceptWaveform(Wave.Samples, Offset, WindowSize);
      Inc(Offset, WindowSize);
      CollectDetectedSegments;
    end;

    { Flush and pick up whatever the detector queues as a result. }
    Vad.Flush;
    CollectDetectedSegments;

    { First pass: total number of speech samples, to size the output once. }
    N := 0;
    for SpeechSegment in AllSpeechSegment do
      Inc(N, Length(SpeechSegment.Samples));

    SetLength(AllSamples, N);

    { Second pass: concatenate the segments back to back. }
    N := 0;
    for SpeechSegment in AllSpeechSegment do
    begin
      for I := Low(SpeechSegment.Samples) to High(SpeechSegment.Samples) do
      begin
        AllSamples[N] := SpeechSegment.Samples[I];
        Inc(N);
      end;
    end;

    SherpaOnnxWriteWave('./lei-jun-test-no-silence-ten-vad.wav',
      AllSamples, SampleRate);
    WriteLn('Saved to ./lei-jun-test-no-silence-ten-vad.wav');
  finally
    { Release the detector even if anything above raises. }
    FreeAndNil(Vad);
  end;
end.
...
...
pascal-api-examples/vad/run-remove-silence-ten-vad.sh
0 → 100755
查看文件 @
fc2fc3d
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared C API library if it is not installed yet,
# downloads the ten-vad model and the test wave if missing, compiles
# remove_silence_ten_vad.pas with fpc and runs the resulting binary.
#
# The script may be started from any directory: it switches to its own
# directory first, so downloads and build products always land next to it.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR/../.." && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# Everything below uses paths relative to the example directory
# (./ten-vad.onnx, ./remove_silence_ten_vad.pas, ...).
cd "$SCRIPT_DIR"

BUILD_DIR="$SHERPA_ONNX_DIR/build"
LIB_DIR="$BUILD_DIR/install/lib"

# Build only when no shared library for any supported platform is present.
if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$BUILD_DIR"
  pushd "$BUILD_DIR"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# -f makes curl exit non-zero on an HTTP error rather than saving the error
# page under the target name, which later runs would mistake for a valid file.
if [[ ! -f ./ten-vad.onnx ]]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
fi

if [[ ! -f ./lei-jun-test.wav ]]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu"$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  -Fl"$LIB_DIR" \
  ./remove_silence_ten_vad.pas

# Let the dynamic loader find the freshly built library (Linux / macOS).
export LD_LIBRARY_PATH="$LIB_DIR:$LD_LIBRARY_PATH"
export DYLD_LIBRARY_PATH="$LIB_DIR:$DYLD_LIBRARY_PATH"

./remove_silence_ten_vad
...
...
sherpa-onnx/pascal-api/sherpa_onnx.pas
查看文件 @
fc2fc3d
...
...
@@ -426,12 +426,24 @@ type
class
operator
Initialize(
{
$IFDEF
FPC
}
var
{
$ELSE
}
out
{
$ENDIF
}
Dest:
TSherpaOnnxSileroVadModelConfig);
end;
{ Pascal-side configuration for the ten-vad voice activity detection model.

  The Initialize management operator fills in defaults for every numeric
  field; Model is not assigned there. }
TSherpaOnnxTenVadModelConfig = record
  { Model file; the bundled example sets it to './ten-vad.onnx'. }
  Model: AnsiString;
  { Default 0.5. Presumably the speech-probability cut-off -- confirm. }
  Threshold: Single;
  { Default 0.5. Presumably in seconds -- confirm. }
  MinSilenceDuration: Single;
  { Default 0.25. Presumably in seconds -- confirm. }
  MinSpeechDuration: Single;
  { Default 256. The example feeds the detector this many samples per call. }
  WindowSize: Integer;
  { Default 5.0. Presumably in seconds -- confirm. }
  MaxSpeechDuration: Single;

  { Renders all fields as 'TSherpaOnnxTenVadModelConfig(Name := value, ...)'. }
  function ToString: AnsiString;
  { var/out differs between FPC and other compilers, hence the IFDEF. }
  class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxTenVadModelConfig);
end;
{ Pascal-side top-level VAD configuration. It carries one sub-config per
  supported model (silero-vad and ten-vad) plus settings shared by both. }
TSherpaOnnxVadModelConfig = record
  { Settings for the silero-vad model. }
  SileroVad: TSherpaOnnxSileroVadModelConfig;
  { The bundled ten-vad example uses 16000 and rejects waves with another rate. }
  SampleRate: Integer;
  { The bundled example uses 1. }
  NumThreads: Integer;
  { The bundled example uses 'cpu'. }
  Provider: AnsiString;
  { Stored as a 32-bit integer in the C-side record. }
  Debug: Boolean;
  { Settings for the ten-vad model. Declared last, mirroring the position of
    TenVad in the C-side SherpaOnnxVadModelConfig record. }
  TenVad: TSherpaOnnxTenVadModelConfig;

  { Human-readable dump of the configuration, including both sub-configs. }
  function ToString: AnsiString;
  { var/out differs between FPC and other compilers, hence the IFDEF. }
  class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxVadModelConfig);
end;
...
...
@@ -829,12 +841,23 @@ type
WindowSize:
cint
32
;
MaxSpeechDuration:
cfloat;
end;
{ C-facing counterpart of TSherpaOnnxTenVadModelConfig: same fields, but
  expressed with C-compatible types (PAnsiChar, cfloat, cint32).

  NOTE(review): field order and types presumably have to match the struct
  of the same name in the sherpa-onnx C API header -- do not reorder without
  checking it. }
SherpaOnnxTenVadModelConfig = record
  { Assigned via PAnsiChar(Config.TenVad.Model); the pointer refers to the
    Pascal string's buffer, so that string must outlive any use of it. }
  Model: PAnsiChar;
  Threshold: cfloat;
  MinSilenceDuration: cfloat;
  MinSpeechDuration: cfloat;
  WindowSize: cint32;
  MaxSpeechDuration: cfloat;
end;
{ C-facing counterpart of TSherpaOnnxVadModelConfig, using C-compatible
  field types.

  NOTE(review): the layout presumably mirrors the C API struct of the same
  name; TenVad sits after Debug, so keep any new fields in the order the C
  header uses -- confirm against the header before changing. }
SherpaOnnxVadModelConfig = record
  SileroVad: SherpaOnnxSileroVadModelConfig;
  SampleRate: cint32;
  NumThreads: cint32;
  { Assigned via PAnsiChar(Config.Provider). }
  Provider: PAnsiChar;
  { 32-bit integer here, Boolean on the Pascal side. }
  Debug: cint32;
  TenVad: SherpaOnnxTenVadModelConfig;
end;

{ Pointer to the C-facing VAD configuration record. }
PSherpaOnnxVadModelConfig = ^SherpaOnnxVadModelConfig;
...
...
@@ -1907,6 +1930,21 @@ begin
]
);
end;
{ Returns a one-line textual dump of the ten-vad settings in the form
  'TSherpaOnnxTenVadModelConfig(Field := value, ...)'. Floating point fields
  are printed with two decimals. }
function TSherpaOnnxTenVadModelConfig.ToString: AnsiString;
const
  { One placeholder per field, in declaration order. }
  Layout =
    'TSherpaOnnxTenVadModelConfig(' +
    'Model := %s, ' +
    'Threshold := %.2f, ' +
    'MinSilenceDuration := %.2f, ' +
    'MinSpeechDuration := %.2f, ' +
    'WindowSize := %d, ' +
    'MaxSpeechDuration := %.2f' +
    ')';
begin
  Result := Format(Layout, [
    Model,
    Threshold,
    MinSilenceDuration,
    MinSpeechDuration,
    WindowSize,
    MaxSpeechDuration
  ]);
end;
class
operator
TSherpaOnnxSileroVadModelConfig.Initialize(
{
$IFDEF
FPC
}
var
{
$ELSE
}
out
{
$ENDIF
}
Dest:
TSherpaOnnxSileroVadModelConfig);
begin
Dest.Threshold
:=
0.5
;
...
...
@@ -1916,6 +1954,15 @@ begin
Dest.MaxSpeechDuration
:=
5.0
;
end;
{ Management operator: gives every numeric field of a fresh ten-vad config
  its default value. Model is not assigned here. }
class operator TSherpaOnnxTenVadModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxTenVadModelConfig);
const
  DefaultThreshold = 0.5;
  DefaultMinSilenceDuration = 0.5;
  DefaultMinSpeechDuration = 0.25;
  DefaultWindowSize = 256;
  DefaultMaxSpeechDuration = 5.0;
begin
  Dest.WindowSize := DefaultWindowSize;
  Dest.Threshold := DefaultThreshold;
  Dest.MinSpeechDuration := DefaultMinSpeechDuration;
  Dest.MinSilenceDuration := DefaultMinSilenceDuration;
  Dest.MaxSpeechDuration := DefaultMaxSpeechDuration;
end;
function
TSherpaOnnxVadModelConfig.ToString:
AnsiString;
begin
Result
:=
Format('TSherpaOnnxVadModelConfig('
+
...
...
@@ -1923,10 +1970,11 @@ begin
'SampleRate
:=
%d,
'
+
'NumThreads
:=
%d,
'
+
'Provider
:=
%s,
'
+
'Debug
:=
%s'
+
'Debug
:=
%s,
'
+
'TenVad
:=
%s'
+
')',
[
Self.SileroVad.ToString
,
Self.SampleRate
,
Self.NumThreads
,
Self.Provider
,
Self.Debug.ToString
Self.Debug.ToString
,
Self.TenVad.ToString
]
);
end;
...
...
@@ -2077,6 +2125,13 @@ begin
C.SileroVad.WindowSize
:=
Config.SileroVad.WindowSize;
C.SileroVad.MaxSpeechDuration
:=
Config.SileroVad.MaxSpeechDuration;
C.TenVad.Model
:=
PAnsiChar(Config.TenVad.Model);
C.TenVad.Threshold
:=
Config.TenVad.Threshold;
C.TenVad.MinSilenceDuration
:=
Config.TenVad.MinSilenceDuration;
C.TenVad.MinSpeechDuration
:=
Config.TenVad.MinSpeechDuration;
C.TenVad.WindowSize
:=
Config.TenVad.WindowSize;
C.TenVad.MaxSpeechDuration
:=
Config.TenVad.MaxSpeechDuration;
C.SampleRate
:=
Config.SampleRate;
C.NumThreads
:=
Config.NumThreads;
C.Provider
:=
PAnsiChar(Config.Provider);
...
...
请
注册
或
登录
后发表评论