Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-10-27 12:21:16 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-10-27 12:21:16 +0800
Commit
cdd8e1bbcb51aeb0b1bd73c4138f0e4ad7061bbd
cdd8e1bb
1 parent
54468a73
Add Pascal API for Moonshine models (#1482)
隐藏空白字符变更
内嵌
并排对比
正在显示
8 个修改的文件
包含
354 行增加
和
3 行删除
.github/workflows/pascal.yaml
pascal-api-examples/non-streaming-asr/.gitignore
pascal-api-examples/non-streaming-asr/moonshine.pas
pascal-api-examples/non-streaming-asr/run-moonshine.sh
pascal-api-examples/vad-with-non-streaming-asr/.gitignore
pascal-api-examples/vad-with-non-streaming-asr/run-vad-with-moonshine.sh
pascal-api-examples/vad-with-non-streaming-asr/vad_with_moonshine.pas
sherpa-onnx/pascal-api/sherpa_onnx.pas
.github/workflows/pascal.yaml
查看文件 @
cdd8e1b
...
...
@@ -165,6 +165,10 @@ jobs:
cd ./pascal-api-examples
pushd vad-with-non-streaming-asr
time ./run-vad-with-moonshine.sh
rm -rf sherpa-onnx-*
echo "---"
time ./run-vad-with-whisper.sh
rm -rf sherpa-onnx-*
echo "---"
...
...
@@ -220,6 +224,10 @@ jobs:
rm -rf sherpa-onnx-*
echo "---"
./run-moonshine.sh
rm -rf sherpa-onnx-*
echo "---"
./run-whisper.sh
rm -rf sherpa-onnx-*
echo "---"
...
...
pascal-api-examples/non-streaming-asr/.gitignore
查看文件 @
cdd8e1b
...
...
@@ -7,3 +7,4 @@ paraformer
paraformer_itn
sense_voice
telespeech_ctc
moonshine
...
...
pascal-api-examples/non-streaming-asr/moonshine.pas
0 → 100644
查看文件 @
cdd8e1b
{ Copyright (c)  2024  Xiaomi Corporation }

{
This file shows how to use a non-streaming Moonshine model
to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program moonshine;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  { The four Moonshine model files plus the token table. }
  Config.ModelConfig.Moonshine.Preprocessor := './sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx';
  Config.ModelConfig.Moonshine.Encoder := './sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx';
  Config.ModelConfig.Moonshine.UncachedDecoder := './sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx';
  Config.ModelConfig.Moonshine.CachedDecoder := './sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx';

  Config.ModelConfig.Tokens := './sherpa-onnx-moonshine-tiny-en-int8/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  { Duration and the real-time factor below divide by the sample rate and by
    the duration, so refuse to continue with an empty or invalid wave instead
    of failing later with a division error. }
  if (Length(Wave.Samples) = 0) or (Wave.SampleRate <= 0) then
  begin
    WriteLn(Format('Failed to read any samples from %s', [WaveFilename]));
    Halt(1);
  end;

  Stream := nil;
  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  try
    Stream := Recognizer.CreateStream();

    { Only the feed + decode + fetch-result steps are timed. }
    Start := Now;

    Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
    Recognizer.Decode(Stream);

    RecognitionResult := Recognizer.GetResult(Stream);

    Stop := Now;

    Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
    Duration := Length(Wave.Samples) / Wave.SampleRate;
    RealTimeFactor := Elapsed / Duration;

    WriteLn(RecognitionResult.ToString);
    WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
    WriteLn(Format('Elapsed %.3f s', [Elapsed]));
    WriteLn(Format('Wave duration %.3f s', [Duration]));
    WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
  finally
    { Free resources to avoid memory leak, even when an exception is raised
      while decoding.

      Note: A short-lived script like this one could get away without it,
      but you have to do it in your own large/complex project. }
    FreeAndNil(Stream);
    FreeAndNil(Recognizer);
  end;
end.
...
...
pascal-api-examples/non-streaming-asr/run-moonshine.sh
0 → 100755
查看文件 @
cdd8e1b
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared C API library if it is not installed yet,
# downloads the Moonshine tiny (int8) model if it is missing, compiles
# moonshine.pas with fpc and runs the resulting binary.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR/../.." && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# Everything below uses paths relative to this script (model directory,
# ./moonshine.pas), so make the script independent of the caller's cwd.
cd "$SCRIPT_DIR"

BUILD_DIR="$SHERPA_ONNX_DIR/build"
LIB_DIR="$BUILD_DIR/install/lib"

# Build and install the shared library only when none of the
# platform-specific library files exists yet.
if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && \
      ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && \
      ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$BUILD_DIR"
  pushd "$BUILD_DIR"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

# Fetch and unpack the model only if it is not already present.
if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  "-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  "-Fl$LIB_DIR" \
  ./moonshine.pas

# Prepend the install dir; only add the ':' separator when the variable is
# already set, so that no empty entry ends up in the library search path.
export LD_LIBRARY_PATH="$LIB_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DYLD_LIBRARY_PATH="$LIB_DIR${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"

./moonshine
...
...
pascal-api-examples/vad-with-non-streaming-asr/.gitignore
查看文件 @
cdd8e1b
!run-*.sh
vad_with_whisper
vad_with_sense_voice
vad_with_moonshine
...
...
pascal-api-examples/vad-with-non-streaming-asr/run-vad-with-moonshine.sh
0 → 100755
查看文件 @
cdd8e1b
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared C API library if it is not installed yet,
# downloads silero_vad.onnx, Obama.wav and the Moonshine tiny (int8) model if
# they are missing, compiles vad_with_moonshine.pas with fpc and runs the
# resulting binary.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR/../.." && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# Everything below uses paths relative to this script (downloaded files,
# ./vad_with_moonshine.pas), so make the script independent of the caller's
# cwd.
cd "$SCRIPT_DIR"

BUILD_DIR="$SHERPA_ONNX_DIR/build"
LIB_DIR="$BUILD_DIR/install/lib"

# Build and install the shared library only when none of the
# platform-specific library files exists yet.
if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && \
      ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && \
      ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$BUILD_DIR"
  pushd "$BUILD_DIR"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# Each download is skipped when its file is already present.
if [[ ! -f ./silero_vad.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./Obama.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
fi

if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  "-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  "-Fl$LIB_DIR" \
  ./vad_with_moonshine.pas

# Prepend the install dir; only add the ':' separator when the variable is
# already set, so that no empty entry ends up in the library search path.
export LD_LIBRARY_PATH="$LIB_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DYLD_LIBRARY_PATH="$LIB_DIR${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"

./vad_with_moonshine
...
...
pascal-api-examples/vad-with-non-streaming-asr/vad_with_moonshine.pas
0 → 100644
查看文件 @
cdd8e1b
{
Copyright
(c)
2024
Xiaomi
Corporation
}
{
This
file
shows
how
to
use
a
non-streaming
Moonshine
model
with
silero
VAD
to
decode
files.
You
can
download
the
model
files
from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}
program
vad_with_moonshine;
{
$mode
objfpc
}
uses
sherpa_onnx,
SysUtils;
{ Builds a voice activity detector backed by the silero VAD model found at
  ./silero_vad.onnx, running single-threaded on the CPU with debug output
  switched on. }
function CreateVad(): TSherpaOnnxVoiceActivityDetector;
const
  { Please don't change these two values unless you know the details }
  SampleRate = 16000;
  WindowSize = 512;
var
  Config: TSherpaOnnxVadModelConfig;
begin
  Initialize(Config);

  { Detector-wide settings }
  Config.SampleRate := SampleRate;
  Config.NumThreads := 1;
  Config.Debug := True;
  Config.Provider := 'cpu';

  { silero VAD specific settings }
  Config.SileroVad.Model := './silero_vad.onnx';
  Config.SileroVad.MinSpeechDuration := 0.5;
  Config.SileroVad.MinSilenceDuration := 0.5;
  Config.SileroVad.Threshold := 0.5;
  Config.SileroVad.WindowSize := WindowSize;

  { NOTE(review): the second argument is presumably the internal buffer size
    in seconds - confirm against TSherpaOnnxVoiceActivityDetector.Create }
  Result := TSherpaOnnxVoiceActivityDetector.Create(Config, 30);
end;
{ Builds an offline (non-streaming) recognizer configured for the
  Moonshine tiny English int8 model stored in
  ./sherpa-onnx-moonshine-tiny-en-int8, running single-threaded on the CPU
  with debug output switched off. }
function CreateOfflineRecognizer(): TSherpaOnnxOfflineRecognizer;
var
  Config: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(Config);

  { Runtime settings }
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  { Token table shipped with the model }
  Config.ModelConfig.Tokens := './sherpa-onnx-moonshine-tiny-en-int8/tokens.txt';

  { The four Moonshine model files }
  Config.ModelConfig.Moonshine.Preprocessor := './sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx';
  Config.ModelConfig.Moonshine.Encoder := './sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx';
  Config.ModelConfig.Moonshine.UncachedDecoder := './sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx';
  Config.ModelConfig.Moonshine.CachedDecoder := './sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx';

  Result := TSherpaOnnxOfflineRecognizer.Create(Config);
end;
{ Pops every speech segment currently queued in Vad, decodes it with
  Recognizer and prints one line per segment in the form
  "<start> -- <end> <text>", where start and end are in seconds.

  SampleRate is the sample rate of the audio that was fed to Vad; it is used
  both for decoding and for converting sample counts to seconds. }
procedure DecodeQueuedSegments(Vad: TSherpaOnnxVoiceActivityDetector;
  Recognizer: TSherpaOnnxOfflineRecognizer; SampleRate: Integer);
var
  SpeechSegment: TSherpaOnnxSpeechSegment;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
  Start: Single;
  Duration: Single;
begin
  while not Vad.IsEmpty do
  begin
    SpeechSegment := Vad.Front();
    Vad.Pop();

    Stream := Recognizer.CreateStream();
    try
      Stream.AcceptWaveform(SpeechSegment.Samples, SampleRate);
      Recognizer.Decode(Stream);
      RecognitionResult := Recognizer.GetResult(Stream);

      Start := SpeechSegment.Start / SampleRate;
      Duration := Length(SpeechSegment.Samples) / SampleRate;
      WriteLn(Format('%.3f -- %.3f %s',
        [Start, Start + Duration, RecognitionResult.Text]));
    finally
      { One stream per segment; release it even if decoding raises. }
      FreeAndNil(Stream);
    end;
  end;
end;

var
  Wave: TSherpaOnnxWave;

  Recognizer: TSherpaOnnxOfflineRecognizer;
  Vad: TSherpaOnnxVoiceActivityDetector;

  Offset: Integer;
  WindowSize: Integer;
begin
  Recognizer := nil;
  Vad := CreateVad();
  try
    Recognizer := CreateOfflineRecognizer();

    Wave := SherpaOnnxReadWave('./Obama.wav');
    if Wave.SampleRate <> Vad.Config.SampleRate then
    begin
      WriteLn(Format('Expected sample rate: %d. Given: %d',
        [Vad.Config.SampleRate, Wave.SampleRate]));
      { The finally block below still releases Vad and Recognizer. }
      Exit;
    end;

    WindowSize := Vad.Config.SileroVad.WindowSize;
    Offset := 0;

    { Feed the wave to the VAD one full window at a time and decode the
      segments it has completed after each window. A trailing partial window
      is not fed. }
    while Offset + WindowSize <= Length(Wave.Samples) do
    begin
      Vad.AcceptWaveform(Wave.Samples, Offset, WindowSize);
      Inc(Offset, WindowSize);

      DecodeQueuedSegments(Vad, Recognizer, Wave.SampleRate);
    end;

    { Flush the VAD and decode whatever segments are queued afterwards. }
    Vad.Flush;
    DecodeQueuedSegments(Vad, Recognizer, Wave.SampleRate);
  finally
    FreeAndNil(Recognizer);
    FreeAndNil(Vad);
  end;
end.
...
...
sherpa-onnx/pascal-api/sherpa_onnx.pas
查看文件 @
cdd8e1b
...
...
@@ -250,6 +250,14 @@ type
class
operator
Initialize(
{
$IFDEF
FPC
}
var
{
$ELSE
}
out
{
$ENDIF
}
Dest:
TSherpaOnnxOfflineWhisperModelConfig);
end;
{ Pascal-side configuration of a Moonshine model: one string field per model
  file, plus ToString for a printable description. }
TSherpaOnnxOfflineMoonshineModelConfig = record
  Preprocessor, Encoder: AnsiString;
  UncachedDecoder, CachedDecoder: AnsiString;
  function ToString: AnsiString;
end;
TSherpaOnnxOfflineTdnnModelConfig
=
record
Model:
AnsiString;
function
ToString:
AnsiString;
...
...
@@ -285,6 +293,7 @@ type
BpeVocab:
AnsiString;
TeleSpeechCtc:
AnsiString;
SenseVoice:
TSherpaOnnxOfflineSenseVoiceModelConfig;
Moonshine:
TSherpaOnnxOfflineMoonshineModelConfig;
class
operator
Initialize(
{
$IFDEF
FPC
}
var
{
$ELSE
}
out
{
$ENDIF
}
Dest:
TSherpaOnnxOfflineModelConfig);
function
ToString:
AnsiString;
end;
...
...
@@ -617,6 +626,12 @@ type
Task:
PAnsiChar;
TailPaddings:
cint
32
;
end;
{ PAnsiChar counterpart of TSherpaOnnxOfflineMoonshineModelConfig.
  NOTE(review): presumably mirrors the C API struct of the same name, so the
  field order below is kept exactly as it was - confirm against the C header
  before reordering. }
SherpaOnnxOfflineMoonshineModelConfig = record
  Preprocessor, Encoder, UncachedDecoder, CachedDecoder: PAnsiChar;
end;
SherpaOnnxOfflineTdnnModelConfig
=
record
Model:
PAnsiChar;
end;
...
...
@@ -644,6 +659,7 @@ type
BpeVocab:
PAnsiChar;
TeleSpeechCtc:
PAnsiChar;
SenseVoice:
SherpaOnnxOfflineSenseVoiceModelConfig;
Moonshine:
SherpaOnnxOfflineMoonshineModelConfig;
end;
SherpaOnnxOfflineRecognizerConfig
=
record
...
...
@@ -1312,6 +1328,16 @@ begin
[
Self.Encoder
,
Self.Decoder
,
Self.Language
,
Self.Task
,
Self.TailPaddings
]
);
end;
{ Returns a printable description of the record listing all four
  model-file fields in declaration order. }
function TSherpaOnnxOfflineMoonshineModelConfig.ToString: AnsiString;
const
  Template =
    'TSherpaOnnxOfflineMoonshineModelConfig(' +
    'Preprocessor := %s, ' +
    'Encoder := %s, ' +
    'UncachedDecoder := %s, ' +
    'CachedDecoder := %s)';
begin
  Result := Format(Template,
    [Preprocessor, Encoder, UncachedDecoder, CachedDecoder]);
end;
function
TSherpaOnnxOfflineTdnnModelConfig.ToString:
AnsiString;
begin
Result
:=
Format('TSherpaOnnxOfflineTdnnModelConfig(Model
:=
%s)',
...
...
@@ -1353,13 +1379,14 @@ begin
'ModelingUnit
:=
%s,
'
+
'BpeVocab
:=
%s,
'
+
'TeleSpeechCtc
:=
%s,
'
+
'SenseVoice
:=
%s'
+
'SenseVoice
:=
%s,
'
+
'Moonshine
:=
%s'
+
')',
[
Self.Transducer.ToString
,
Self.Paraformer.ToString
,
Self.NeMoCtc.ToString
,
Self.Whisper.ToString
,
Self.Tdnn.ToString
,
Self.Tokens
,
Self.NumThreads
,
Self.Debug.ToString
,
Self.Provider
,
Self.ModelType
,
Self.ModelingUnit
,
Self.BpeVocab
,
Self.TeleSpeechCtc
,
Self.SenseVoice.ToString
Self.TeleSpeechCtc
,
Self.SenseVoice.ToString
,
Self.Moonshine.ToString
]
);
end;
...
...
@@ -1407,7 +1434,6 @@ begin
C.ModelConfig.Tdnn.Model
:=
PAnsiChar(Config.ModelConfig.Tdnn.Model);
C.ModelConfig.Tokens
:=
PAnsiChar(Config.ModelConfig.Tokens);
C.ModelConfig.NumThreads
:=
Config.ModelConfig.NumThreads;
C.ModelConfig.Debug
:=
Ord(Config.ModelConfig.Debug);
...
...
@@ -1421,6 +1447,11 @@ begin
C.ModelConfig.SenseVoice.Language
:=
PAnsiChar(Config.ModelConfig.SenseVoice.Language);
C.ModelConfig.SenseVoice.UseItn
:=
Ord(Config.ModelConfig.SenseVoice.UseItn);
C.ModelConfig.Moonshine.Preprocessor
:=
PAnsiChar(Config.ModelConfig.Moonshine.Preprocessor);
C.ModelConfig.Moonshine.Encoder
:=
PAnsiChar(Config.ModelConfig.Moonshine.Encoder);
C.ModelConfig.Moonshine.UncachedDecoder
:=
PAnsiChar(Config.ModelConfig.Moonshine.UncachedDecoder);
C.ModelConfig.Moonshine.CachedDecoder
:=
PAnsiChar(Config.ModelConfig.Moonshine.CachedDecoder);
C.LMConfig.Model
:=
PAnsiChar(Config.LMConfig.Model);
C.LMConfig.Scale
:=
Config.LMConfig.Scale;
...
...
请
注册
或
登录
后发表评论