Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-08-24 23:05:54 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-08-24 23:05:54 +0800
Commit
5ed8e31868cd763a0f65f13e7c5878549a1d2c11
5ed8e318
1 parent
537e163d
Add VAD and keyword spotting for the Node package with WebAssembly (#1286)
隐藏空白字符变更
内嵌
并排对比
正在显示
40 个修改的文件
包含
456 行增加
和
524 行删除
.github/scripts/test-nodejs-npm.sh
.github/workflows/npm.yaml
.github/workflows/test-nodejs.yaml
CHANGELOG.md
CMakeLists.txt
dart-api-examples/add-punctuations/pubspec.yaml
dart-api-examples/audio-tagging/pubspec.yaml
dart-api-examples/keyword-spotter/pubspec.yaml
dart-api-examples/non-streaming-asr/pubspec.yaml
dart-api-examples/speaker-identification/pubspec.yaml
dart-api-examples/streaming-asr/pubspec.yaml
dart-api-examples/tts/pubspec.yaml
dart-api-examples/vad-with-non-streaming-asr/pubspec.yaml
dart-api-examples/vad/pubspec.yaml
flutter-examples/streaming_asr/pubspec.yaml
flutter-examples/tts/pubspec.yaml
flutter/sherpa_onnx/pubspec.yaml
flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec
flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
new-release.sh
nodejs-addon-examples/package.json
nodejs-addon-examples/test_keyword_spotter_transducer.js
nodejs-addon-examples/test_vad_with_non_streaming_asr_whisper.js
nodejs-examples/test-keyword-spotter-transducer.js
nodejs-examples/test-offline-nemo-ctc.js
nodejs-examples/test-offline-paraformer-itn.js
nodejs-examples/test-offline-paraformer.js
nodejs-examples/test-offline-sense-voice.js
nodejs-examples/test-offline-transducer.js
nodejs-examples/test-offline-whisper.js
nodejs-examples/test-online-paraformer-microphone.js
nodejs-examples/test-online-paraformer.js
nodejs-examples/test-online-transducer.js
nodejs-examples/test-vad-with-non-streaming-asr-whisper.js
scripts/nodejs/.gitignore
scripts/nodejs/index.js
wasm/asr/sherpa-onnx-asr.js
wasm/kws/sherpa-onnx-kws.js
wasm/nodejs/CMakeLists.txt
wasm/nodejs/sherpa-onnx-wave.js
.github/scripts/test-nodejs-npm.sh
查看文件 @
5ed8e31
...
...
@@ -9,6 +9,28 @@ git status
ls -lh
ls -lh node_modules
echo
'-----vad+whisper----------'
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
rm sherpa-onnx-whisper-tiny.en.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
node ./test-vad-with-non-streaming-asr-whisper.js
rm Obama.wav
rm silero_vad.onnx
rm -rf sherpa-onnx-whisper-tiny.en
echo
"----------keyword spotting----------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
node ./test-keyword-spotter-transducer.js
rm -rf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01
# offline asr
#
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
...
...
.github/workflows/npm.yaml
查看文件 @
5ed8e31
name
:
npm
on
:
push
:
branches
:
-
npm
workflow_dispatch
:
concurrency
:
...
...
@@ -27,6 +30,9 @@ jobs:
-
name
:
Install emsdk
uses
:
mymindstorm/setup-emsdk@v14
with
:
version
:
3.1.51
actions-cache-folder
:
'
emsdk-cache'
-
name
:
View emsdk version
shell
:
bash
...
...
@@ -51,8 +57,6 @@ jobs:
-
name
:
Build nodejs package
shell
:
bash
env
:
NODE_AUTH_TOKEN
:
${{ secrets.NPM_TOKEN }}
run
:
|
./build-wasm-simd-nodejs.sh
cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.js ./scripts/nodejs/
...
...
@@ -71,6 +75,29 @@ jobs:
rm package.json.bak
-
name
:
Collect files
shell
:
bash
run
:
|
dst=sherpa-onnx-wasm-nodejs
mkdir $dst
cp -v scripts/nodejs/* $dst
tar cvjf $dst.tar.bz2 $dst
echo "---"
ls -h $dst
-
uses
:
actions/upload-artifact@v4
with
:
name
:
sherpa-onnx-wasm-nodejs
path
:
./*.tar.bz2
-
name
:
Build nodejs package
shell
:
bash
env
:
NODE_AUTH_TOKEN
:
${{ secrets.NPM_TOKEN }}
run
:
|
cd scripts/nodejs
git diff
npm install
...
...
.github/workflows/test-nodejs.yaml
查看文件 @
5ed8e31
...
...
@@ -55,6 +55,9 @@ jobs:
-
name
:
Install emsdk
uses
:
mymindstorm/setup-emsdk@v14
with
:
version
:
3.1.51
actions-cache-folder
:
'
emsdk-cache'
-
name
:
View emsdk version
shell
:
bash
...
...
@@ -109,6 +112,7 @@ jobs:
node --version
npm --version
export d=scripts/nodejs
cat $d/index.js
pushd $d
npm install
...
...
CHANGELOG.md
查看文件 @
5ed8e31
## 1.10.23
*
flutter: add lang, emotion, event to OfflineRecognizerResult (#1268)
*
Use a separate thread to initialize models for lazarus examples. (#1270)
*
Object pascal examples for recording and playing audio with portaudio. (#1271)
*
Text to speech API for Object Pascal. (#1273)
*
update kotlin api for better release native object and add user-friendly apis. (#1275)
*
Update wave-reader.cc to support 8/16/32-bit waves (#1278)
*
Add WebAssembly for VAD (#1281)
*
WebAssembly example for VAD + Non-streaming ASR (#1284)
## 1.10.22
*
Add Pascal API for reading wave files (#1243)
...
...
CMakeLists.txt
查看文件 @
5ed8e31
...
...
@@ -11,7 +11,7 @@ project(sherpa-onnx)
# ./nodejs-addon-examples
# ./dart-api-examples/
# ./CHANGELOG.md
set
(
SHERPA_ONNX_VERSION
"1.10.2
2
"
)
set
(
SHERPA_ONNX_VERSION
"1.10.2
3
"
)
# Disable warning about
#
...
...
@@ -206,6 +206,7 @@ if(SHERPA_ONNX_ENABLE_WASM_NODEJS)
if
(
NOT SHERPA_ONNX_ENABLE_WASM
)
message
(
FATAL_ERROR
"Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for NodeJS"
)
endif
()
add_definitions
(
-DSHERPA_ONNX_ENABLE_WASM_KWS=1
)
endif
()
if
(
SHERPA_ONNX_ENABLE_WASM
)
...
...
dart-api-examples/add-punctuations/pubspec.yaml
查看文件 @
5ed8e31
...
...
@@ -9,7 +9,7 @@ environment:
sdk
:
^3.4.0
dependencies
:
sherpa_onnx
:
^1.10.2
2
sherpa_onnx
:
^1.10.2
3
path
:
^1.9.0
args
:
^2.5.0
...
...
dart-api-examples/audio-tagging/pubspec.yaml
查看文件 @
5ed8e31
...
...
@@ -9,7 +9,7 @@ environment:
sdk
:
^3.4.0
dependencies
:
sherpa_onnx
:
^1.10.2
2
sherpa_onnx
:
^1.10.2
3
path
:
^1.9.0
args
:
^2.5.0
...
...
dart-api-examples/keyword-spotter/pubspec.yaml
查看文件 @
5ed8e31
...
...
@@ -9,7 +9,7 @@ environment:
sdk
:
^3.4.0
dependencies
:
sherpa_onnx
:
^1.10.2
2
sherpa_onnx
:
^1.10.2
3
# sherpa_onnx:
# path: ../../flutter/sherpa_onnx
path
:
^1.9.0
...
...
dart-api-examples/non-streaming-asr/pubspec.yaml
查看文件 @
5ed8e31
...
...
@@ -10,7 +10,7 @@ environment:
# Add regular dependencies here.
dependencies
:
sherpa_onnx
:
^1.10.2
2
sherpa_onnx
:
^1.10.2
3
path
:
^1.9.0
args
:
^2.5.0
...
...
dart-api-examples/speaker-identification/pubspec.yaml
查看文件 @
5ed8e31
...
...
@@ -9,7 +9,7 @@ environment:
sdk
:
^3.4.0
dependencies
:
sherpa_onnx
:
^1.10.2
2
sherpa_onnx
:
^1.10.2
3
path
:
^1.9.0
args
:
^2.5.0
...
...
dart-api-examples/streaming-asr/pubspec.yaml
查看文件 @
5ed8e31
...
...
@@ -11,7 +11,7 @@ environment:
# Add regular dependencies here.
dependencies
:
sherpa_onnx
:
^1.10.2
2
sherpa_onnx
:
^1.10.2
3
path
:
^1.9.0
args
:
^2.5.0
...
...
dart-api-examples/tts/pubspec.yaml
查看文件 @
5ed8e31
...
...
@@ -8,7 +8,7 @@ environment:
# Add regular dependencies here.
dependencies
:
sherpa_onnx
:
^1.10.2
2
sherpa_onnx
:
^1.10.2
3
path
:
^1.9.0
args
:
^2.5.0
...
...
dart-api-examples/vad-with-non-streaming-asr/pubspec.yaml
查看文件 @
5ed8e31
...
...
@@ -10,7 +10,7 @@ environment:
sdk
:
^3.4.0
dependencies
:
sherpa_onnx
:
^1.10.2
2
sherpa_onnx
:
^1.10.2
3
path
:
^1.9.0
args
:
^2.5.0
...
...
dart-api-examples/vad/pubspec.yaml
查看文件 @
5ed8e31
...
...
@@ -9,7 +9,7 @@ environment:
sdk
:
^3.4.0
dependencies
:
sherpa_onnx
:
^1.10.2
2
sherpa_onnx
:
^1.10.2
3
path
:
^1.9.0
args
:
^2.5.0
...
...
flutter-examples/streaming_asr/pubspec.yaml
查看文件 @
5ed8e31
...
...
@@ -5,7 +5,7 @@ description: >
publish_to
:
'
none'
version
:
1.10.2
2
version
:
1.10.2
3
topics
:
-
speech-recognition
...
...
@@ -30,7 +30,7 @@ dependencies:
record
:
^5.1.0
url_launcher
:
^6.2.6
sherpa_onnx
:
^1.10.2
2
sherpa_onnx
:
^1.10.2
3
# sherpa_onnx:
# path: ../../flutter/sherpa_onnx
...
...
flutter-examples/tts/pubspec.yaml
查看文件 @
5ed8e31
...
...
@@ -5,7 +5,7 @@ description: >
publish_to
:
'
none'
# Remove this line if you wish to publish to pub.dev
version
:
1.10.2
2
version
:
1.10.2
3
environment
:
sdk
:
'
>=3.4.0
<4.0.0'
...
...
@@ -17,7 +17,7 @@ dependencies:
cupertino_icons
:
^1.0.6
path_provider
:
^2.1.3
path
:
^1.9.0
sherpa_onnx
:
^1.10.2
2
sherpa_onnx
:
^1.10.2
3
url_launcher
:
^6.2.6
audioplayers
:
^5.0.0
...
...
flutter/sherpa_onnx/pubspec.yaml
查看文件 @
5ed8e31
...
...
@@ -17,7 +17,7 @@ topics:
-
voice-activity-detection
# remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
version
:
1.10.2
2
version
:
1.10.2
3
homepage
:
https://github.com/k2-fsa/sherpa-onnx
...
...
@@ -30,23 +30,23 @@ dependencies:
flutter
:
sdk
:
flutter
sherpa_onnx_android
:
^1.10.2
2
sherpa_onnx_android
:
^1.10.2
3
# sherpa_onnx_android:
# path: ../sherpa_onnx_android
sherpa_onnx_macos
:
^1.10.2
2
sherpa_onnx_macos
:
^1.10.2
3
# sherpa_onnx_macos:
# path: ../sherpa_onnx_macos
sherpa_onnx_linux
:
^1.10.2
2
sherpa_onnx_linux
:
^1.10.2
3
# sherpa_onnx_linux:
# path: ../sherpa_onnx_linux
#
sherpa_onnx_windows
:
^1.10.2
2
sherpa_onnx_windows
:
^1.10.2
3
# sherpa_onnx_windows:
# path: ../sherpa_onnx_windows
sherpa_onnx_ios
:
^1.10.2
2
sherpa_onnx_ios
:
^1.10.2
3
# sherpa_onnx_ios:
# path: ../sherpa_onnx_ios
...
...
flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec
查看文件 @
5ed8e31
...
...
@@ -7,7 +7,7 @@
# https://groups.google.com/g/dart-ffi/c/nUATMBy7r0c
Pod
::
Spec
.
new
do
|
s
|
s
.
name
=
'sherpa_onnx_ios'
s
.
version
=
'1.10.2
2
'
s
.
version
=
'1.10.2
3
'
s
.
summary
=
'A new Flutter FFI plugin project.'
s
.
description
=
<<-
DESC
A new Flutter FFI plugin project.
...
...
flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
查看文件 @
5ed8e31
...
...
@@ -4,7 +4,7 @@
#
Pod
::
Spec
.
new
do
|
s
|
s
.
name
=
'sherpa_onnx_macos'
s
.
version
=
'1.10.2
2
'
s
.
version
=
'1.10.2
3
'
s
.
summary
=
'sherpa-onnx Flutter FFI plugin project.'
s
.
description
=
<<-
DESC
sherpa-onnx Flutter FFI plugin project.
...
...
new-release.sh
0 → 100755
查看文件 @
5ed8e31
#!/usr/bin/env bash
find flutter -name
*
.yaml -type f -exec sed -i.bak
's/1\.10\.22/1\.10\.23/g'
{}
\;
find dart-api-examples -name
*
.yaml -type f -exec sed -i.bak
's/1\.10\.22/1\.10\.23/g'
{}
\;
find flutter-examples -name
*
.yaml -type f -exec sed -i.bak
's/1\.10\.22/1\.10\.23/g'
{}
\;
find flutter -name
*
.podspec -type f -exec sed -i.bak
's/1\.10\.22/1\.10\.23/g'
{}
\;
find nodejs-addon-examples -name package.json -type f -exec sed -i.bak
's/1\.10\.22/1\.10\.23/g'
{}
\;
...
...
nodejs-addon-examples/package.json
查看文件 @
5ed8e31
{
"dependencies"
:
{
"sherpa-onnx-node"
:
"^1.10.2
2
"
"sherpa-onnx-node"
:
"^1.10.2
3
"
}
}
...
...
nodejs-addon-examples/test_keyword_spotter_transducer.js
查看文件 @
5ed8e31
...
...
@@ -42,11 +42,11 @@ stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});
const
detectedKeywords
=
[];
while
(
kws
.
isReady
(
stream
))
{
kws
.
decode
(
stream
);
const
keyword
=
kws
.
getResult
(
stream
).
keyword
;
if
(
keyword
!=
''
)
{
detectedKeywords
.
push
(
keyword
);
}
kws
.
decode
(
stream
);
}
let
stop
=
Date
.
now
();
...
...
nodejs-addon-examples/test_vad_with_non_streaming_asr_whisper.js
查看文件 @
5ed8e31
...
...
@@ -120,8 +120,8 @@ console.log('Done')
const
elapsed_seconds
=
(
stop
-
start
)
/
1000
;
const
duration
=
wave
.
samples
.
length
/
wave
.
sampleRate
;
const
real_time_factor
=
elapsed_seconds
/
duration
;
console
.
log
(
'Wave duration'
,
duration
.
toFixed
(
3
),
'secodns'
)
console
.
log
(
'Elapsed'
,
elapsed_seconds
.
toFixed
(
3
),
'secodns'
)
console
.
log
(
'Wave duration'
,
duration
.
toFixed
(
3
),
'seconds'
)
console
.
log
(
'Elapsed'
,
elapsed_seconds
.
toFixed
(
3
),
'seconds'
)
console
.
log
(
`
RTF
=
$
{
elapsed_seconds
.
toFixed
(
3
)}
/${duration.toFixed
(
3
)
} =`
,
real_time_factor
.
toFixed
(
3
))
...
...
nodejs-examples/test-keyword-spotter-transducer.js
0 → 100644
查看文件 @
5ed8e31
// Copyright (c) 2024 Xiaomi Corporation
const
sherpa_onnx
=
require
(
'sherpa-onnx'
);
function
createKeywordSpotter
()
{
// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
const
config
=
{
'modelConfig'
:
{
'transducer'
:
{
'encoder'
:
'./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx'
,
'decoder'
:
'./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx'
,
'joiner'
:
'./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx'
,
},
'tokens'
:
'./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt'
,
},
keywords
:
'w én s ēn t è k ǎ s uǒ @文森特卡索\n'
+
'f ǎ g uó @法国'
};
return
sherpa_onnx
.
createKws
(
config
);
}
const
kws
=
createKeywordSpotter
();
const
stream
=
kws
.
createStream
();
const
waveFilename
=
'./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav'
;
const
wave
=
sherpa_onnx
.
readWave
(
waveFilename
);
stream
.
acceptWaveform
(
wave
.
sampleRate
,
wave
.
samples
);
const
tailPadding
=
new
Float32Array
(
wave
.
sampleRate
*
0.4
);
stream
.
acceptWaveform
(
kws
.
config
.
featConfig
.
sampleRate
,
tailPadding
);
const
detectedKeywords
=
[];
while
(
kws
.
isReady
(
stream
))
{
kws
.
decode
(
stream
);
const
keyword
=
kws
.
getResult
(
stream
).
keyword
;
if
(
keyword
!=
''
)
{
detectedKeywords
.
push
(
keyword
);
}
}
console
.
log
(
detectedKeywords
);
stream
.
free
();
kws
.
free
();
...
...
nodejs-examples/test-offline-nemo-ctc.js
查看文件 @
5ed8e31
...
...
@@ -7,27 +7,13 @@ const wav = require('wav');
const
sherpa_onnx
=
require
(
'sherpa-onnx'
);
function
createOfflineRecognizer
()
{
let
featConfig
=
{
sampleRate
:
16000
,
featureDim
:
80
,
};
let
modelConfig
=
{
nemoCtc
:
{
model
:
'./sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx'
,
},
tokens
:
'./sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt'
,
numThreads
:
1
,
debug
:
0
,
provider
:
'cpu'
,
modelType
:
'nemo_ctc'
,
};
let
config
=
{
featConfig
:
featConfig
,
modelConfig
:
modelConfig
,
decodingMethod
:
'greedy_search'
,
maxActivePaths
:
4
,
modelConfig
:
{
nemoCtc
:
{
model
:
'./sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx'
,
},
tokens
:
'./sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt'
,
}
};
return
sherpa_onnx
.
createOfflineRecognizer
(
config
);
...
...
@@ -38,63 +24,12 @@ const stream = recognizer.createStream();
const
waveFilename
=
'./sherpa-onnx-nemo-ctc-en-conformer-small/test_wavs/0.wav'
;
const
wave
=
sherpa_onnx
.
readWave
(
waveFilename
);
stream
.
acceptWaveform
(
wave
.
sampleRate
,
wave
.
samples
);
const
reader
=
new
wav
.
Reader
();
const
readable
=
new
Readable
().
wrap
(
reader
);
const
buf
=
[];
reader
.
on
(
'format'
,
({
audioFormat
,
bitDepth
,
channels
,
sampleRate
})
=>
{
if
(
sampleRate
!=
recognizer
.
config
.
featConfig
.
sampleRate
)
{
throw
new
Error
(
`
Only
support
sampleRate
$
{
recognizer
.
config
.
featConfig
.
sampleRate
}.
Given
$
{
sampleRate
}
`
);
}
if
(
audioFormat
!=
1
)
{
throw
new
Error
(
`
Only
support
PCM
format
.
Given
$
{
audioFormat
}
`
);
}
if
(
channels
!=
1
)
{
throw
new
Error
(
`
Only
a
single
channel
.
Given
$
{
channel
}
`
);
}
if
(
bitDepth
!=
16
)
{
throw
new
Error
(
`
Only
support
16
-
bit
samples
.
Given
$
{
bitDepth
}
`
);
}
});
fs
.
createReadStream
(
waveFilename
,
{
highWaterMark
:
4096
})
.
pipe
(
reader
)
.
on
(
'finish'
,
function
(
err
)
{
// tail padding
const
floatSamples
=
new
Float32Array
(
recognizer
.
config
.
featConfig
.
sampleRate
*
0.5
);
buf
.
push
(
floatSamples
);
const
flattened
=
Float32Array
.
from
(
buf
.
reduce
((
a
,
b
)
=>
[...
a
,
...
b
],
[]));
stream
.
acceptWaveform
(
recognizer
.
config
.
featConfig
.
sampleRate
,
flattened
);
recognizer
.
decode
(
stream
);
const
text
=
recognizer
.
getResult
(
stream
).
text
;
console
.
log
(
text
);
stream
.
free
();
recognizer
.
free
();
});
readable
.
on
(
'readable'
,
function
()
{
let
chunk
;
while
((
chunk
=
readable
.
read
())
!=
null
)
{
const
int16Samples
=
new
Int16Array
(
chunk
.
buffer
,
chunk
.
byteOffset
,
chunk
.
length
/
Int16Array
.
BYTES_PER_ELEMENT
);
const
floatSamples
=
new
Float32Array
(
int16Samples
.
length
);
for
(
let
i
=
0
;
i
<
floatSamples
.
length
;
i
++
)
{
floatSamples
[
i
]
=
int16Samples
[
i
]
/
32768.0
;
}
recognizer
.
decode
(
stream
);
const
text
=
recognizer
.
getResult
(
stream
).
text
;
console
.
log
(
text
);
buf
.
push
(
floatSamples
);
}
});
stream
.
free
();
recognizer
.
free
();
...
...
nodejs-examples/test-offline-paraformer-itn.js
查看文件 @
5ed8e31
...
...
@@ -7,27 +7,15 @@ const wav = require('wav');
const
sherpa_onnx
=
require
(
'sherpa-onnx'
);
function
createOfflineRecognizer
()
{
let
featConfig
=
{
sampleRate
:
16000
,
featureDim
:
80
,
};
let
modelConfig
=
{
paraformer
:
{
model
:
'./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx'
,
},
tokens
:
'./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt'
,
numThreads
:
1
,
debug
:
0
,
provider
:
'cpu'
,
modelType
:
'paraformer'
,
};
let
config
=
{
featConfig
:
featConfig
,
modelConfig
:
modelConfig
,
decodingMethod
:
'greedy_search'
,
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
ruleFsts
:
'./itn_zh_number.fst'
,
};
...
...
@@ -41,62 +29,12 @@ const stream = recognizer.createStream();
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
const
waveFilename
=
'./itn-zh-number.wav'
;
const
wave
=
sherpa_onnx
.
readWave
(
waveFilename
);
stream
.
acceptWaveform
(
wave
.
sampleRate
,
wave
.
samples
);
const
reader
=
new
wav
.
Reader
();
const
readable
=
new
Readable
().
wrap
(
reader
);
const
buf
=
[];
reader
.
on
(
'format'
,
({
audioFormat
,
bitDepth
,
channels
,
sampleRate
})
=>
{
if
(
sampleRate
!=
recognizer
.
config
.
featConfig
.
sampleRate
)
{
throw
new
Error
(
`
Only
support
sampleRate
$
{
recognizer
.
config
.
featConfig
.
sampleRate
}.
Given
$
{
sampleRate
}
`
);
}
if
(
audioFormat
!=
1
)
{
throw
new
Error
(
`
Only
support
PCM
format
.
Given
$
{
audioFormat
}
`
);
}
if
(
channels
!=
1
)
{
throw
new
Error
(
`
Only
a
single
channel
.
Given
$
{
channel
}
`
);
}
if
(
bitDepth
!=
16
)
{
throw
new
Error
(
`
Only
support
16
-
bit
samples
.
Given
$
{
bitDepth
}
`
);
}
});
fs
.
createReadStream
(
waveFilename
,
{
'highWaterMark'
:
4096
})
.
pipe
(
reader
)
.
on
(
'finish'
,
function
(
err
)
{
// tail padding
const
floatSamples
=
new
Float32Array
(
recognizer
.
config
.
featConfig
.
sampleRate
*
0.5
);
buf
.
push
(
floatSamples
);
const
flattened
=
Float32Array
.
from
(
buf
.
reduce
((
a
,
b
)
=>
[...
a
,
...
b
],
[]));
stream
.
acceptWaveform
(
recognizer
.
config
.
featConfig
.
sampleRate
,
flattened
);
recognizer
.
decode
(
stream
);
const
text
=
recognizer
.
getResult
(
stream
).
text
;
console
.
log
(
text
);
stream
.
free
();
recognizer
.
free
();
});
readable
.
on
(
'readable'
,
function
()
{
let
chunk
;
while
((
chunk
=
readable
.
read
())
!=
null
)
{
const
int16Samples
=
new
Int16Array
(
chunk
.
buffer
,
chunk
.
byteOffset
,
chunk
.
length
/
Int16Array
.
BYTES_PER_ELEMENT
);
const
floatSamples
=
new
Float32Array
(
int16Samples
.
length
);
for
(
let
i
=
0
;
i
<
floatSamples
.
length
;
i
++
)
{
floatSamples
[
i
]
=
int16Samples
[
i
]
/
32768.0
;
}
recognizer
.
decode
(
stream
);
const
text
=
recognizer
.
getResult
(
stream
).
text
;
console
.
log
(
text
);
buf
.
push
(
floatSamples
);
}
});
stream
.
free
();
recognizer
.
free
();
...
...
nodejs-examples/test-offline-paraformer.js
查看文件 @
5ed8e31
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
const
fs
=
require
(
'fs'
);
const
{
Readable
}
=
require
(
'stream'
);
const
wav
=
require
(
'wav'
);
const
sherpa_onnx
=
require
(
'sherpa-onnx'
);
function
createOfflineRecognizer
()
{
let
featConfig
=
{
sampleRate
:
16000
,
featureDim
:
80
,
};
let
modelConfig
=
{
paraformer
:
{
model
:
'./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx'
,
},
tokens
:
'./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt'
,
numThreads
:
1
,
debug
:
0
,
provider
:
'cpu'
,
modelType
:
'paraformer'
,
};
let
config
=
{
featConfig
:
featConfig
,
modelConfig
:
modelConfig
,
decodingMethod
:
'greedy_search'
,
};
return
sherpa_onnx
.
createOfflineRecognizer
(
config
);
}
const
recognizer
=
createOfflineRecognizer
();
const
stream
=
recognizer
.
createStream
();
const
waveFilename
=
'./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav'
;
const
wave
=
sherpa_onnx
.
readWave
(
waveFilename
);
stream
.
acceptWaveform
(
wave
.
sampleRate
,
wave
.
samples
);
const
reader
=
new
wav
.
Reader
();
const
readable
=
new
Readable
().
wrap
(
reader
);
const
buf
=
[];
reader
.
on
(
'format'
,
({
audioFormat
,
bitDepth
,
channels
,
sampleRate
})
=>
{
if
(
sampleRate
!=
recognizer
.
config
.
featConfig
.
sampleRate
)
{
throw
new
Error
(
`
Only
support
sampleRate
$
{
recognizer
.
config
.
featConfig
.
sampleRate
}.
Given
$
{
sampleRate
}
`
);
}
if
(
audioFormat
!=
1
)
{
throw
new
Error
(
`
Only
support
PCM
format
.
Given
$
{
audioFormat
}
`
);
}
if
(
channels
!=
1
)
{
throw
new
Error
(
`
Only
a
single
channel
.
Given
$
{
channel
}
`
);
}
if
(
bitDepth
!=
16
)
{
throw
new
Error
(
`
Only
support
16
-
bit
samples
.
Given
$
{
bitDepth
}
`
);
}
});
fs
.
createReadStream
(
waveFilename
,
{
'highWaterMark'
:
4096
})
.
pipe
(
reader
)
.
on
(
'finish'
,
function
(
err
)
{
// tail padding
const
floatSamples
=
new
Float32Array
(
recognizer
.
config
.
featConfig
.
sampleRate
*
0.5
);
buf
.
push
(
floatSamples
);
const
flattened
=
Float32Array
.
from
(
buf
.
reduce
((
a
,
b
)
=>
[...
a
,
...
b
],
[]));
stream
.
acceptWaveform
(
recognizer
.
config
.
featConfig
.
sampleRate
,
flattened
);
recognizer
.
decode
(
stream
);
const
text
=
recognizer
.
getResult
(
stream
).
text
;
console
.
log
(
text
);
stream
.
free
();
recognizer
.
free
();
});
readable
.
on
(
'readable'
,
function
()
{
let
chunk
;
while
((
chunk
=
readable
.
read
())
!=
null
)
{
const
int16Samples
=
new
Int16Array
(
chunk
.
buffer
,
chunk
.
byteOffset
,
chunk
.
length
/
Int16Array
.
BYTES_PER_ELEMENT
);
const
floatSamples
=
new
Float32Array
(
int16Samples
.
length
);
for
(
let
i
=
0
;
i
<
floatSamples
.
length
;
i
++
)
{
floatSamples
[
i
]
=
int16Samples
[
i
]
/
32768.0
;
}
recognizer
.
decode
(
stream
);
const
text
=
recognizer
.
getResult
(
stream
).
text
;
console
.
log
(
text
);
buf
.
push
(
floatSamples
);
}
});
stream
.
free
();
recognizer
.
free
();
...
...
nodejs-examples/test-offline-sense-voice.js
查看文件 @
5ed8e31
// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
const
fs
=
require
(
'fs'
);
const
{
Readable
}
=
require
(
'stream'
);
const
wav
=
require
(
'wav'
);
const
sherpa_onnx
=
require
(
'sherpa-onnx'
);
function
createOfflineRecognizer
()
{
let
featConfig
=
{
sampleRate
:
16000
,
featureDim
:
80
,
};
let
modelConfig
=
{
senseVoice
:
{
model
:
...
...
@@ -20,82 +11,26 @@ function createOfflineRecognizer() {
useInverseTextNormalization
:
1
,
},
tokens
:
'./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt'
,
numThreads
:
1
,
debug
:
0
,
provider
:
'cpu'
,
};
let
config
=
{
featConfig
:
featConfig
,
modelConfig
:
modelConfig
,
decodingMethod
:
'greedy_search'
,
};
return
sherpa_onnx
.
createOfflineRecognizer
(
config
);
}
const
recognizer
=
createOfflineRecognizer
();
const
stream
=
recognizer
.
createStream
();
const
waveFilename
=
'./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav'
;
const
wave
=
sherpa_onnx
.
readWave
(
waveFilename
);
stream
.
acceptWaveform
(
wave
.
sampleRate
,
wave
.
samples
);
const
reader
=
new
wav
.
Reader
();
const
readable
=
new
Readable
().
wrap
(
reader
);
const
buf
=
[];
reader
.
on
(
'format'
,
({
audioFormat
,
bitDepth
,
channels
,
sampleRate
})
=>
{
if
(
sampleRate
!=
recognizer
.
config
.
featConfig
.
sampleRate
)
{
throw
new
Error
(
`
Only
support
sampleRate
$
{
recognizer
.
config
.
featConfig
.
sampleRate
}.
Given
$
{
sampleRate
}
`
);
}
if
(
audioFormat
!=
1
)
{
throw
new
Error
(
`
Only
support
PCM
format
.
Given
$
{
audioFormat
}
`
);
}
if
(
channels
!=
1
)
{
throw
new
Error
(
`
Only
a
single
channel
.
Given
$
{
channel
}
`
);
}
if
(
bitDepth
!=
16
)
{
throw
new
Error
(
`
Only
support
16
-
bit
samples
.
Given
$
{
bitDepth
}
`
);
}
});
fs
.
createReadStream
(
waveFilename
,
{
'highWaterMark'
:
4096
})
.
pipe
(
reader
)
.
on
(
'finish'
,
function
(
err
)
{
// tail padding
const
floatSamples
=
new
Float32Array
(
recognizer
.
config
.
featConfig
.
sampleRate
*
0.5
);
buf
.
push
(
floatSamples
);
const
flattened
=
Float32Array
.
from
(
buf
.
reduce
((
a
,
b
)
=>
[...
a
,
...
b
],
[]));
stream
.
acceptWaveform
(
recognizer
.
config
.
featConfig
.
sampleRate
,
flattened
);
recognizer
.
decode
(
stream
);
const
text
=
recognizer
.
getResult
(
stream
).
text
;
console
.
log
(
text
);
stream
.
free
();
recognizer
.
free
();
});
readable
.
on
(
'readable'
,
function
()
{
let
chunk
;
while
((
chunk
=
readable
.
read
())
!=
null
)
{
const
int16Samples
=
new
Int16Array
(
chunk
.
buffer
,
chunk
.
byteOffset
,
chunk
.
length
/
Int16Array
.
BYTES_PER_ELEMENT
);
const
floatSamples
=
new
Float32Array
(
int16Samples
.
length
);
for
(
let
i
=
0
;
i
<
floatSamples
.
length
;
i
++
)
{
floatSamples
[
i
]
=
int16Samples
[
i
]
/
32768.0
;
}
recognizer
.
decode
(
stream
);
const
text
=
recognizer
.
getResult
(
stream
).
text
;
console
.
log
(
text
);
buf
.
push
(
floatSamples
);
}
});
stream
.
free
();
recognizer
.
free
();
...
...
nodejs-examples/test-offline-transducer.js
查看文件 @
5ed8e31
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
//
const
fs
=
require
(
'fs'
);
const
{
Readable
}
=
require
(
'stream'
);
const
wav
=
require
(
'wav'
);
const
sherpa_onnx
=
require
(
'sherpa-onnx'
);
function
createOfflineRecognizer
()
{
let
featConfig
=
{
sampleRate
:
16000
,
featureDim
:
80
,
};
let
modelConfig
=
{
transducer
:
{
encoder
:
...
...
@@ -22,19 +13,11 @@ function createOfflineRecognizer() {
'./sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.int8.onnx'
,
},
tokens
:
'./sherpa-onnx-zipformer-en-2023-06-26/tokens.txt'
,
numThreads
:
1
,
debug
:
0
,
provider
:
'cpu'
,
modelType
:
'transducer'
,
};
let
config
=
{
featConfig
:
featConfig
,
modelConfig
:
modelConfig
,
decodingMethod
:
'greedy_search'
,
maxActivePaths
:
4
,
hotwordsFile
:
''
,
hotwordsScore
:
1.5
,
};
return
sherpa_onnx
.
createOfflineRecognizer
(
config
);
...
...
@@ -43,62 +26,12 @@ const recognizer = createOfflineRecognizer();
const
stream
=
recognizer
.
createStream
();
const
waveFilename
=
'./sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav'
;
const
wave
=
sherpa_onnx
.
readWave
(
waveFilename
);
stream
.
acceptWaveform
(
wave
.
sampleRate
,
wave
.
samples
);
const
reader
=
new
wav
.
Reader
();
const
readable
=
new
Readable
().
wrap
(
reader
);
const
buf
=
[];
reader
.
on
(
'format'
,
({
audioFormat
,
bitDepth
,
channels
,
sampleRate
})
=>
{
if
(
sampleRate
!=
recognizer
.
config
.
featConfig
.
sampleRate
)
{
throw
new
Error
(
`
Only
support
sampleRate
$
{
recognizer
.
config
.
featConfig
.
sampleRate
}.
Given
$
{
sampleRate
}
`
);
}
if
(
audioFormat
!=
1
)
{
throw
new
Error
(
`
Only
support
PCM
format
.
Given
$
{
audioFormat
}
`
);
}
if
(
channels
!=
1
)
{
throw
new
Error
(
`
Only
a
single
channel
.
Given
$
{
channel
}
`
);
}
if
(
bitDepth
!=
16
)
{
throw
new
Error
(
`
Only
support
16
-
bit
samples
.
Given
$
{
bitDepth
}
`
);
}
});
fs
.
createReadStream
(
waveFilename
,
{
'highWaterMark'
:
4096
})
.
pipe
(
reader
)
.
on
(
'finish'
,
function
(
err
)
{
// tail padding
const
floatSamples
=
new
Float32Array
(
recognizer
.
config
.
featConfig
.
sampleRate
*
0.5
);
buf
.
push
(
floatSamples
);
const
flattened
=
Float32Array
.
from
(
buf
.
reduce
((
a
,
b
)
=>
[...
a
,
...
b
],
[]));
stream
.
acceptWaveform
(
recognizer
.
config
.
featConfig
.
sampleRate
,
flattened
);
recognizer
.
decode
(
stream
);
const
text
=
recognizer
.
getResult
(
stream
).
text
;
console
.
log
(
text
);
stream
.
free
();
recognizer
.
free
();
});
readable
.
on
(
'readable'
,
function
()
{
let
chunk
;
while
((
chunk
=
readable
.
read
())
!=
null
)
{
const
int16Samples
=
new
Int16Array
(
chunk
.
buffer
,
chunk
.
byteOffset
,
chunk
.
length
/
Int16Array
.
BYTES_PER_ELEMENT
);
const
floatSamples
=
new
Float32Array
(
int16Samples
.
length
);
for
(
let
i
=
0
;
i
<
floatSamples
.
length
;
i
++
)
{
floatSamples
[
i
]
=
int16Samples
[
i
]
/
32768.0
;
}
recognizer
.
decode
(
stream
);
const
text
=
recognizer
.
getResult
(
stream
).
text
;
console
.
log
(
text
);
buf
.
push
(
floatSamples
);
}
});
stream
.
free
();
recognizer
.
free
();
...
...
nodejs-examples/test-offline-whisper.js
查看文件 @
5ed8e31
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
//
const
fs
=
require
(
'fs'
);
const
{
Readable
}
=
require
(
'stream'
);
const
wav
=
require
(
'wav'
);
const
sherpa_onnx
=
require
(
'sherpa-onnx'
);
function
createOfflineRecognizer
()
{
let
featConfig
=
{
sampleRate
:
16000
,
featureDim
:
80
,
};
let
modelConfig
=
{
whisper
:
{
encoder
:
'./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx'
,
...
...
@@ -21,83 +12,25 @@ function createOfflineRecognizer() {
tailPaddings
:
-
1
,
},
tokens
:
'./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt'
,
numThreads
:
1
,
debug
:
0
,
provider
:
'cpu'
,
modelType
:
'whisper'
,
};
let
config
=
{
featConfig
:
featConfig
,
modelConfig
:
modelConfig
,
decodingMethod
:
'greedy_search'
,
};
return
sherpa_onnx
.
createOfflineRecognizer
(
config
);
}
recognizer
=
createOfflineRecognizer
();
stream
=
recognizer
.
createStream
();
const
waveFilename
=
'./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav'
;
const
wave
=
sherpa_onnx
.
readWave
(
waveFilename
);
stream
.
acceptWaveform
(
wave
.
sampleRate
,
wave
.
samples
);
const
reader
=
new
wav
.
Reader
();
const
readable
=
new
Readable
().
wrap
(
reader
);
const
buf
=
[];
reader
.
on
(
'format'
,
({
audioFormat
,
bitDepth
,
channels
,
sampleRate
})
=>
{
if
(
sampleRate
!=
recognizer
.
config
.
featConfig
.
sampleRate
)
{
throw
new
Error
(
`
Only
support
sampleRate
$
{
recognizer
.
config
.
featConfig
.
sampleRate
}.
Given
$
{
sampleRate
}
`
);
}
if
(
audioFormat
!=
1
)
{
throw
new
Error
(
`
Only
support
PCM
format
.
Given
$
{
audioFormat
}
`
);
}
if
(
channels
!=
1
)
{
throw
new
Error
(
`
Only
a
single
channel
.
Given
$
{
channel
}
`
);
}
if
(
bitDepth
!=
16
)
{
throw
new
Error
(
`
Only
support
16
-
bit
samples
.
Given
$
{
bitDepth
}
`
);
}
});
fs
.
createReadStream
(
waveFilename
,
{
'highWaterMark'
:
4096
})
.
pipe
(
reader
)
.
on
(
'finish'
,
function
(
err
)
{
// tail padding
const
floatSamples
=
new
Float32Array
(
recognizer
.
config
.
featConfig
.
sampleRate
*
0.5
);
buf
.
push
(
floatSamples
);
const
flattened
=
Float32Array
.
from
(
buf
.
reduce
((
a
,
b
)
=>
[...
a
,
...
b
],
[]));
stream
.
acceptWaveform
(
recognizer
.
config
.
featConfig
.
sampleRate
,
flattened
);
recognizer
.
decode
(
stream
);
const
text
=
recognizer
.
getResult
(
stream
).
text
;
console
.
log
(
text
);
stream
.
free
();
recognizer
.
free
();
});
readable
.
on
(
'readable'
,
function
()
{
let
chunk
;
while
((
chunk
=
readable
.
read
())
!=
null
)
{
const
int16Samples
=
new
Int16Array
(
chunk
.
buffer
,
chunk
.
byteOffset
,
chunk
.
length
/
Int16Array
.
BYTES_PER_ELEMENT
);
const
floatSamples
=
new
Float32Array
(
int16Samples
.
length
);
for
(
let
i
=
0
;
i
<
floatSamples
.
length
;
i
++
)
{
floatSamples
[
i
]
=
int16Samples
[
i
]
/
32768.0
;
}
recognizer
.
decode
(
stream
);
const
text
=
recognizer
.
getResult
(
stream
).
text
;
console
.
log
(
text
);
buf
.
push
(
floatSamples
);
}
});
stream
.
free
();
recognizer
.
free
();
...
...
nodejs-examples/test-online-paraformer-microphone.js
查看文件 @
5ed8e31
...
...
@@ -16,22 +16,10 @@ function createOnlineRecognizer() {
let
onlineModelConfig
=
{
paraformer
:
onlineParaformerModelConfig
,
tokens
:
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt'
,
numThreads
:
1
,
provider
:
'cpu'
,
debug
:
1
,
modelType
:
'paraformer'
,
};
let
featureConfig
=
{
sampleRate
:
16000
,
featureDim
:
80
,
};
let
recognizerConfig
=
{
featConfig
:
featureConfig
,
modelConfig
:
onlineModelConfig
,
decodingMethod
:
'greedy_search'
,
maxActivePaths
:
4
,
enableEndpoint
:
1
,
rule1MinTrailingSilence
:
2.4
,
rule2MinTrailingSilence
:
1.2
,
...
...
nodejs-examples/test-online-paraformer.js
查看文件 @
5ed8e31
...
...
@@ -17,26 +17,10 @@ function createOnlineRecognizer() {
let
onlineModelConfig
=
{
paraformer
:
onlineParaformerModelConfig
,
tokens
:
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt'
,
numThreads
:
1
,
provider
:
'cpu'
,
debug
:
1
,
modelType
:
'paraformer'
,
};
let
featureConfig
=
{
sampleRate
:
16000
,
featureDim
:
80
,
};
let
recognizerConfig
=
{
featConfig
:
featureConfig
,
modelConfig
:
onlineModelConfig
,
decodingMethod
:
'greedy_search'
,
maxActivePaths
:
4
,
enableEndpoint
:
1
,
rule1MinTrailingSilence
:
2.4
,
rule2MinTrailingSilence
:
1.2
,
rule3MinUtteranceLength
:
20
,
};
return
sherpa_onnx
.
createOnlineRecognizer
(
recognizerConfig
);
...
...
nodejs-examples/test-online-transducer.js
查看文件 @
5ed8e31
...
...
@@ -20,26 +20,10 @@ function createOnlineRecognizer() {
transducer
:
onlineTransducerModelConfig
,
tokens
:
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt'
,
numThreads
:
1
,
provider
:
'cpu'
,
debug
:
1
,
modelType
:
'zipformer'
,
};
let
featureConfig
=
{
sampleRate
:
16000
,
featureDim
:
80
,
};
let
recognizerConfig
=
{
featConfig
:
featureConfig
,
modelConfig
:
onlineModelConfig
,
decodingMethod
:
'greedy_search'
,
maxActivePaths
:
4
,
enableEndpoint
:
1
,
rule1MinTrailingSilence
:
2.4
,
rule2MinTrailingSilence
:
1.2
,
rule3MinUtteranceLength
:
20
,
};
return
sherpa_onnx
.
createOnlineRecognizer
(
recognizerConfig
);
...
...
nodejs-examples/test-vad-with-non-streaming-asr-whisper.js
0 → 100644
查看文件 @
5ed8e31
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
const
sherpa_onnx
=
require
(
'sherpa-onnx'
);
// Creates a non-streaming (offline) recognizer backed by Whisper tiny.en.
//
// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
function createRecognizer() {
  const whisper = {
    'encoder': './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx',
    'decoder': './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx',
    'tailPaddings': 2000,
  };

  return sherpa_onnx.createOfflineRecognizer({
    'modelConfig': {
      'whisper': whisper,
      'tokens': './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt',
      'debug': 0,
    }
  });
}
// Creates a silero-VAD based voice activity detector.
//
// please download silero_vad.onnx from
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
function createVad() {
  const sileroVad = {
    model: './silero_vad.onnx',
    threshold: 0.5,
    minSpeechDuration: 0.25,
    minSilenceDuration: 0.5,
    windowSize: 512,
  };

  return sherpa_onnx.createVad({
    sileroVad: sileroVad,
    sampleRate: 16000,
    debug: true,
    numThreads: 1,
    bufferSizeInSeconds: 60,
  });
}
const recognizer = createRecognizer();
const vad = createVad();

// please download ./Obama.wav from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const waveFilename = './Obama.wav';
const wave = sherpa_onnx.readWave(waveFilename);

if (wave.sampleRate != recognizer.config.featConfig.sampleRate) {
  // Bug fix: the original used a single-quoted string here, so the ${...}
  // placeholders were printed literally instead of being interpolated.
  throw new Error(`Expected sample rate: ${
      recognizer.config.featConfig.sampleRate}. Given: ${wave.sampleRate}`);
}

// Recognizes one speech segment produced by the VAD and, when non-empty,
// prints its transcript with the segment's start/end time in seconds.
// (Extracted: this logic was duplicated verbatim for the main loop and the
// post-flush loop.)
function processSegment(segment) {
  const start_time = (segment.start / wave.sampleRate).toFixed(2);
  const end_time = (segment.start / wave.sampleRate +
                    segment.samples.length / wave.sampleRate)
                       .toFixed(2);

  const stream = recognizer.createStream();
  stream.acceptWaveform(wave.sampleRate, segment.samples);
  recognizer.decode(stream);
  const r = recognizer.getResult(stream);
  if (r.text.length > 0) {
    const text = r.text.toLowerCase().trim();
    console.log(`${start_time} -- ${end_time}: ${text}`);
  }
  stream.free();
}

console.log('Started')
let start = Date.now();

// Walk over the wave in VAD-window-sized steps, handing each window to the
// VAD and transcribing every complete speech segment it emits.
const windowSize = vad.config.sileroVad.windowSize;
for (let i = 0; i < wave.samples.length; i += windowSize) {
  const thisWindow = wave.samples.subarray(i, i + windowSize);
  vad.acceptWaveform(thisWindow);

  while (!vad.isEmpty()) {
    const segment = vad.front();
    vad.pop();
    processSegment(segment);
  }
}

// Flush so the trailing (possibly still-open) segment is emitted as well.
vad.flush();
while (!vad.isEmpty()) {
  const segment = vad.front();
  vad.pop();
  processSegment(segment);
}

let stop = Date.now();
console.log('Done')

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds')
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds')
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3))

vad.free();
recognizer.free();
...
...
scripts/nodejs/.gitignore
查看文件 @
5ed8e31
node_modules
jslint.mjs
sherpa-onnx-*.js
sherpa-onnx-*.wasm
...
...
scripts/nodejs/index.js
查看文件 @
5ed8e31
...
...
@@ -4,6 +4,9 @@
const
wasmModule
=
require
(
'./sherpa-onnx-wasm-nodejs.js'
)();
const
sherpa_onnx_asr
=
require
(
'./sherpa-onnx-asr.js'
);
const
sherpa_onnx_tts
=
require
(
'./sherpa-onnx-tts.js'
);
const
sherpa_onnx_kws
=
require
(
'./sherpa-onnx-kws.js'
);
const
sherpa_onnx_wave
=
require
(
'./sherpa-onnx-wave.js'
);
const
sherpa_onnx_vad
=
require
(
'./sherpa-onnx-vad.js'
);
function
createOnlineRecognizer
(
config
)
{
return
sherpa_onnx_asr
.
createOnlineRecognizer
(
wasmModule
,
config
);
...
...
@@ -17,10 +20,35 @@ function createOfflineTts(config) {
return
sherpa_onnx_tts
.
createOfflineTts
(
wasmModule
,
config
);
}
// Creates a keyword spotter from the given configuration.
function createKws(config) {
  const kws = sherpa_onnx_kws.createKws(wasmModule, config);
  return kws;
}
// Allocates a circular buffer holding `capacity` samples.
function createCircularBuffer(capacity) {
  const buffer = new sherpa_onnx_vad.CircularBuffer(capacity, wasmModule);
  return buffer;
}
// Creates a voice activity detector from the given configuration.
function createVad(config) {
  const vad = sherpa_onnx_vad.createVad(wasmModule, config);
  return vad;
}
// Reads a wave file; returns {samples: Float32Array, sampleRate: number}.
function readWave(filename) {
  const wave = sherpa_onnx_wave.readWave(filename, wasmModule);
  return wave;
}
// Writes data ({samples: Float32Array, sampleRate: number}) to `filename`.
function writeWave(filename, data) {
  sherpa_onnx_wave.writeWave(filename, data, wasmModule);
}
// Note: online means streaming and offline means non-streaming here.
// Both of them don't require internet connection.
module
.
exports
=
{
createOnlineRecognizer
,
createOfflineRecognizer
,
createOfflineTts
,
createKws
,
readWave
,
writeWave
,
createCircularBuffer
,
createVad
,
};
...
...
wasm/asr/sherpa-onnx-asr.js
查看文件 @
5ed8e31
...
...
@@ -546,7 +546,7 @@ function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
Module
.
setValue
(
ptr
+
12
,
buffer
+
offset
,
'i8*'
);
offset
+=
taskLen
;
Module
.
setValue
(
ptr
+
16
,
config
.
tailPaddings
||
-
1
,
'i32'
);
Module
.
setValue
(
ptr
+
16
,
config
.
tailPaddings
||
2000
,
'i32'
);
return
{
buffer
:
buffer
,
ptr
:
ptr
,
len
:
len
,
...
...
wasm/kws/sherpa-onnx-kws.js
查看文件 @
5ed8e31
...
...
@@ -69,13 +69,14 @@ function initModelConfig(config, Module) {
const
len
=
transducer
.
len
+
paraformer_len
+
ctc_len
+
7
*
4
;
const
ptr
=
Module
.
_malloc
(
len
);
Module
.
HEAPU8
.
fill
(
0
,
ptr
,
ptr
+
len
);
let
offset
=
0
;
Module
.
_CopyHeap
(
transducer
.
ptr
,
transducer
.
len
,
ptr
+
offset
);
const
tokensLen
=
Module
.
lengthBytesUTF8
(
config
.
tokens
)
+
1
;
const
providerLen
=
Module
.
lengthBytesUTF8
(
config
.
provider
)
+
1
;
const
modelTypeLen
=
Module
.
lengthBytesUTF8
(
config
.
modelType
)
+
1
;
const
providerLen
=
Module
.
lengthBytesUTF8
(
config
.
provider
||
'cpu'
)
+
1
;
const
modelTypeLen
=
Module
.
lengthBytesUTF8
(
config
.
modelType
||
''
)
+
1
;
const
modelingUnitLen
=
Module
.
lengthBytesUTF8
(
config
.
modelingUnit
||
''
)
+
1
;
const
bpeVocabLen
=
Module
.
lengthBytesUTF8
(
config
.
bpeVocab
||
''
)
+
1
;
const
bufferLen
=
...
...
@@ -86,10 +87,10 @@ function initModelConfig(config, Module) {
Module
.
stringToUTF8
(
config
.
tokens
,
buffer
,
tokensLen
);
offset
+=
tokensLen
;
Module
.
stringToUTF8
(
config
.
provider
,
buffer
+
offset
,
providerLen
);
Module
.
stringToUTF8
(
config
.
provider
||
'cpu'
,
buffer
+
offset
,
providerLen
);
offset
+=
providerLen
;
Module
.
stringToUTF8
(
config
.
modelType
,
buffer
+
offset
,
modelTypeLen
);
Module
.
stringToUTF8
(
config
.
modelType
||
''
,
buffer
+
offset
,
modelTypeLen
);
offset
+=
modelTypeLen
;
Module
.
stringToUTF8
(
...
...
@@ -103,7 +104,7 @@ function initModelConfig(config, Module) {
Module
.
setValue
(
ptr
+
offset
,
buffer
,
'i8*'
);
// tokens
offset
+=
4
;
Module
.
setValue
(
ptr
+
offset
,
config
.
numThreads
,
'i32'
);
Module
.
setValue
(
ptr
+
offset
,
config
.
numThreads
||
1
,
'i32'
);
offset
+=
4
;
Module
.
setValue
(
ptr
+
offset
,
buffer
+
tokensLen
,
'i8*'
);
// provider
...
...
@@ -134,14 +135,21 @@ function initModelConfig(config, Module) {
function
initFeatureExtractorConfig
(
config
,
Module
)
{
let
ptr
=
Module
.
_malloc
(
4
*
2
);
Module
.
setValue
(
ptr
,
config
.
samplingRate
,
'i32'
);
Module
.
setValue
(
ptr
+
4
,
config
.
featureDim
,
'i32'
);
Module
.
setValue
(
ptr
,
config
.
samplingRate
||
16000
,
'i32'
);
Module
.
setValue
(
ptr
+
4
,
config
.
featureDim
||
80
,
'i32'
);
return
{
ptr
:
ptr
,
len
:
8
,
}
}
function
initKwsConfig
(
config
,
Module
)
{
if
(
!
(
'featConfig'
in
config
))
{
config
.
featConfig
=
{
sampleRate
:
16000
,
featureDim
:
80
,
};
}
let
featConfig
=
initFeatureExtractorConfig
(
config
.
featConfig
,
Module
);
let
modelConfig
=
initModelConfig
(
config
.
modelConfig
,
Module
);
...
...
@@ -155,16 +163,16 @@ function initKwsConfig(config, Module) {
Module
.
_CopyHeap
(
modelConfig
.
ptr
,
modelConfig
.
len
,
ptr
+
offset
)
offset
+=
modelConfig
.
len
;
Module
.
setValue
(
ptr
+
offset
,
config
.
maxActivePaths
,
'i32'
);
Module
.
setValue
(
ptr
+
offset
,
config
.
maxActivePaths
||
4
,
'i32'
);
offset
+=
4
;
Module
.
setValue
(
ptr
+
offset
,
config
.
numTrailingBlanks
,
'i32'
);
Module
.
setValue
(
ptr
+
offset
,
config
.
numTrailingBlanks
||
1
,
'i32'
);
offset
+=
4
;
Module
.
setValue
(
ptr
+
offset
,
config
.
keywordsScore
,
'float'
);
Module
.
setValue
(
ptr
+
offset
,
config
.
keywordsScore
||
1.0
,
'float'
);
offset
+=
4
;
Module
.
setValue
(
ptr
+
offset
,
config
.
keywordsThreshold
,
'float'
);
Module
.
setValue
(
ptr
+
offset
,
config
.
keywordsThreshold
||
0.25
,
'float'
);
offset
+=
4
;
let
keywordsLen
=
Module
.
lengthBytesUTF8
(
config
.
keywords
)
+
1
;
...
...
wasm/nodejs/CMakeLists.txt
查看文件 @
5ed8e31
...
...
@@ -49,6 +49,32 @@ set(exported_functions
SherpaOnnxDestroyKeywordSpotter
SherpaOnnxGetKeywordResult
SherpaOnnxIsKeywordStreamReady
# VAD
SherpaOnnxCreateCircularBuffer
SherpaOnnxDestroyCircularBuffer
SherpaOnnxCircularBufferPush
SherpaOnnxCircularBufferGet
SherpaOnnxCircularBufferFree
SherpaOnnxCircularBufferPop
SherpaOnnxCircularBufferSize
SherpaOnnxCircularBufferHead
SherpaOnnxCircularBufferReset
SherpaOnnxCreateVoiceActivityDetector
SherpaOnnxDestroyVoiceActivityDetector
SherpaOnnxVoiceActivityDetectorAcceptWaveform
SherpaOnnxVoiceActivityDetectorEmpty
SherpaOnnxVoiceActivityDetectorDetected
SherpaOnnxVoiceActivityDetectorPop
SherpaOnnxVoiceActivityDetectorClear
SherpaOnnxVoiceActivityDetectorFront
SherpaOnnxDestroySpeechSegment
SherpaOnnxVoiceActivityDetectorReset
SherpaOnnxVoiceActivityDetectorFlush
#
SherpaOnnxFileExists
SherpaOnnxReadWave
SherpaOnnxFreeWave
SherpaOnnxWriteWave
)
...
...
@@ -82,6 +108,8 @@ install(
${
CMAKE_SOURCE_DIR
}
/wasm/asr/sherpa-onnx-asr.js
${
CMAKE_SOURCE_DIR
}
/wasm/tts/sherpa-onnx-tts.js
${
CMAKE_SOURCE_DIR
}
/wasm/kws/sherpa-onnx-kws.js
${
CMAKE_SOURCE_DIR
}
/wasm/vad/sherpa-onnx-vad.js
${
CMAKE_SOURCE_DIR
}
/wasm/nodejs/sherpa-onnx-wave.js
"$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.js"
"$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.wasm"
DESTINATION
...
...
wasm/nodejs/sherpa-onnx-wave.js
0 → 100644
查看文件 @
5ed8e31
// Reads a wave file through the WASM module.
//
// return an object
// {
//   samples: a float32 array
//   sampleRate: an integer
// }
function readWave(filename, Module) {
  // Copy the filename into the WASM heap as a NUL-terminated UTF-8 string.
  const nameBytes = Module.lengthBytesUTF8(filename) + 1;
  const namePtr = Module._malloc(nameBytes);
  Module.stringToUTF8(filename, namePtr, nameBytes);

  const wavePtr = Module._SherpaOnnxReadWave(namePtr);
  Module._free(namePtr);

  // The result is read as three 32-bit fields at wavePtr: a byte pointer to
  // the float samples, the sample rate, and the sample count.
  const samplesIdx = Module.HEAP32[wavePtr / 4] / 4;  // byte ptr -> f32 index
  const sampleRate = Module.HEAP32[wavePtr / 4 + 1];
  const numSamples = Module.HEAP32[wavePtr / 4 + 2];

  // Copy the samples out of the WASM heap before freeing the native wave.
  const samples = new Float32Array(
      Module.HEAPF32.subarray(samplesIdx, samplesIdx + numSamples));

  Module._SherpaOnnxFreeWave(wavePtr);

  return {samples: samples, sampleRate: sampleRate};
}
// Writes a wave file through the WASM module.
//
// data is an object
// {
//   samples: a float32 array
//   sampleRate: an integer
// }
function writeWave(filename, data, Module) {
  // Copy the samples into the WASM heap.
  const bytesPerSample = data.samples.BYTES_PER_ELEMENT;
  const samplesPtr = Module._malloc(data.samples.length * bytesPerSample);
  Module.HEAPF32.set(data.samples, samplesPtr / bytesPerSample);

  // Copy the filename as a NUL-terminated UTF-8 string.
  const nameBytes = Module.lengthBytesUTF8(filename) + 1;
  const namePtr = Module._malloc(nameBytes);
  Module.stringToUTF8(filename, namePtr, nameBytes);

  Module._SherpaOnnxWriteWave(
      samplesPtr, data.samples.length, data.sampleRate, namePtr);

  Module._free(namePtr);
  Module._free(samplesPtr);
}
if
(
typeof
process
==
'object'
&&
typeof
process
.
versions
==
'object'
&&
typeof
process
.
versions
.
node
==
'string'
)
{
module
.
exports
=
{
readWave
,
writeWave
,
};
}
...
...
请
注册
或
登录
后发表评论