Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-05-13 11:58:25 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-05-13 11:58:25 +0800
Commit
384f96c40feadf1741df6d40a574007fbd44d35b
384f96c4
1 parent
db85b2c1
Add streaming CTC ASR APIs for node-addon-api (#867)
显示空白字符变更
内嵌
并排对比
正在显示
15 个修改的文件
包含
445 行增加
和
31 行删除
.github/scripts/test-nodejs-addon-npm.sh
.github/workflows/test-nodejs-addon-api.yaml
.github/workflows/test-nodejs-addon-npm.yaml
nodejs-addon-examples/README.md
nodejs-addon-examples/test_asr_streaming_ctc.js
nodejs-addon-examples/test_asr_streaming_ctc_hlg.js
nodejs-addon-examples/test_asr_streaming_ctc_hlg_microphone.js
nodejs-addon-examples/test_asr_streaming_ctc_microphone.js
nodejs-addon-examples/test_asr_streaming_transducer.js
nodejs-addon-examples/test_asr_streaming_transducer_microphone.js
scripts/apk/generate-tts-apk-script.py
scripts/node-addon-api/lib/addon.js
scripts/node-addon-api/run.sh
scripts/node-addon-api/src/streaming-asr.cc
sherpa-onnx/csrc/online-recognizer-ctc-impl.h
.github/scripts/test-nodejs-addon-npm.sh
查看文件 @
384f96c
...
...
@@ -5,15 +5,6 @@ set -ex
d
=
nodejs-addon-examples
echo
"dir:
$d
"
cd
$d
npm install --verbose
git status
ls -lh
ls -lh node_modules
export
DYLD_LIBRARY_PATH
=
$PWD
/node_modules/sherpa-onnx-darwin-x64:
$DYLD_LIBRARY_PATH
export
DYLD_LIBRARY_PATH
=
$PWD
/node_modules/sherpa-onnx-darwin-arm64:
$DYLD_LIBRARY_PATH
export
LD_LIBRARY_PATH
=
$PWD
/node_modules/sherpa-onnx-linux-x64:
$LD_LIBRARY_PATH
export
LD_LIBRARY_PATH
=
$PWD
/node_modules/sherpa-onnx-linux-arm64:
$LD_LIBRARY_PATH
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
...
...
@@ -22,3 +13,14 @@ rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
node test_asr_streaming_transducer.js
rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
node ./test_asr_streaming_ctc.js
# To decode with HLG.fst
node ./test_asr_streaming_ctc_hlg.js
rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18
...
...
.github/workflows/test-nodejs-addon-api.yaml
查看文件 @
384f96c
...
...
@@ -152,17 +152,23 @@ jobs:
./node_modules/.bin/cmake-js compile --log-level verbose
-
name
:
Test streaming transducer
-
name
:
Run tests
shell
:
bash
run
:
|
export PATH=$PWD/build/install/lib:$PATH
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
cd scripts/node-addon-api
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
node test/test_asr_streaming_transducer.js
rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
d=nodejs-addon-examples
cd $d
files=$(ls *.js)
echo $files
for f in ${files[@]}; do
echo $f
sed -i.bak s%sherpa-onnx-node%./sherpa-onnx% ./$f
done
cd ..
cp -v scripts/node-addon-api/build/Release/sherpa-onnx.node $d/
cp -v scripts/node-addon-api/lib/*.js $d/
cp -v ./build/install/lib/lib* $d/
.github/scripts/test-nodejs-addon-npm.sh
...
...
.github/workflows/test-nodejs-addon-npm.yaml
查看文件 @
384f96c
...
...
@@ -63,4 +63,19 @@ jobs:
-
name
:
Run tests
shell
:
bash
run
:
|
d=nodejs-addon-examples
echo "dir: $d"
cd $d
npm install --verbose
git status
ls -lh
ls -lh node_modules
export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-x64:$DYLD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-arm64:$DYLD_LIBRARY_PATH
export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-x64:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PATH
cd ../
.github/scripts/test-nodejs-addon-npm.sh
...
...
nodejs-addon-examples/README.md
查看文件 @
384f96c
...
...
@@ -27,6 +27,18 @@ export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-x64:$LD_LIBRARY_PATH
export
LD_LIBRARY_PATH
=
$PWD
/node_modules/sherpa-onnx-linux-arm64:
$LD_LIBRARY_PATH
```
# Voice Activity detection (VAD)
```
bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
# To run the test with a microphone, you need to install the package naudiodon2
npm install naudiodon2
node ./test_vad_microphone.js
```
## Streaming speech recognition with zipformer transducer
```
bash
...
...
@@ -36,21 +48,27 @@ rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
node ./test_asr_streaming_transducer.js
# To run the test with microphone, you need to install the package naudiodon2
# To run the test with
a
microphone, you need to install the package naudiodon2
npm install naudiodon2
node ./test_asr_streaming_transducer_microphone.js
```
#
VAD
#
# Streaming speech recognition with zipformer CTC
```
bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
node ./test_asr_streaming_ctc.js
# To run the test with microphone, you need to install the package naudiodon2
# To decode with HLG.fst
node ./test_asr_streaming_ctc_hlg.js
# To run the test with a microphone, you need to install the package naudiodon2
npm install naudiodon2
node ./test_vad_microphone.js
node ./test_asr_streaming_ctc_microphone.js
node ./test_asr_streaming_ctc_hlg_microphone.js
```
...
...
nodejs-addon-examples/test_asr_streaming_ctc.js
0 → 100644
查看文件 @
384f96c
// Copyright (c)  2024  Xiaomi Corporation
//
// Decode a wave file with a streaming zipformer2 CTC model and report
// the recognition result together with the real-time factor (RTF).
const sherpa_onnx = require('sherpa-onnx-node');
const performance = require('perf_hooks').performance;

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'zipformer2Ctc': {
      'model':
          './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
    },
    'tokens':
        './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  }
};

const waveFilename =
    './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/0.wav';

const recognizer = new sherpa_onnx.OnlineRecognizer(config);
console.log('Started');
let start = performance.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

// Feed 0.4 seconds of silence so the model flushes its internal buffers
// and emits the final tokens of the utterance.
const tailPadding = new Float32Array(wave.sampleRate * 0.4);
stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});

while (recognizer.isReady(stream)) {
  recognizer.decode(stream);
}

// Fix: declare the result instead of creating an implicit global
// (an implicit global throws a ReferenceError in strict mode).
const result = recognizer.getResult(stream);
let stop = performance.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
// Fix: 'secodns' -> 'seconds' in the log messages.
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);
...
...
nodejs-addon-examples/test_asr_streaming_ctc_hlg.js
0 → 100644
查看文件 @
384f96c
// Copyright (c)  2024  Xiaomi Corporation
//
// Decode a wave file with a streaming zipformer2 CTC model using an
// HLG.fst decoding graph, and report the result and the real-time factor.
const sherpa_onnx = require('sherpa-onnx-node');
const performance = require('perf_hooks').performance;

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'zipformer2Ctc': {
      'model':
          './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
    },
    'tokens':
        './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  },
  // Decoding with an HLG graph instead of plain greedy search.
  'ctcFstDecoderConfig': {
    'graph': './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst',
  },
};

const waveFilename =
    './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/1.wav';

const recognizer = new sherpa_onnx.OnlineRecognizer(config);
console.log('Started');
let start = performance.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

// Feed 0.4 seconds of silence so the model flushes its internal buffers
// and emits the final tokens of the utterance.
const tailPadding = new Float32Array(wave.sampleRate * 0.4);
stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});

while (recognizer.isReady(stream)) {
  recognizer.decode(stream);
}

// Fix: declare the result instead of creating an implicit global
// (an implicit global throws a ReferenceError in strict mode).
const result = recognizer.getResult(stream);
let stop = performance.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
// Fix: 'secodns' -> 'seconds' in the log messages.
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);
...
...
nodejs-addon-examples/test_asr_streaming_ctc_hlg_microphone.js
0 → 100644
查看文件 @
384f96c
// Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)
//
// Live speech recognition from the microphone with a streaming
// zipformer2 CTC model decoded through an HLG.fst graph.
const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx-node');

// Build an OnlineRecognizer configured for CTC + HLG decoding with
// endpoint detection enabled.
function createOnlineRecognizer() {
  const modelDir = './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18';
  const config = {
    'featConfig': {
      'sampleRate': 16000,
      'featureDim': 80,
    },
    'modelConfig': {
      'zipformer2Ctc': {
        'model': modelDir + '/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
      },
      'tokens': modelDir + '/tokens.txt',
      'numThreads': 2,
      'provider': 'cpu',
      'debug': 1,
    },
    'ctcFstDecoderConfig': {
      'graph': modelDir + '/HLG.fst',
    },
    'enableEndpoint': true,
    'rule1MinTrailingSilence': 2.4,
    'rule2MinTrailingSilence': 1.2,
    'rule3MinUtteranceLength': 20
  };

  return new sherpa_onnx.OnlineRecognizer(config);
}

const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();

let lastText = '';
let segmentIndex = 0;

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: recognizer.config.featConfig.sampleRate
  }
});

const display = new sherpa_onnx.Display(50);

ai.on('data', data => {
  const samples = new Float32Array(data.buffer);

  stream.acceptWaveform(
      {sampleRate: recognizer.config.featConfig.sampleRate, samples: samples});

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const isEndpoint = recognizer.isEndpoint(stream);
  const text = recognizer.getResult(stream).text.toLowerCase();

  // Only refresh the display when the partial result actually changed.
  if (text.length > 0 && lastText != text) {
    lastText = text;
    display.print(segmentIndex, lastText);
  }

  // An endpoint closes the current segment; reset for the next utterance.
  if (isEndpoint) {
    if (text.length > 0) {
      lastText = text;
      segmentIndex += 1;
    }
    recognizer.reset(stream)
  }
});

ai.on('close', () => {
  console.log('Free resources');
  stream.free();
  recognizer.free();
});

ai.start();
console.log('Started! Please speak')
...
...
nodejs-addon-examples/test_asr_streaming_ctc_microphone.js
0 → 100644
查看文件 @
384f96c
// Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)
//
// Live speech recognition from the microphone with a streaming
// zipformer2 CTC model using greedy-search decoding.
const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx-node');

// Build an OnlineRecognizer configured for greedy-search CTC decoding
// with endpoint detection enabled.
function createOnlineRecognizer() {
  const modelDir = './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18';
  const config = {
    'featConfig': {
      'sampleRate': 16000,
      'featureDim': 80,
    },
    'modelConfig': {
      'zipformer2Ctc': {
        'model': modelDir + '/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
      },
      'tokens': modelDir + '/tokens.txt',
      'numThreads': 2,
      'provider': 'cpu',
      'debug': 1,
    },
    'decodingMethod': 'greedy_search',
    'maxActivePaths': 4,
    'enableEndpoint': true,
    'rule1MinTrailingSilence': 2.4,
    'rule2MinTrailingSilence': 1.2,
    'rule3MinUtteranceLength': 20
  };

  return new sherpa_onnx.OnlineRecognizer(config);
}

const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();

let lastText = '';
let segmentIndex = 0;

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: recognizer.config.featConfig.sampleRate
  }
});

const display = new sherpa_onnx.Display(50);

ai.on('data', data => {
  const samples = new Float32Array(data.buffer);

  stream.acceptWaveform(
      {sampleRate: recognizer.config.featConfig.sampleRate, samples: samples});

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const isEndpoint = recognizer.isEndpoint(stream);
  const text = recognizer.getResult(stream).text.toLowerCase();

  // Only refresh the display when the partial result actually changed.
  if (text.length > 0 && lastText != text) {
    lastText = text;
    display.print(segmentIndex, lastText);
  }

  // An endpoint closes the current segment; reset for the next utterance.
  if (isEndpoint) {
    if (text.length > 0) {
      lastText = text;
      segmentIndex += 1;
    }
    recognizer.reset(stream)
  }
});

ai.on('close', () => {
  console.log('Free resources');
  stream.free();
  recognizer.free();
});

ai.start();
console.log('Started! Please speak')
...
...
nodejs-addon-examples/test_asr_streaming_transducer.js
查看文件 @
384f96c
...
...
@@ -24,7 +24,6 @@ const config = {
'numThreads'
:
2
,
'provider'
:
'cpu'
,
'debug'
:
1
,
'modelType'
:
'zipformer'
,
}
};
...
...
@@ -53,5 +52,8 @@ const duration = wave.samples.length / wave.sampleRate;
const
real_time_factor
=
elapsed_seconds
/
duration
;
console
.
log
(
'Wave duration'
,
duration
.
toFixed
(
3
),
'secodns'
)
console
.
log
(
'Elapsed'
,
elapsed_seconds
.
toFixed
(
3
),
'secodns'
)
console
.
log
(
'RTF'
,
real_time_factor
.
toFixed
(
3
))
console
.
log
(
'result'
,
result
.
text
)
console
.
log
(
`
RTF
=
$
{
elapsed_seconds
.
toFixed
(
3
)}
/${duration.toFixed
(
3
)
} =`
,
real_time_factor
.
toFixed
(
3
))
console
.
log
(
waveFilename
)
console
.
log
(
'result\n'
,
result
)
...
...
nodejs-addon-examples/test_asr_streaming_transducer_microphone.js
查看文件 @
384f96c
...
...
@@ -25,7 +25,6 @@ function createOnlineRecognizer() {
'numThreads'
:
2
,
'provider'
:
'cpu'
,
'debug'
:
1
,
'modelType'
:
'zipformer'
,
},
'decodingMethod'
:
'greedy_search'
,
'maxActivePaths'
:
4
,
...
...
@@ -68,7 +67,7 @@ ai.on('data', data => {
}
const
isEndpoint
=
recognizer
.
isEndpoint
(
stream
);
const
text
=
recognizer
.
getResult
(
stream
).
text
;
const
text
=
recognizer
.
getResult
(
stream
).
text
.
toLowerCase
()
;
if
(
text
.
length
>
0
&&
lastText
!=
text
)
{
lastText
=
text
;
...
...
scripts/apk/generate-tts-apk-script.py
查看文件 @
384f96c
...
...
@@ -158,7 +158,7 @@ def get_piper_models() -> List[TtsModel]:
TtsModel
(
model_dir
=
"vits-piper-fa_IR-gyro-medium"
),
TtsModel
(
model_dir
=
"vits-piper-fi_FI-harri-low"
),
TtsModel
(
model_dir
=
"vits-piper-fi_FI-harri-medium"
),
TtsModel
(
model_dir
=
"vits-piper-fr_FR-mls-medium"
),
#
TtsModel(model_dir="vits-piper-fr_FR-mls-medium"),
TtsModel
(
model_dir
=
"vits-piper-fr_FR-siwis-low"
),
TtsModel
(
model_dir
=
"vits-piper-fr_FR-siwis-medium"
),
TtsModel
(
model_dir
=
"vits-piper-fr_FR-upmc-medium"
),
...
...
scripts/node-addon-api/lib/addon.js
查看文件 @
384f96c
...
...
@@ -9,6 +9,7 @@ const possible_paths = [
'../build/Debug/sherpa-onnx.node'
,
`
.
/
node_modules
/
sherpa
-
onnx
-
$
{
platform_arch
}
/sherpa-onnx.node`
,
`
..
/
sherpa
-
onnx
-
$
{
platform_arch
}
/sherpa-onnx.node`
,
'./sherpa-onnx.node'
,
];
let
found
=
false
;
...
...
scripts/node-addon-api/run.sh
0 → 100755
查看文件 @
384f96c
#!/usr/bin/env bash
# Build libsherpa-onnx (shared libs) if it has not been built yet, then
# compile the node-addon-api binding against the installed libraries.
set -ex

# Skip the cmake build when the core shared library already exists
# (macOS .dylib or Linux .so).
if [[ ! -f ../../build/install/lib/libsherpa-onnx-core.dylib && ! -f ../../build/install/lib/libsherpa-onnx-core.so ]]; then
  pushd ../../
  mkdir -p build
  cd build
  cmake -DCMAKE_INSTALL_PREFIX=./install -DBUILD_SHARED_LIBS=ON ..
  make install
  popd
fi

# cmake-js reads this to locate headers and libraries of sherpa-onnx.
export SHERPA_ONNX_INSTALL_DIR=$PWD/../../build/install

./node_modules/.bin/cmake-js compile
...
...
scripts/node-addon-api/src/streaming-asr.cc
查看文件 @
384f96c
...
...
@@ -89,6 +89,30 @@ static SherpaOnnxOnlineTransducerModelConfig GetOnlineTransducerModelConfig(
return
config
;
}
// Extract the optional "zipformer2Ctc" sub-object from the JS config and
// convert it into the C struct expected by the sherpa-onnx C API.
//
// config.model is heap-allocated here (the C API wants a plain char*);
// the caller owns it and releases it with delete[].
static SherpaOnnxOnlineZipformer2CtcModelConfig
GetOnlineZipformer2CtcModelConfig(Napi::Object obj) {
  SherpaOnnxOnlineZipformer2CtcModelConfig config;
  memset(&config, 0, sizeof(config));

  // Absent or non-object field: return the zero-initialized config.
  if (!obj.Has("zipformer2Ctc") || !obj.Get("zipformer2Ctc").IsObject()) {
    return config;
  }

  Napi::Object ctc = obj.Get("zipformer2Ctc").As<Napi::Object>();

  if (ctc.Has("model") && ctc.Get("model").IsString()) {
    Napi::String model = ctc.Get("model").As<Napi::String>();
    std::string s = model.Utf8Value();
    // Copy into a NUL-terminated buffer owned by the caller.
    char *buf = new char[s.size() + 1];
    std::copy(s.begin(), s.end(), buf);
    buf[s.size()] = 0;

    config.model = buf;
  }

  return config;
}
static
SherpaOnnxOnlineModelConfig
GetOnlineModelConfig
(
Napi
::
Object
obj
)
{
SherpaOnnxOnlineModelConfig
config
;
memset
(
&
config
,
0
,
sizeof
(
config
));
...
...
@@ -100,6 +124,7 @@ static SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) {
Napi
::
Object
o
=
obj
.
Get
(
"modelConfig"
).
As
<
Napi
::
Object
>
();
config
.
transducer
=
GetOnlineTransducerModelConfig
(
o
);
config
.
zipformer2_ctc
=
GetOnlineZipformer2CtcModelConfig
(
o
);
if
(
o
.
Has
(
"tokens"
)
&&
o
.
Get
(
"tokens"
).
IsString
())
{
Napi
::
String
tokens
=
o
.
Get
(
"tokens"
).
As
<
Napi
::
String
>
();
...
...
@@ -147,6 +172,35 @@ static SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) {
return
config
;
}
// Extract the optional "ctcFstDecoderConfig" sub-object from the JS config
// and convert it into the C struct expected by the sherpa-onnx C API.
//
// config.graph is heap-allocated here (the C API wants a plain char*);
// the caller owns it and releases it with delete[].
static SherpaOnnxOnlineCtcFstDecoderConfig GetCtcFstDecoderConfig(
    Napi::Object obj) {
  SherpaOnnxOnlineCtcFstDecoderConfig config;
  memset(&config, 0, sizeof(config));

  // Absent or non-object field: return the zero-initialized config.
  if (!obj.Has("ctcFstDecoderConfig") ||
      !obj.Get("ctcFstDecoderConfig").IsObject()) {
    return config;
  }

  Napi::Object fst = obj.Get("ctcFstDecoderConfig").As<Napi::Object>();

  if (fst.Has("graph") && fst.Get("graph").IsString()) {
    Napi::String graph = fst.Get("graph").As<Napi::String>();
    std::string s = graph.Utf8Value();
    // Copy into a NUL-terminated buffer owned by the caller.
    char *buf = new char[s.size() + 1];
    std::copy(s.begin(), s.end(), buf);
    buf[s.size()] = 0;

    config.graph = buf;
  }

  if (fst.Has("maxActive") && fst.Get("maxActive").IsNumber()) {
    config.max_active = fst.Get("maxActive").As<Napi::Number>().Int32Value();
  }

  return config;
}
static
Napi
::
External
<
SherpaOnnxOnlineRecognizer
>
CreateOnlineRecognizerWrapper
(
const
Napi
::
CallbackInfo
&
info
)
{
Napi
::
Env
env
=
info
.
Env
();
...
...
@@ -234,6 +288,8 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
config
.
Get
(
"hotwordsScore"
).
As
<
Napi
::
Number
>
().
FloatValue
();
}
c
.
ctc_fst_decoder_config
=
GetCtcFstDecoderConfig
(
config
);
#if 0
printf("encoder: %s\n", c.model_config.transducer.encoder
? c.model_config.transducer.encoder
...
...
@@ -277,6 +333,10 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
delete
[]
c
.
model_config
.
transducer
.
joiner
;
}
if
(
c
.
model_config
.
zipformer2_ctc
.
model
)
{
delete
[]
c
.
model_config
.
zipformer2_ctc
.
model
;
}
if
(
c
.
model_config
.
tokens
)
{
delete
[]
c
.
model_config
.
tokens
;
}
...
...
@@ -297,6 +357,10 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
delete
[]
c
.
hotwords_file
;
}
if
(
c
.
ctc_fst_decoder_config
.
graph
)
{
delete
[]
c
.
ctc_fst_decoder_config
.
graph
;
}
if
(
!
recognizer
)
{
Napi
::
TypeError
::
New
(
env
,
"Please check your config!"
)
.
ThrowAsJavaScriptException
();
...
...
sherpa-onnx/csrc/online-recognizer-ctc-impl.h
查看文件 @
384f96c
...
...
@@ -216,6 +216,8 @@ class OnlineRecognizerCtcImpl : public OnlineRecognizerImpl {
// clear states
s
->
SetStates
(
model_
->
GetInitStates
());
s
->
GetFasterDecoderProcessedFrames
()
=
0
;
// Note: We only update counters. The underlying audio samples
// are not discarded.
s
->
Reset
();
...
...
请
注册
或
登录
后发表评论