Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-05-10 18:21:05 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-05-10 18:21:05 +0800
Commit
65f5161456deab924ce62c44095f7d48af9fa9db
65f51614
1 parent
46e4e5b7
Add more streaming ASR methods for node-addon-api (#860)
隐藏空白字符变更
内嵌
并排对比
正在显示
6 个修改的文件
包含
398 行增加
和
13 行删除
nodejs-addon-examples/README.md
nodejs-addon-examples/package.json
nodejs-addon-examples/test_asr_streaming_transducer_microphone.js
scripts/node-addon-api/lib/sherpa-onnx.js
scripts/node-addon-api/lib/streaming-asr.js
scripts/node-addon-api/src/streaming-asr.cc
nodejs-addon-examples/README.md
查看文件 @
65f5161
...
...
@@ -28,9 +28,13 @@ export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PAT
```
## Streaming speech recognition with zipformer transducer
```
bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
node ./test_asr_streaming_transducer.js
node ./test_asr_streaming_transducer_microphone.js
```
...
...
nodejs-addon-examples/package.json
查看文件 @
65f5161
{
"dependencies"
:
{
"sherpa-onnx-node"
:
"*"
,
"perf_hooks"
:
"*"
"naudiodon2"
:
"^2.4.0"
,
"perf_hooks"
:
"*"
,
"sherpa-onnx-node"
:
"*"
}
}
...
...
nodejs-addon-examples/test_asr_streaming_transducer_microphone.js
0 → 100644
查看文件 @
65f5161
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
//
const
portAudio
=
require
(
'naudiodon2'
);
// console.log(portAudio.getDevices());
const
sherpa_onnx
=
require
(
'sherpa-onnx-node'
);
/**
 * Build the streaming recognizer used by this example.
 *
 * The configuration points at the files of the
 * sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 model directory,
 * relative to the current working directory.
 *
 * @returns {sherpa_onnx.OnlineRecognizer} a recognizer created from the
 *     configuration assembled below.
 */
function createOnlineRecognizer() {
  // Feature extraction settings.
  const featConfig = {
    sampleRate: 16000,
    featureDim: 80,
  };

  // The three transducer model files.
  const transducer = {
    encoder:
        './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx',
    decoder:
        './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx',
    joiner:
        './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx',
  };

  const modelConfig = {
    transducer: transducer,
    tokens:
        './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
    numThreads: 2,
    provider: 'cpu',
    debug: 1,
    modelType: 'zipformer',
  };

  // Decoding and endpointing options sit next to the two sub-configs.
  const config = {
    featConfig: featConfig,
    modelConfig: modelConfig,
    decodingMethod: 'greedy_search',
    maxActivePaths: 4,
    enableEndpoint: true,
    rule1MinTrailingSilence: 2.4,
    rule2MinTrailingSilence: 1.2,
    rule3MinUtteranceLength: 20,
  };

  return new sherpa_onnx.OnlineRecognizer(config);
}
// Create the recognizer and the single stream that receives microphone audio.
const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();

// Most recently displayed text, and the index of the segment being decoded.
let lastText = '';
let segmentIndex = 0;

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    // Close the stream if an audio error is detected, if
    // set false then just log the error
    closeOnError: true,
    // Use -1 or omit the deviceId to select the default device
    deviceId: -1,
    sampleFormat: portAudio.SampleFormatFloat32,
    // Record at the sample rate the recognizer was configured with.
    sampleRate: recognizer.config.featConfig.sampleRate
  }
});

const display = new sherpa_onnx.Display(50);

ai.on('data', data => {
  // View only the bytes that belong to this chunk. `data.buffer` is the whole
  // backing ArrayBuffer, which may be larger than the chunk and may start at a
  // non-zero offset, so the offset and length must be passed explicitly.
  const samples = new Float32Array(
      data.buffer, data.byteOffset,
      Math.floor(data.byteLength / Float32Array.BYTES_PER_ELEMENT));

  stream.acceptWaveform(samples, recognizer.config.featConfig.sampleRate);

  // Decode for as long as the recognizer reports the stream as ready.
  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const isEndpoint = recognizer.isEndpoint(stream);
  const text = recognizer.getResult(stream).text;

  // Only redraw when there is text and it differs from what was last shown.
  if (text.length > 0 && lastText !== text) {
    lastText = text;
    display.print(segmentIndex, lastText);
  }

  if (isEndpoint) {
    // A non-empty result closes the current segment; the next text is shown
    // under a new segment index.
    if (text.length > 0) {
      lastText = text;
      segmentIndex += 1;
    }
    recognizer.reset(stream);
  }
});

ai.on('close', () => {
  console.log('Free resources');
  stream.free();
  recognizer.free();
});

ai.start();
console.log('Started! Please speak');
...
...
scripts/node-addon-api/lib/sherpa-onnx.js
查看文件 @
65f5161
...
...
@@ -4,4 +4,5 @@ const streaming_asr = require('./streaming-asr.js');
module
.
exports
=
{
OnlineRecognizer
:
streaming_asr
.
OnlineRecognizer
,
readWave
:
addon
.
readWave
,
Display
:
streaming_asr
.
Display
,
}
...
...
scripts/node-addon-api/lib/streaming-asr.js
查看文件 @
65f5161
const
addon
=
require
(
'./addon.js'
);
/**
 * Thin wrapper around the addon's display functions.
 */
class Display {
  /**
   * @param maxWordPerline value forwarded unchanged to addon.createDisplay().
   */
  constructor(maxWordPerline) {
    // Handle returned by the addon; passed back to it on every print() call.
    this.handle = addon.createDisplay(maxWordPerline);
  }

  /**
   * Forward idx and text, together with this object's handle, to addon.print().
   *
   * @param idx first value forwarded to addon.print() after the handle.
   * @param text second value forwarded to addon.print() after the handle.
   */
  print(idx, text) {
    const {handle} = this;
    addon.print(handle, idx, text);
  }
}
class
OnlineStream
{
constructor
(
handle
)
{
this
.
handle
=
handle
;
...
...
@@ -10,11 +20,16 @@ class OnlineStream {
addon
.
acceptWaveformOnline
(
this
.
handle
,
{
samples
:
samples
,
sampleRate
:
sampleRate
})
}
inputFinished
()
{
addon
.
inputFinished
(
this
.
handle
)
}
}
class
OnlineRecognizer
{
constructor
(
config
)
{
this
.
handle
=
addon
.
createOnlineRecognizer
(
config
);
this
.
config
=
config
}
createStream
()
{
...
...
@@ -30,6 +45,14 @@ class OnlineRecognizer {
addon
.
decodeOnlineStream
(
this
.
handle
,
stream
.
handle
);
}
isEndpoint
(
stream
)
{
return
addon
.
isEndpoint
(
this
.
handle
,
stream
.
handle
);
}
reset
(
stream
)
{
addon
.
reset
(
this
.
handle
,
stream
.
handle
);
}
getResult
(
stream
)
{
const
jsonStr
=
addon
.
getOnlineStreamResultAsJson
(
this
.
handle
,
stream
.
handle
);
...
...
@@ -38,4 +61,7 @@ class OnlineRecognizer {
}
}
module
.
exports
=
{
OnlineRecognizer
}
module
.
exports
=
{
OnlineRecognizer
,
Display
}
...
...
scripts/node-addon-api/src/streaming-asr.cc
查看文件 @
65f5161
...
...
@@ -166,6 +166,69 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
memset
(
&
c
,
0
,
sizeof
(
c
));
c
.
feat_config
=
GetFeatureConfig
(
config
);
c
.
model_config
=
GetOnlineModelConfig
(
config
);
if
(
config
.
Has
(
"decodingMethod"
)
&&
config
.
Get
(
"decodingMethod"
).
IsString
())
{
Napi
::
String
decoding_method
=
config
.
Get
(
"decodingMethod"
).
As
<
Napi
::
String
>
();
std
::
string
s
=
decoding_method
.
Utf8Value
();
char
*
p
=
new
char
[
s
.
size
()
+
1
];
std
::
copy
(
s
.
begin
(),
s
.
end
(),
p
);
p
[
s
.
size
()]
=
0
;
c
.
decoding_method
=
p
;
}
if
(
config
.
Has
(
"maxActivePaths"
)
&&
config
.
Get
(
"maxActivePaths"
).
IsNumber
())
{
c
.
max_active_paths
=
config
.
Get
(
"maxActivePaths"
).
As
<
Napi
::
Number
>
().
Int32Value
();
}
// enableEndpoint can be either a boolean or an integer
if
(
config
.
Has
(
"enableEndpoint"
)
&&
(
config
.
Get
(
"enableEndpoint"
).
IsNumber
()
||
config
.
Get
(
"enableEndpoint"
).
IsBoolean
()))
{
if
(
config
.
Get
(
"enableEndpoint"
).
IsNumber
())
{
c
.
enable_endpoint
=
config
.
Get
(
"enableEndpoint"
).
As
<
Napi
::
Number
>
().
Int32Value
();
}
else
{
c
.
enable_endpoint
=
config
.
Get
(
"enableEndpoint"
).
As
<
Napi
::
Boolean
>
().
Value
();
}
}
if
(
config
.
Has
(
"rule1MinTrailingSilence"
)
&&
config
.
Get
(
"rule1MinTrailingSilence"
).
IsNumber
())
{
c
.
rule1_min_trailing_silence
=
config
.
Get
(
"rule1MinTrailingSilence"
).
As
<
Napi
::
Number
>
().
FloatValue
();
}
if
(
config
.
Has
(
"rule2MinTrailingSilence"
)
&&
config
.
Get
(
"rule2MinTrailingSilence"
).
IsNumber
())
{
c
.
rule2_min_trailing_silence
=
config
.
Get
(
"rule2MinTrailingSilence"
).
As
<
Napi
::
Number
>
().
FloatValue
();
}
if
(
config
.
Has
(
"rule3MinUtteranceLength"
)
&&
config
.
Get
(
"rule3MinUtteranceLength"
).
IsNumber
())
{
c
.
rule3_min_utterance_length
=
config
.
Get
(
"rule3MinUtteranceLength"
).
As
<
Napi
::
Number
>
().
FloatValue
();
}
if
(
config
.
Has
(
"hotwordsFile"
)
&&
config
.
Get
(
"hotwordsFile"
).
IsString
())
{
Napi
::
String
hotwords_file
=
config
.
Get
(
"hotwordsFile"
).
As
<
Napi
::
String
>
();
std
::
string
s
=
hotwords_file
.
Utf8Value
();
char
*
p
=
new
char
[
s
.
size
()
+
1
];
std
::
copy
(
s
.
begin
(),
s
.
end
(),
p
);
p
[
s
.
size
()]
=
0
;
c
.
hotwords_file
=
p
;
}
if
(
config
.
Has
(
"hotwordsScore"
)
&&
config
.
Get
(
"hotwordsScore"
).
IsNumber
())
{
c
.
hotwords_score
=
config
.
Get
(
"hotwordsScore"
).
As
<
Napi
::
Number
>
().
FloatValue
();
}
#if 0
printf("encoder: %s\n", c.model_config.transducer.encoder
? c.model_config.transducer.encoder
...
...
@@ -184,6 +247,15 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
printf("debug: %d\n", c.model_config.debug);
printf("model_type: %s\n",
c.model_config.model_type ? c.model_config.model_type : "no");
printf("decoding_method: %s\n", c.decoding_method ? c.decoding_method : "no");
printf("max_active_paths: %d\n", c.max_active_paths);
printf("enable_endpoint: %d\n", c.enable_endpoint);
printf("rule1_min_trailing_silence: %.3f\n", c.rule1_min_trailing_silence);
printf("rule2_min_trailing_silence: %.3f\n", c.rule2_min_trailing_silence);
printf("rule3_min_utterance_length: %.3f\n", c.rule3_min_utterance_length);
printf("hotwords_file: %s\n", c.hotwords_file ? c.hotwords_file : "no");
printf("hotwords_score: %.3f\n", c.hotwords_score);
#endif
SherpaOnnxOnlineRecognizer
*
recognizer
=
CreateOnlineRecognizer
(
&
c
);
...
...
@@ -212,6 +284,14 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
delete
[]
c
.
model_config
.
model_type
;
}
if
(
c
.
decoding_method
)
{
delete
[]
c
.
decoding_method
;
}
if
(
c
.
hotwords_file
)
{
delete
[]
c
.
hotwords_file
;
}
if
(
!
recognizer
)
{
Napi
::
TypeError
::
New
(
env
,
"Please check your config!"
)
.
ThrowAsJavaScriptException
();
...
...
@@ -270,7 +350,7 @@ static void AcceptWaveformWrapper(const Napi::CallbackInfo &info) {
}
if
(
!
info
[
0
].
IsExternal
())
{
Napi
::
TypeError
::
New
(
env
,
"Argument 0 should be a online stream pointer."
)
Napi
::
TypeError
::
New
(
env
,
"Argument 0 should be a
n
online stream pointer."
)
.
ThrowAsJavaScriptException
();
return
;
...
...
@@ -337,15 +417,14 @@ static Napi::Boolean IsOnlineStreamReadyWrapper(
if
(
!
info
[
0
].
IsExternal
())
{
Napi
::
TypeError
::
New
(
env
,
"Argument 0 should be a online recognizer pointer."
)
"Argument 0 should be a
n
online recognizer pointer."
)
.
ThrowAsJavaScriptException
();
return
{};
}
if
(
!
info
[
1
].
IsExternal
())
{
Napi
::
TypeError
::
New
(
env
,
"Argument 1 should be a online recognizer pointer."
)
Napi
::
TypeError
::
New
(
env
,
"Argument 1 should be an online stream pointer."
)
.
ThrowAsJavaScriptException
();
return
{};
...
...
@@ -375,15 +454,14 @@ static void DecodeOnlineStreamWrapper(const Napi::CallbackInfo &info) {
if
(
!
info
[
0
].
IsExternal
())
{
Napi
::
TypeError
::
New
(
env
,
"Argument 0 should be a online recognizer pointer."
)
"Argument 0 should be a
n
online recognizer pointer."
)
.
ThrowAsJavaScriptException
();
return
;
}
if
(
!
info
[
1
].
IsExternal
())
{
Napi
::
TypeError
::
New
(
env
,
"Argument 1 should be a online recognizer pointer."
)
Napi
::
TypeError
::
New
(
env
,
"Argument 1 should be an online stream pointer."
)
.
ThrowAsJavaScriptException
();
return
;
...
...
@@ -412,15 +490,14 @@ static Napi::String GetOnlineStreamResultAsJsonWrapper(
if
(
!
info
[
0
].
IsExternal
())
{
Napi
::
TypeError
::
New
(
env
,
"Argument 0 should be a online recognizer pointer."
)
"Argument 0 should be a
n
online recognizer pointer."
)
.
ThrowAsJavaScriptException
();
return
{};
}
if
(
!
info
[
1
].
IsExternal
())
{
Napi
::
TypeError
::
New
(
env
,
"Argument 1 should be a online recognizer pointer."
)
Napi
::
TypeError
::
New
(
env
,
"Argument 1 should be an online stream pointer."
)
.
ThrowAsJavaScriptException
();
return
{};
...
...
@@ -440,6 +517,175 @@ static Napi::String GetOnlineStreamResultAsJsonWrapper(
return
s
;
}
// Node-API binding that forwards an online stream to the C function
// InputFinished().
//
// JavaScript call shape: inputFinished(stream)
//   info[0] - Napi::External<SherpaOnnxOnlineStream>
//
// Throws a JavaScript TypeError, and returns without calling InputFinished(),
// when the number of arguments is not exactly 1 or argument 0 is not an
// External.
static void InputFinishedWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 1) {
    std::ostringstream os;
    // Singular "argument", matching the wording used by CreateDisplayWrapper.
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an online stream pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  SherpaOnnxOnlineStream *stream =
      info[0].As<Napi::External<SherpaOnnxOnlineStream>>().Data();

  InputFinished(stream);
}
// Node-API binding for the C function Reset().
//
// JavaScript call shape: reset(recognizer, stream)
//   info[0] - Napi::External<SherpaOnnxOnlineRecognizer>
//   info[1] - Napi::External<SherpaOnnxOnlineStream>
//
// A JavaScript TypeError is thrown, and Reset() is not called, when the
// argument count is not 2 or when either argument is not an External.
static void ResetOnlineStreamWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  // Shared helper for the fixed-text type errors below.
  const auto throw_type_error = [&env](const char *message) {
    Napi::TypeError::New(env, message).ThrowAsJavaScriptException();
  };

  const auto num_args = info.Length();
  if (num_args != 2) {
    std::ostringstream message;
    message << "Expect only 2 arguments. Given: " << num_args;
    Napi::TypeError::New(env, message.str()).ThrowAsJavaScriptException();
    return;
  }

  if (!info[0].IsExternal()) {
    throw_type_error("Argument 0 should be an online recognizer pointer.");
    return;
  }

  if (!info[1].IsExternal()) {
    throw_type_error("Argument 1 should be an online stream pointer.");
    return;
  }

  // Unwrap the native pointers carried by the two External values.
  auto *online_recognizer =
      info[0].As<Napi::External<SherpaOnnxOnlineRecognizer>>().Data();
  auto *online_stream =
      info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();

  Reset(online_recognizer, online_stream);
}
// Node-API binding for the C function IsEndpoint().
//
// JavaScript call shape: isEndpoint(recognizer, stream)
//   info[0] - Napi::External<SherpaOnnxOnlineRecognizer>
//   info[1] - Napi::External<SherpaOnnxOnlineStream>
//
// Returns a Napi::Boolean that is true when IsEndpoint() returns a non-zero
// value. On a bad argument count or a non-External argument, a JavaScript
// TypeError is thrown and a default-constructed Napi::Boolean is returned.
static Napi::Boolean IsEndpointWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  // Shared helper for the fixed-text type errors below.
  const auto throw_type_error = [&env](const char *message) {
    Napi::TypeError::New(env, message).ThrowAsJavaScriptException();
  };

  const auto num_args = info.Length();
  if (num_args != 2) {
    std::ostringstream message;
    message << "Expect only 2 arguments. Given: " << num_args;
    Napi::TypeError::New(env, message.str()).ThrowAsJavaScriptException();
    return {};
  }

  if (!info[0].IsExternal()) {
    throw_type_error("Argument 0 should be an online recognizer pointer.");
    return {};
  }

  if (!info[1].IsExternal()) {
    throw_type_error("Argument 1 should be an online stream pointer.");
    return {};
  }

  // Unwrap the native pointers carried by the two External values.
  auto *online_recognizer =
      info[0].As<Napi::External<SherpaOnnxOnlineRecognizer>>().Data();
  auto *online_stream =
      info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();

  // The C API reports the result as an integer; expose it as a boolean.
  const bool at_endpoint = IsEndpoint(online_recognizer, online_stream) != 0;

  return Napi::Boolean::New(env, at_endpoint);
}
// Node-API binding for the C function CreateDisplay().
//
// JavaScript call shape: createDisplay(maxWordPerLine)
//   info[0] - a number, converted with Int32Value() and passed to
//             CreateDisplay().
//
// Returns a Napi::External wrapping the created SherpaOnnxDisplay. The
// External is given a finalizer that calls DestroyDisplay() on the pointer.
// On a bad argument count or a non-number argument, a JavaScript TypeError is
// thrown and a default-constructed External is returned.
static Napi::External<SherpaOnnxDisplay> CreateDisplayWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const auto num_args = info.Length();
  if (num_args != 1) {
    std::ostringstream message;
    message << "Expect only 1 argument. Given: " << num_args;
    Napi::TypeError::New(env, message.str()).ThrowAsJavaScriptException();
    return {};
  }

  if (!info[0].IsNumber()) {
    Napi::TypeError::New(env, "Expect a number as the argument")
        .ThrowAsJavaScriptException();
    return {};
  }

  const int32_t words_per_line = info[0].As<Napi::Number>().Int32Value();

  // CreateDisplay() hands back a pointer-to-const, while the External is
  // declared over the non-const type, hence the const_cast.
  const SherpaOnnxDisplay *created = CreateDisplay(words_per_line);
  SherpaOnnxDisplay *handle = const_cast<SherpaOnnxDisplay *>(created);

  return Napi::External<SherpaOnnxDisplay>::New(
      env, handle,
      [](Napi::Env /*finalizer_env*/, SherpaOnnxDisplay *to_destroy) {
        DestroyDisplay(to_destroy);
      });
}
// Node-API binding for the C function SherpaOnnxPrint().
//
// JavaScript call shape: print(display, idx, text)
//   info[0] - Napi::External<SherpaOnnxDisplay>
//   info[1] - a number, converted with Int32Value()
//   info[2] - a string, converted to UTF-8
//
// Throws a JavaScript TypeError, and returns without printing, when the
// argument count is not exactly 3 or when an argument has the wrong type.
static void PrintWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 3) {
    std::ostringstream os;
    os << "Expect only 3 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    // Argument 0 is unwrapped as a SherpaOnnxDisplay below, so the message
    // names a display rather than an online stream.
    Napi::TypeError::New(env, "Argument 0 should be a display pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  if (!info[1].IsNumber()) {
    Napi::TypeError::New(env, "Argument 1 should be a number.")
        .ThrowAsJavaScriptException();

    return;
  }

  if (!info[2].IsString()) {
    Napi::TypeError::New(env, "Argument 2 should be a string.")
        .ThrowAsJavaScriptException();

    return;
  }

  SherpaOnnxDisplay *display =
      info[0].As<Napi::External<SherpaOnnxDisplay>>().Data();

  int32_t idx = info[1].As<Napi::Number>().Int32Value();

  Napi::String text = info[2].As<Napi::String>();
  // Keep the UTF-8 copy alive in a local so c_str() stays valid for the call.
  std::string s = text.Utf8Value();

  SherpaOnnxPrint(display, idx, s.c_str());
}
void
InitStreamingAsr
(
Napi
::
Env
env
,
Napi
::
Object
exports
)
{
exports
.
Set
(
Napi
::
String
::
New
(
env
,
"createOnlineRecognizer"
),
Napi
::
Function
::
New
(
env
,
CreateOnlineRecognizerWrapper
));
...
...
@@ -458,4 +704,19 @@ void InitStreamingAsr(Napi::Env env, Napi::Object exports) {
exports
.
Set
(
Napi
::
String
::
New
(
env
,
"getOnlineStreamResultAsJson"
),
Napi
::
Function
::
New
(
env
,
GetOnlineStreamResultAsJsonWrapper
));
exports
.
Set
(
Napi
::
String
::
New
(
env
,
"inputFinished"
),
Napi
::
Function
::
New
(
env
,
InputFinishedWrapper
));
exports
.
Set
(
Napi
::
String
::
New
(
env
,
"reset"
),
Napi
::
Function
::
New
(
env
,
ResetOnlineStreamWrapper
));
exports
.
Set
(
Napi
::
String
::
New
(
env
,
"isEndpoint"
),
Napi
::
Function
::
New
(
env
,
IsEndpointWrapper
));
exports
.
Set
(
Napi
::
String
::
New
(
env
,
"createDisplay"
),
Napi
::
Function
::
New
(
env
,
CreateDisplayWrapper
));
exports
.
Set
(
Napi
::
String
::
New
(
env
,
"print"
),
Napi
::
Function
::
New
(
env
,
PrintWrapper
));
}
...
...
请
注册
或
登录
后发表评论