Commit 7774e357 (7774e35749765997e45c9ded6ceca6ed4a178afb)
1 parent 94728bfb
Authored by: wanghsinche, 2025-02-21 21:47:21 +0800
Committed by: GitHub, 2025-02-21 21:47:21 +0800

feat: add mic example for better compatibility (#1909)

Co-authored-by: wanghsinche <wanghsinche>
Showing 3 changed files with 228 additions and 0 deletions:

- nodejs-examples/README.md
- nodejs-examples/package.json
- nodejs-examples/test-online-paraformer-microphone-mic.js
nodejs-examples/README.md (view file @ 7774e35)
...
...
@@ -2,6 +2,8 @@

Note: You need `Node >= 18`.

Note: For Mac M1 and other Apple silicon chips, do check the example
`test-online-paraformer-microphone-mic.js`.

This directory contains nodejs examples for
[sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).
It uses WebAssembly to wrap `sherpa-onnx` for NodeJS and it does not support
multiple threads.
...
...
@@ -278,6 +280,25 @@ rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2

## ./test-online-paraformer-microphone-mic.js

[./test-online-paraformer-microphone-mic.js](./test-online-paraformer-microphone-mic.js)
demonstrates how to do real-time speech recognition from a microphone with a
streaming Paraformer model. In the code we use
[sherpa-onnx-streaming-paraformer-bilingual-zh-en](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english).
It uses [`mic`](https://www.npmjs.com/package/mic) for better compatibility;
do check its npm page before running it.
You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
# extract the model directory first; the archive is removed afterwards
tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2

node ./test-online-paraformer-microphone-mic.js
```
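For reference, this is roughly how the `mic` capture path in the example fits together. A minimal sketch, not part of the commit: the 16000 Hz rate here is an assumption matching the paraformer model (the actual example reads it from `recognizer.config.featConfig.sampleRate`), and note that `mic` shells out to `arecord` (Linux) or `sox` (macOS/Windows), which must be installed separately.

```js
// Minimal sketch of the `mic` capture path (assumption: 16 kHz model rate).
const mic = require('mic');

const micInstance = mic({
  rate: 16000,                 // the example derives this from the recognizer config
  channels: 1,
  bitwidth: 16,
  encoding: 'signed-integer',
  fileType: 'raw',
});

const audioStream = micInstance.getAudioStream();

audioStream.on('data', (buffer) => {
  // `mic` emits raw 16-bit signed PCM; convert to Float32 in [-1, 1)
  // before handing the samples to the sherpa-onnx online stream.
  const pcm = new Int16Array(buffer.buffer, buffer.byteOffset, buffer.length / 2);
  const samples = Float32Array.from(pcm, (s) => s / 32768);
  // ... stream.acceptWaveform(16000, samples) would go here ...
});

micInstance.start();
```

Being based on spawned system recorders rather than a native addon is presumably the compatibility gain over `naudiodon2` that the commit title refers to.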
## ./test-online-paraformer.js

[./test-online-paraformer.js](./test-online-paraformer.js) demonstrates
how to decode a file using a streaming Paraformer model. In the code we use
...
...
nodejs-examples/package.json (view file @ 7774e35)
```json
{
  "dependencies": {
    "mic": "^2.1.2",
    "naudiodon2": "^2.4.0",
    "sherpa-onnx": "^1.10.45",
    "wav": "^1.0.2"
```
...
...
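The new `mic` entry is installed together with the existing dependencies. A typical setup from this directory would be the following (assuming npm; per the `mic` npm page, the system recorder it spawns must also be present):

```bash
cd nodejs-examples
npm install   # installs mic, naudiodon2, sherpa-onnx, and wav from package.json
```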
nodejs-examples/test-online-paraformer-microphone-mic.js (new file, 0 → 100644) (view file @ 7774e35)
```js
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)

// It uses `mic` for better compatibility; do check its
// npm page (https://www.npmjs.com/package/mic) before running it.
const mic = require('mic');
const sherpa_onnx = require('sherpa-onnx');

function createOnlineRecognizer() {
  let onlineParaformerModelConfig = {
    encoder:
        './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx',
    decoder:
        './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx',
  };

  let onlineModelConfig = {
    paraformer: onlineParaformerModelConfig,
    tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
  };

  let recognizerConfig = {
    modelConfig: onlineModelConfig,
    enableEndpoint: 1,
    rule1MinTrailingSilence: 2.4,
    rule2MinTrailingSilence: 1.2,
    rule3MinUtteranceLength: 20,
  };

  return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
}

/**
 * SpeechSession class: works as a session manager together with the
 * formatOutput function.
 * Sample output:

=== Automated Speech Recognition ===
Current Session #1
Time: 8:44:46 PM
------------------------
Recognized Sentences:

[8:44:43 PM] 1. it's so great three result is great great 她还支持中文
[8:44:46 PM] 2. 很厉

------------------------
Recognizing: 真的很厉害太厉害
 */
class SpeechSession {
  constructor() {
    this.startTime = Date.now();
    this.sentences = [];
    this.currentText = '';
    this.lastUpdateTime = Date.now();
  }

  addOrUpdateText(text) {
    this.currentText = text;
    this.lastUpdateTime = Date.now();
  }

  finalizeSentence() {
    if (this.currentText.trim()) {
      this.sentences.push({
        text: this.currentText.trim(),
        timestamp: new Date().toLocaleTimeString()
      });
    }
    this.currentText = '';
  }

  shouldStartNewSession() {
    return Date.now() - this.lastUpdateTime > 10000;  // 10 seconds of silence
  }
}

function formatOutput() {
  clearConsole();
  console.log('\n=== Automated Speech Recognition ===');
  console.log(`Current Session #${sessionCount}`);
  console.log('Time:', new Date().toLocaleTimeString());
  console.log('------------------------');

  // Show the sentences recognized so far in this session
  if (currentSession.sentences.length > 0) {
    console.log('Recognized Sentences:');
    currentSession.sentences.forEach((sentence, index) => {
      console.log(`[${sentence.timestamp}] ${index + 1}. ${sentence.text}`);
    });
    console.log('------------------------');
  }

  // Show the text currently being recognized
  if (currentSession.currentText) {
    console.log('Recognizing:', currentSession.currentText);
  }
}

const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();

let currentSession = new SpeechSession();
let sessionCount = 1;

function clearConsole() {
  process.stdout.write('\x1B[2J\x1B[0f');
}

function exitHandler(options, exitCode) {
  if (options.cleanup) {
    console.log('\nCleaned up resources...');
    micInstance.stop();
    stream.free();
    recognizer.free();
  }
  if (exitCode || exitCode === 0) console.log('Exit code:', exitCode);
  if (options.exit) process.exit();
}

const micInstance = mic({
  rate: recognizer.config.featConfig.sampleRate,
  channels: 1,
  debug: false,  // disable debug output
  device: 'default',
  bitwidth: 16,
  encoding: 'signed-integer',
  exitOnSilence: 6,
  fileType: 'raw'
});

const micInputStream = micInstance.getAudioStream();

function startMic() {
  return new Promise((resolve, reject) => {
    micInputStream.once('startComplete', () => {
      console.log('Microphone started.');
      resolve();
    });
    micInputStream.once('error', (err) => {
      console.error('Microphone start error:', err);
      reject(err);
    });
    micInstance.start();
  });
}

micInputStream.on('data', buffer => {
  // Convert raw 16-bit signed PCM to Float32 samples in [-1, 1).
  // Respect byteOffset/length: Node may hand out slices of a pooled Buffer,
  // so viewing `buffer.buffer` from offset 0 could read unrelated bytes.
  const int16Array =
      new Int16Array(buffer.buffer, buffer.byteOffset, buffer.length / 2);
  const samples = new Float32Array(int16Array.length);
  for (let i = 0; i < int16Array.length; i++) {
    samples[i] = int16Array[i] / 32768.0;
  }

  stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples);
  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const isEndpoint = recognizer.isEndpoint(stream);
  const text = recognizer.getResult(stream).text;

  if (text.length > 0) {
    // Check whether a new session should be started
    if (currentSession.shouldStartNewSession()) {
      currentSession.finalizeSentence();
      sessionCount++;
      currentSession = new SpeechSession();
    }
    currentSession.addOrUpdateText(text);
    formatOutput();
  }

  if (isEndpoint) {
    if (text.length > 0) {
      currentSession.finalizeSentence();
      formatOutput();
    }
    recognizer.reset(stream);
  }
});

micInputStream.on('error', err => {
  console.error('Audio stream error:', err);
});

micInputStream.on('close', () => {
  console.log('Microphone closed.');
});

process.on('exit', exitHandler.bind(null, {cleanup: true}));
process.on('SIGINT', exitHandler.bind(null, {exit: true}));
process.on('SIGUSR1', exitHandler.bind(null, {exit: true}));
process.on('SIGUSR2', exitHandler.bind(null, {exit: true}));
process.on('uncaughtException', exitHandler.bind(null, {exit: true}));

async function main() {
  try {
    console.log('Starting ...');
    await startMic();
    console.log('Initialized, waiting for speech ...');
    formatOutput();
  } catch (err) {
    console.error('Failed to initialize:', err);
    process.exit(1);
  }
}

main();
```
\ No newline at end of file