Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-05-13 20:26:11 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-05-13 20:26:11 +0800
Commit
939fdd942c9436b977c737a2866b827099d4356d
939fdd94
1 parent
031134b4
Add spoken language identification for node-addon-api (#872)
显示空白字符变更
内嵌
并排对比
正在显示
13 个修改的文件
包含
445 行增加
和
1 行删除
.github/scripts/node-addon/run.sh
.github/scripts/test-nodejs-addon-npm.sh
.github/workflows/npm-addon.yaml
nodejs-addon-examples/README.md
nodejs-addon-examples/test_spoken_language_identification.js
nodejs-addon-examples/test_vad_microphone.js
nodejs-addon-examples/test_vad_spoken_language_identification_microphone.js
scripts/node-addon-api/CMakeLists.txt
scripts/node-addon-api/lib/non-streaming-asr.js
scripts/node-addon-api/lib/sherpa-onnx.js
scripts/node-addon-api/lib/spoken-language-identification.js
scripts/node-addon-api/src/sherpa-onnx-node-addon-api.cc
scripts/node-addon-api/src/spoken-language-identification.cc
.github/scripts/node-addon/run.sh
查看文件 @
939fdd9
...
...
@@ -18,6 +18,8 @@ fi
SHERPA_ONNX_VERSION
=
$(
grep
"SHERPA_ONNX_VERSION"
./CMakeLists.txt | cut -d
" "
-f 2 | cut -d
'"'
-f 2
)
echo
"SHERPA_ONNX_VERSION
$SHERPA_ONNX_VERSION
"
# SHERPA_ONNX_VERSION=1.0.20
if
[
-z
$owner
]
;
then
owner
=
k2-fsa
fi
...
...
.github/scripts/test-nodejs-addon-npm.sh
查看文件 @
939fdd9
...
...
@@ -6,6 +6,20 @@ d=nodejs-addon-examples
echo
"dir:
$d
"
cd
$d
echo
"----------spoken language identification----------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.tar.bz2
rm sherpa-onnx-whisper-tiny.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/spoken-language-identification-test-wavs.tar.bz2
tar xvf spoken-language-identification-test-wavs.tar.bz2
rm spoken-language-identification-test-wavs.tar.bz2
node ./test_spoken_language_identification.js
rm -rf sherpa-onnx-whisper-tiny
rm -rf spoken-language-identification-test-wavs
echo
"----------streaming asr----------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
...
...
.github/workflows/npm-addon.yaml
查看文件 @
939fdd9
...
...
@@ -52,6 +52,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
# SHERPA_ONNX_VERSION=1.0.20
src_dir=.github/scripts/node-addon
sed -i.bak s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g $src_dir/package.json
...
...
nodejs-addon-examples/README.md
查看文件 @
939fdd9
...
...
@@ -183,3 +183,21 @@ rm vits-icefall-zh-aishell3.tar.bz2
node ./test_tts_non_streaming_vits_zh_aishell3.js
```
## Spoken language identification with Whisper multi-lingual models
```
bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.tar.bz2
rm sherpa-onnx-whisper-tiny.tar.bz2
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/spoken-language-identification-test-wavs.tar.bz2
tar xvf spoken-language-identification-test-wavs.tar.bz2
rm spoken-language-identification-test-wavs.tar.bz2
node ./test_spoken_language_identification.js
# To run VAD + spoken language identification using a microphone
npm install naudiodon2
node ./test_vad_spoken_language_identification_microphone.js
```
...
...
nodejs-addon-examples/test_spoken_language_identification.js
0 → 100644
查看文件 @
939fdd9
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
//
// Spoken language identification example: runs a multilingual Whisper
// model over a set of test wave files and prints the detected language
// code together with its English display name.
const sherpa_onnx = require('sherpa-onnx-node');

// Builds a SpokenLanguageIdentification instance backed by the
// int8-quantized Whisper tiny encoder/decoder models.
function createSpokenLanguageID() {
  const slidConfig = {
    whisper: {
      encoder: './sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx',
      decoder: './sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx',
    },
    debug: true,
    numThreads: 1,
    provider: 'cpu',
  };
  return new sherpa_onnx.SpokenLanguageIdentification(slidConfig);
}

const slid = createSpokenLanguageID();

// Each file name encodes its ground-truth language, e.g. "de-german.wav".
const testWaves = [
  './spoken-language-identification-test-wavs/ar-arabic.wav',
  './spoken-language-identification-test-wavs/de-german.wav',
  './spoken-language-identification-test-wavs/en-english.wav',
  './spoken-language-identification-test-wavs/fr-french.wav',
  './spoken-language-identification-test-wavs/pt-portuguese.wav',
  './spoken-language-identification-test-wavs/es-spanish.wav',
  './spoken-language-identification-test-wavs/zh-chinese.wav',
];

// Maps a 2-letter language code (e.g. "de") to its English name.
const display = new Intl.DisplayNames(['en'], {type: 'language'})

testWaves.forEach((waveFile) => {
  const stream = slid.createStream();
  const wave = sherpa_onnx.readWave(waveFile);
  stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
  const lang = slid.compute(stream);
  // Paths have the form ./<dir>/<file>, so index 2 is the bare file name.
  console.log(waveFile.split('/')[2], lang, display.of(lang));
});
...
...
nodejs-addon-examples/test_vad_microphone.js
查看文件 @
939fdd9
...
...
@@ -26,7 +26,7 @@ function createVad() {
return
new
sherpa_onnx
.
Vad
(
config
,
bufferSizeInSeconds
);
}
vad
=
createVad
();
const
vad
=
createVad
();
const
bufferSizeInSeconds
=
30
;
const
buffer
=
...
...
nodejs-addon-examples/test_vad_spoken_language_identification_microphone.js
0 → 100644
查看文件 @
939fdd9
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
//
// Microphone demo: silero-VAD segments live audio; each finished speech
// segment is run through a Whisper-based spoken language identifier and
// also saved to a .wav file.
const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx-node');

// Creates the voice-activity detector used to chop the mic stream into
// speech segments.
function createVad() {
  // please download silero_vad.onnx from
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  const config = {
    sileroVad: {
      model: './silero_vad.onnx',
      threshold: 0.5,
      minSpeechDuration: 0.25,
      minSilenceDuration: 0.5,
      windowSize: 512,
    },
    sampleRate: 16000,
    debug: true,
    numThreads: 1,
  };

  const bufferSizeInSeconds = 60;

  return new sherpa_onnx.Vad(config, bufferSizeInSeconds);
}

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
// Creates the Whisper-based spoken language identifier.
function createSpokenLanguageID() {
  const config = {
    whisper: {
      encoder: './sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx',
      decoder: './sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx',
    },
    debug: true,
    numThreads: 1,
    provider: 'cpu',
  };
  return new sherpa_onnx.SpokenLanguageIdentification(config);
}

const slid = createSpokenLanguageID();
const vad = createVad();

// Maps a 2-letter language code (e.g. "de") to its English name.
const display = new Intl.DisplayNames(['en'], {type: 'language'})

// Ring buffer that holds raw mic samples until the VAD consumes them,
// one windowSize chunk at a time.
const bufferSizeInSeconds = 30;
const buffer =
    new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: vad.config.sampleRate,
  }
});

let printed = false;  // ensures "Detected speech" is printed once per segment
let index = 0;        // segment sequence number, used in logs and filenames
ai.on('data', data => {
  const windowSize = vad.config.sileroVad.windowSize;
  // data.buffer is raw Float32 PCM from naudiodon2.
  buffer.push(new Float32Array(data.buffer));
  while (buffer.size() > windowSize) {
    const samples = buffer.get(buffer.head(), windowSize);
    buffer.pop(windowSize);
    vad.acceptWaveform(samples)
    if (vad.isDetected() && !printed) {
      console.log(`${index}: Detected speech`)
      printed = true;
    }

    if (!vad.isDetected()) {
      printed = false;
    }

    // Drain every finished speech segment from the VAD.
    while (!vad.isEmpty()) {
      const segment = vad.front();
      vad.pop();

      // Identify the language of this segment.
      const stream = slid.createStream();
      stream.acceptWaveform(
          {samples: segment.samples, sampleRate: vad.config.sampleRate});

      const lang = slid.compute(stream);
      const fullLang = display.of(lang);

      // Filename: <index>-<Language>-<HH:MM:SS>.wav
      const filename = `${index}-${fullLang}-${
          new Date()
              .toLocaleTimeString('en-US', {hour12: false})
              .split(' ')[0]}.wav`;
      sherpa_onnx.writeWave(
          filename,
          {samples: segment.samples, sampleRate: vad.config.sampleRate});
      const duration = segment.samples.length / vad.config.sampleRate;
      console.log(`${index} End of speech. Duration: ${duration} seconds.\nDetected language: ${fullLang}`);
      console.log(`Saved to ${filename}`);
      index += 1;
    }
  }
});

ai.on('close', () => {
  console.log('Free resources');
});

ai.start();
console.log('Started! Please speak')
...
...
scripts/node-addon-api/CMakeLists.txt
查看文件 @
939fdd9
...
...
@@ -21,6 +21,7 @@ set(srcs
src/non-streaming-asr.cc
src/non-streaming-tts.cc
src/sherpa-onnx-node-addon-api.cc
src/spoken-language-identification.cc
src/streaming-asr.cc
src/vad.cc
src/wave-reader.cc
...
...
scripts/node-addon-api/lib/non-streaming-asr.js
查看文件 @
939fdd9
...
...
@@ -37,4 +37,5 @@ class OfflineRecognizer {
module
.
exports
=
{
OfflineRecognizer
,
OfflineStream
,
}
...
...
scripts/node-addon-api/lib/sherpa-onnx.js
查看文件 @
939fdd9
...
...
@@ -3,6 +3,7 @@ const streaming_asr = require('./streaming-asr.js');
const
non_streaming_asr
=
require
(
'./non-streaming-asr.js'
);
const
non_streaming_tts
=
require
(
'./non-streaming-tts.js'
);
const
vad
=
require
(
'./vad.js'
);
const
slid
=
require
(
'./spoken-language-identification.js'
);
module
.
exports
=
{
OnlineRecognizer
:
streaming_asr
.
OnlineRecognizer
,
...
...
@@ -13,4 +14,5 @@ module.exports = {
Display
:
streaming_asr
.
Display
,
Vad
:
vad
.
Vad
,
CircularBuffer
:
vad
.
CircularBuffer
,
SpokenLanguageIdentification
:
slid
.
SpokenLanguageIdentification
,
}
...
...
scripts/node-addon-api/lib/spoken-language-identification.js
0 → 100644
查看文件 @
939fdd9
const
addon
=
require
(
'./addon.js'
);
const
non_streaming_asr
=
require
(
'./non-streaming-asr.js'
);
class
SpokenLanguageIdentification
{
constructor
(
config
)
{
this
.
handle
=
addon
.
createSpokenLanguageIdentification
(
config
);
this
.
config
=
config
;
}
createStream
()
{
return
new
non_streaming_asr
.
OfflineStream
(
addon
.
createSpokenLanguageIdentificationOfflineStream
(
this
.
handle
));
}
// return a string containing the language code (2 characters),
// e.g., en, de, fr, es, zh
// en -> English
// de -> German
// fr -> French
// es -> Spanish
// zh -> Chinese
compute
(
stream
)
{
return
addon
.
spokenLanguageIdentificationCompute
(
this
.
handle
,
stream
.
handle
);
}
}
module
.
exports
=
{
SpokenLanguageIdentification
,
}
...
...
scripts/node-addon-api/src/sherpa-onnx-node-addon-api.cc
查看文件 @
939fdd9
...
...
@@ -15,6 +15,8 @@ void InitWaveReader(Napi::Env env, Napi::Object exports);
void
InitWaveWriter
(
Napi
::
Env
env
,
Napi
::
Object
exports
);
void
InitSpokenLanguageID
(
Napi
::
Env
env
,
Napi
::
Object
exports
);
Napi
::
Object
Init
(
Napi
::
Env
env
,
Napi
::
Object
exports
)
{
InitStreamingAsr
(
env
,
exports
);
InitNonStreamingAsr
(
env
,
exports
);
...
...
@@ -22,6 +24,7 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) {
InitVad
(
env
,
exports
);
InitWaveReader
(
env
,
exports
);
InitWaveWriter
(
env
,
exports
);
InitSpokenLanguageID
(
env
,
exports
);
return
exports
;
}
...
...
scripts/node-addon-api/src/spoken-language-identification.cc
0 → 100644
查看文件 @
939fdd9
// scripts/node-addon-api/src/spoken-language-identification.cc
//
// Copyright (c) 2024 Xiaomi Corporation
#include <sstream>
#include "napi.h" // NOLINT
#include "sherpa-onnx/c-api/c-api.h"
// Extracts the `whisper` sub-object of the JS config object into the
// C-API whisper config.
//
// Ownership: `c.encoder` / `c.decoder` are heap-allocated NUL-terminated
// copies (new char[]); the caller must delete[] them after use (see
// CreateSpokenLanguageIdentificationWrapper).
//
// Fix vs. original: the original called memset() and std::copy() without
// including <cstring>/<algorithm>, relying on transitive includes. Use
// value-initialization and std::string::copy() instead, which need no
// extra headers.
static SherpaOnnxSpokenLanguageIdentificationWhisperConfig
GetSpokenLanguageIdentificationWhisperConfig(Napi::Object obj) {
  // Value-initialization zeroes every field (pointers null, ints 0).
  SherpaOnnxSpokenLanguageIdentificationWhisperConfig c = {};

  // Missing or non-object `whisper` key: return the zeroed config.
  if (!obj.Has("whisper") || !obj.Get("whisper").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("whisper").As<Napi::Object>();

  if (o.Has("encoder") && o.Get("encoder").IsString()) {
    Napi::String encoder = o.Get("encoder").As<Napi::String>();
    std::string s = encoder.Utf8Value();

    // Copy into a NUL-terminated heap buffer owned by the config.
    char *p = new char[s.size() + 1];
    s.copy(p, s.size());
    p[s.size()] = '\0';

    c.encoder = p;
  }

  if (o.Has("decoder") && o.Get("decoder").IsString()) {
    Napi::String decoder = o.Get("decoder").As<Napi::String>();
    std::string s = decoder.Utf8Value();

    // Copy into a NUL-terminated heap buffer owned by the config.
    char *p = new char[s.size() + 1];
    s.copy(p, s.size());
    p[s.size()] = '\0';

    c.decoder = p;
  }

  if (o.Has("tailPaddings") && o.Get("tailPaddings").IsNumber()) {
    c.tail_paddings = o.Get("tailPaddings").As<Napi::Number>().Int32Value();
  }

  return c;
}
// JS binding: createSpokenLanguageIdentification(configObject).
//
// Validates the single object argument, converts it to the C-API config,
// creates the native identifier, and returns it wrapped in a
// Napi::External whose finalizer destroys it. Throws a JS TypeError (and
// returns an empty handle) on bad arguments or failed creation.
//
// Fix vs. original: the original called memset() and std::copy() without
// including <cstring>/<algorithm>, relying on transitive includes. Use
// value-initialization and std::string::copy() instead.
static Napi::External<SherpaOnnxSpokenLanguageIdentification>
CreateSpokenLanguageIdentificationWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();
    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
    return {};
  }

  if (!info[0].IsObject()) {
    Napi::TypeError::New(env, "You should pass an object as the only argument.")
        .ThrowAsJavaScriptException();
    return {};
  }

  Napi::Object o = info[0].As<Napi::Object>();

  // Value-initialization zeroes every field of the config.
  SherpaOnnxSpokenLanguageIdentificationConfig c = {};

  // Transfers ownership of heap-allocated encoder/decoder strings to `c`;
  // they are released below once the identifier has been created.
  c.whisper = GetSpokenLanguageIdentificationWhisperConfig(o);

  if (o.Has("numThreads") && o.Get("numThreads").IsNumber()) {
    c.num_threads = o.Get("numThreads").As<Napi::Number>().Int32Value();
  }

  // `debug` may be given as a boolean or a number.
  if (o.Has("debug") &&
      (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
    if (o.Get("debug").IsBoolean()) {
      c.debug = o.Get("debug").As<Napi::Boolean>().Value();
    } else {
      c.debug = o.Get("debug").As<Napi::Number>().Int32Value();
    }
  }

  if (o.Has("provider") && o.Get("provider").IsString()) {
    Napi::String provider = o.Get("provider").As<Napi::String>();
    std::string s = provider.Utf8Value();

    // Copy into a NUL-terminated heap buffer owned by the config.
    char *p = new char[s.size() + 1];
    s.copy(p, s.size());
    p[s.size()] = '\0';

    c.provider = p;
  }

  const SherpaOnnxSpokenLanguageIdentification *slid =
      SherpaOnnxCreateSpokenLanguageIdentification(&c);

  // Release the temporary strings unconditionally; the C API is expected
  // to copy what it needs during creation (the strings are freed here
  // while the identifier stays alive).
  if (c.whisper.encoder) {
    delete[] c.whisper.encoder;
  }

  if (c.whisper.decoder) {
    delete[] c.whisper.decoder;
  }

  if (c.provider) {
    delete[] c.provider;
  }

  if (!slid) {
    Napi::TypeError::New(env, "Please check your config!")
        .ThrowAsJavaScriptException();
    return {};
  }

  // Wrap the native pointer; the finalizer destroys it when the JS value
  // is garbage collected.
  return Napi::External<SherpaOnnxSpokenLanguageIdentification>::New(
      env, const_cast<SherpaOnnxSpokenLanguageIdentification *>(slid),
      [](Napi::Env env, SherpaOnnxSpokenLanguageIdentification *slid) {
        SherpaOnnxDestroySpokenLanguageIdentification(slid);
      });
}
// JS binding: createSpokenLanguageIdentificationOfflineStream(slid).
//
// Takes the external identifier handle and returns a new offline stream
// wrapped in a Napi::External whose finalizer destroys the stream.
static Napi::External<SherpaOnnxOfflineStream>
SpokenLanguageIdentificationCreateOfflineStreamWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  // Exactly one argument: the identifier handle.
  if (info.Length() != 1) {
    std::ostringstream msg;
    msg << "Expect only 1 argument. Given: " << info.Length();
    Napi::TypeError::New(env, msg.str()).ThrowAsJavaScriptException();
    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env,
        "You should pass an offline language ID pointer as the only argument")
        .ThrowAsJavaScriptException();
    return {};
  }

  auto *handle =
      info[0]
          .As<Napi::External<SherpaOnnxSpokenLanguageIdentification>>()
          .Data();

  SherpaOnnxOfflineStream *offline_stream =
      SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(handle);

  // The finalizer frees the stream when the JS value is collected.
  return Napi::External<SherpaOnnxOfflineStream>::New(
      env, offline_stream, [](Napi::Env env, SherpaOnnxOfflineStream *stream) {
        DestroyOfflineStream(stream);
      });
}
// JS binding: spokenLanguageIdentificationCompute(slid, stream).
//
// Runs language identification on a fully-fed offline stream and returns
// the detected language code as a JS string. Throws a JS TypeError (and
// returns an empty handle) on bad arguments.
static Napi::String SpokenLanguageIdentificationComputeWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();
    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env, "Argument 0 should be an offline spoken language ID pointer.")
        .ThrowAsJavaScriptException();
    return {};
  }

  if (!info[1].IsExternal()) {
    Napi::TypeError::New(env, "Argument 1 should be an offline stream pointer.")
        .ThrowAsJavaScriptException();
    return {};
  }

  SherpaOnnxSpokenLanguageIdentification *slid =
      info[0]
          .As<Napi::External<SherpaOnnxSpokenLanguageIdentification>>()
          .Data();

  SherpaOnnxOfflineStream *stream =
      info[1].As<Napi::External<SherpaOnnxOfflineStream>>().Data();

  // NOTE(review): `r` is dereferenced without a null check — assumes the
  // C API always returns a valid result; confirm against c-api.h.
  const SherpaOnnxSpokenLanguageIdentificationResult *r =
      SherpaOnnxSpokenLanguageIdentificationCompute(slid, stream);

  // Copy the language code before freeing the native result.
  std::string lang = r->lang;
  SherpaOnnxDestroySpokenLanguageIdentificationResult(r);

  return Napi::String::New(env, lang);
}
// Registers the spoken-language-identification bindings on `exports`.
// Called from Init() in sherpa-onnx-node-addon-api.cc.
void InitSpokenLanguageID(Napi::Env env, Napi::Object exports) {
  Napi::String create_key =
      Napi::String::New(env, "createSpokenLanguageIdentification");
  exports.Set(create_key,
              Napi::Function::New(env, CreateSpokenLanguageIdentificationWrapper));

  Napi::String create_stream_key =
      Napi::String::New(env, "createSpokenLanguageIdentificationOfflineStream");
  exports.Set(
      create_stream_key,
      Napi::Function::New(
          env, SpokenLanguageIdentificationCreateOfflineStreamWrapper));

  Napi::String compute_key =
      Napi::String::New(env, "spokenLanguageIdentificationCompute");
  exports.Set(
      compute_key,
      Napi::Function::New(env, SpokenLanguageIdentificationComputeWrapper));
}
...
...
请
注册
或
登录
后发表评论