Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-05-28 14:59:47 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-05-28 14:59:47 +0800
Commit
bcaa6df3892e74e42bf8cad18aaee951b5b38b44
bcaa6df3
1 parent
b1c7d04c
Add VAD demo for Java API (#928)
显示空白字符变更
内嵌
并排对比
正在显示
14 个修改的文件
包含
604 行增加
和
0 行删除
.github/workflows/run-java-test.yaml
java-api-examples/README.md
java-api-examples/VadNonStreamingParaformer.java
java-api-examples/VadRemoveSilence.java
java-api-examples/run-vad-non-streaming-paraformer.sh
java-api-examples/run-vad-remove-slience.sh
sherpa-onnx/java-api/Makefile
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/SileroVadModelConfig.java
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/SpeechSegment.java
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/Vad.java
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/VadModelConfig.java
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/WaveWriter.java
sherpa-onnx/jni/CMakeLists.txt
sherpa-onnx/jni/wave-writer.cc
.github/workflows/run-java-test.yaml
查看文件 @
bcaa6df
...
...
@@ -100,12 +100,32 @@ jobs:
-DBUILD_SHARED_LIBS=ON \
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
-DSHERPA_ONNX_ENABLE_BINARY=OFF \
-DBUILD_ESPEAK_NG_EXE=OFF \
-DSHERPA_ONNX_ENABLE_JNI=ON \
..
make -j4
ls -lh lib
-
name
:
Run java test (VAD + Non-streaming Paraformer)
shell
:
bash
run
:
|
cd ./java-api-examples
./run-vad-non-streaming-paraformer.sh
rm *.onnx
ls -lh *.wav
rm *.wav
rm -rf sherpa-onnx-*
-
name
:
Run java test (VAD remove silence)
shell
:
bash
run
:
|
cd ./java-api-examples
./run-vad-remove-slience.sh
rm *.onnx
ls -lh *.wav
rm *.wav
-
name
:
Run java test (speaker identification)
shell
:
bash
run
:
|
...
...
java-api-examples/README.md
查看文件 @
bcaa6df
...
...
@@ -56,3 +56,15 @@ The punctuation model supports both English and Chinese.
```
bash
./run-speaker-identification.sh
```
## VAD (Remove silence)
```
bash
./run-vad-remove-slience.sh
```
## VAD + Non-streaming Paraformer for speech recognition
```
bash
./run-vad-non-streaming-paraformer.sh
```
...
...
java-api-examples/VadNonStreamingParaformer.java
0 → 100644
查看文件 @
bcaa6df
// Copyright 2024 Xiaomi Corporation
// This file shows how to use a silero_vad model with a non-streaming Paraformer
// for speech recognition.
import
com.k2fsa.sherpa.onnx.*
;
import
java.util.Arrays
;
public class VadNonStreamingParaformer {
  // Number of samples handed to the VAD per acceptWaveform() call; it is also
  // the window size configured in createVad().
  private static final int WINDOW_SIZE = 512;

  // Sample rate used for the VAD config, for the recognizer input, and for
  // converting sample offsets/counts to seconds when printing.
  private static final int SAMPLE_RATE = 16000;

  /**
   * Creates a VAD backed by ./silero_vad.onnx.
   *
   * @return a new {@link Vad}; the caller should call {@code release()} when done
   */
  public static Vad createVad() {
    // please download ./silero_vad.onnx from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    String model = "./silero_vad.onnx";

    SileroVadModelConfig sileroVad =
        SileroVadModelConfig.builder()
            .setModel(model)
            .setThreshold(0.5f)
            .setMinSilenceDuration(0.25f)
            .setMinSpeechDuration(0.5f)
            .setWindowSize(WINDOW_SIZE)
            .build();

    VadModelConfig config =
        VadModelConfig.builder()
            .setSileroVadModelConfig(sileroVad)
            .setSampleRate(SAMPLE_RATE)
            .setNumThreads(1)
            .setDebug(true)
            .setProvider("cpu")
            .build();

    return new Vad(config);
  }

  /**
   * Creates a non-streaming Paraformer recognizer using greedy search.
   *
   * @return a new {@link OfflineRecognizer}
   */
  public static OfflineRecognizer createOfflineRecognizer() {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-03-28-chinese-english
    // to download model files
    String model = "./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx";
    String tokens = "./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt";

    // Note: the recognizer only needs the model and the tokens. The previous
    // version also opened a test wave file here without ever using it.
    OfflineParaformerModelConfig paraformer =
        OfflineParaformerModelConfig.builder().setModel(model).build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setParaformer(paraformer)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    return new OfflineRecognizer(config);
  }

  /**
   * Decodes every segment currently queued in the VAD, prints the non-empty
   * results as "start--end: text", and removes the segments from the queue.
   */
  private static void decodeQueuedSegments(Vad vad, OfflineRecognizer recognizer) {
    while (!vad.empty()) {
      SpeechSegment segment = vad.front();
      float[] segmentSamples = segment.getSamples();

      float startTime = segment.getStart() / (float) SAMPLE_RATE;
      float duration = segmentSamples.length / (float) SAMPLE_RATE;

      OfflineStream stream = recognizer.createStream();
      stream.acceptWaveform(segmentSamples, SAMPLE_RATE);
      recognizer.decode(stream);

      String text = recognizer.getResult(stream).getText();
      if (!text.isEmpty()) {
        System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text);
      }

      vad.pop();
    }
  }

  /**
   * Feeds ./lei-jun-test.wav to the VAD window by window and recognizes each
   * speech segment the VAD produces.
   */
  public static void main(String[] args) {
    Vad vad = createVad();
    try {
      OfflineRecognizer recognizer = createOfflineRecognizer();

      // You can download the test file from
      // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
      String testWaveFilename = "./lei-jun-test.wav";
      WaveReader reader = new WaveReader(testWaveFilename);

      // Fetch the samples once instead of on every loop iteration.
      float[] allSamples = reader.getSamples();

      // Only whole windows are fed; a trailing partial window is ignored.
      int numIter = allSamples.length / WINDOW_SIZE;

      for (int i = 0; i != numIter; ++i) {
        int start = i * WINDOW_SIZE;
        float[] window = Arrays.copyOfRange(allSamples, start, start + WINDOW_SIZE);
        vad.acceptWaveform(window);

        // Drain whenever segments are queued, not only while speech is
        // currently detected; otherwise segments that are still queued after
        // the last window would never be decoded.
        decodeQueuedSegments(vad, recognizer);
      }
    } finally {
      // Free the native VAD eagerly instead of waiting for finalization.
      vad.release();
    }
  }
}
...
...
java-api-examples/VadRemoveSilence.java
0 → 100644
查看文件 @
bcaa6df
// Copyright 2024 Xiaomi Corporation
// This file shows how to use a silero_vad model to remove silences from
// a wave file.
import
com.k2fsa.sherpa.onnx.*
;
import
java.util.ArrayList
;
import
java.util.Arrays
;
public class VadRemoveSilence {
  // Number of samples handed to the VAD per acceptWaveform() call; it is also
  // the window size set in the silero VAD config below.
  private static final int WINDOW_SIZE = 512;

  // Sample rate used for the VAD config and for the output wave file.
  private static final int SAMPLE_RATE = 16000;

  /**
   * Runs the VAD over ./lei-jun-test.wav, concatenates all detected speech
   * segments and writes them to lei-jun-test-no-silence.wav.
   */
  public static void main(String[] args) {
    // please download ./silero_vad.onnx from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    String model = "./silero_vad.onnx";

    SileroVadModelConfig sileroVad =
        SileroVadModelConfig.builder()
            .setModel(model)
            .setThreshold(0.5f)
            .setMinSilenceDuration(0.25f)
            .setMinSpeechDuration(0.5f)
            .setWindowSize(WINDOW_SIZE)
            .build();

    VadModelConfig config =
        VadModelConfig.builder()
            .setSileroVadModelConfig(sileroVad)
            .setSampleRate(SAMPLE_RATE)
            .setNumThreads(1)
            .setDebug(true)
            .setProvider("cpu")
            .build();

    ArrayList<float[]> segments = new ArrayList<>();

    Vad vad = new Vad(config);
    try {
      // You can download the test file from
      // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
      String testWaveFilename = "./lei-jun-test.wav";
      WaveReader reader = new WaveReader(testWaveFilename);

      // Fetch the samples once instead of on every loop iteration.
      float[] input = reader.getSamples();

      // Only whole windows are fed; a trailing partial window is ignored.
      int numIter = input.length / WINDOW_SIZE;

      for (int i = 0; i != numIter; ++i) {
        int start = i * WINDOW_SIZE;
        float[] window = Arrays.copyOfRange(input, start, start + WINDOW_SIZE);
        vad.acceptWaveform(window);

        // Drain whenever segments are queued, not only while speech is
        // currently detected; otherwise segments that are still queued after
        // the last window would be left out of the output.
        while (!vad.empty()) {
          // if you want to get the starting time of this segment, you can use
          /* float startTime = vad.front().getStart() / 16000.0f; */
          segments.add(vad.front().getSamples());
          vad.pop();
        }
      }
    } finally {
      // Free the native VAD eagerly instead of waiting for finalization.
      vad.release();
    }

    // get total number of samples
    int n = 0;
    for (float[] s : segments) {
      n += s.length;
    }

    // Concatenate all segments into one contiguous array.
    float[] allSamples = new float[n];
    int offset = 0;
    for (float[] s : segments) {
      System.arraycopy(s, 0, allSamples, offset, s.length);
      offset += s.length;
    }

    String outFilename = "lei-jun-test-no-silence.wav";
    // WaveWriter.write reports success as a boolean; do not claim the file was
    // saved when it was not.
    if (!WaveWriter.write(outFilename, allSamples, SAMPLE_RATE)) {
      System.err.printf("Failed to save %s\n", outFilename);
      System.exit(1);
    }
    System.out.printf("Saved to %s\n", outFilename);
  }
}
...
...
java-api-examples/run-vad-non-streaming-paraformer.sh
0 → 100755
查看文件 @
bcaa6df
#!/usr/bin/env bash
#
# Builds the JNI library and the Java API jar if they are missing, downloads
# the VAD model, the test wave and the Paraformer model if they are missing,
# and then runs ./VadNonStreamingParaformer.java.

set -ex

# Build the JNI shared library unless a macOS (.dylib) or Linux (.so) build
# already exists.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it is missing.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2

  tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
fi

# $PWD is quoted so that a checkout path containing whitespace is passed to
# java as a single argument.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./VadNonStreamingParaformer.java
...
...
java-api-examples/run-vad-remove-slience.sh
0 → 100755
查看文件 @
bcaa6df
#!/usr/bin/env bash
#
# Builds the JNI library and the Java API jar if they are missing, downloads
# the VAD model and the test wave if they are missing, and then runs
# ./VadRemoveSilence.java.

set -ex

# Build the JNI shared library unless a macOS (.dylib) or Linux (.so) build
# already exists.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it is missing.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

# $PWD is quoted so that a checkout path containing whitespace is passed to
# java as a single argument.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./VadRemoveSilence.java
...
...
sherpa-onnx/java-api/Makefile
查看文件 @
bcaa6df
...
...
@@ -7,6 +7,7 @@ out_jar := $(out_dir)/sherpa-onnx.jar
package_dir
:=
com/k2fsa/sherpa/onnx
java_files
:=
WaveReader.java
java_files
+=
WaveWriter.java
java_files
+=
EndpointRule.java
java_files
+=
EndpointConfig.java
java_files
+=
FeatureConfig.java
...
...
@@ -56,6 +57,11 @@ java_files += SpeakerEmbeddingExtractorConfig.java
java_files
+=
SpeakerEmbeddingExtractor.java
java_files
+=
SpeakerEmbeddingManager.java
java_files
+=
SileroVadModelConfig.java
java_files
+=
VadModelConfig.java
java_files
+=
SpeechSegment.java
java_files
+=
Vad.java
class_files
:=
$
(
java_files:%.java
=
%.class
)
java_files
:=
$
(
addprefix src/
$(package_dir)
/,
$(java_files)
)
...
...
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/SileroVadModelConfig.java
0 → 100644
查看文件 @
bcaa6df
// Copyright 2024 Xiaomi Corporation
package
com
.
k2fsa
.
sherpa
.
onnx
;
/**
 * Immutable settings for a silero VAD model.
 *
 * <p>Instances are created through {@link #builder()}; every value has a
 * default in {@link Builder}.
 */
public class SileroVadModelConfig {
    // NOTE(review): native code presumably looks these fields up by name;
    // confirm against the JNI layer before renaming any of them.
    private final String model;
    private final float threshold;
    private final float minSilenceDuration;
    private final float minSpeechDuration;
    private final int windowSize;

    /** Copies every setting out of the given builder. */
    private SileroVadModelConfig(Builder source) {
        model = source.model;
        threshold = source.threshold;
        minSilenceDuration = source.minSilenceDuration;
        minSpeechDuration = source.minSpeechDuration;
        windowSize = source.windowSize;
    }

    /** Returns a fresh builder pre-populated with the default values. */
    public static Builder builder() {
        return new Builder();
    }

    /** Returns the configured model path. */
    public String getModel() {
        return this.model;
    }

    /** Returns the configured threshold. */
    public float getThreshold() {
        return this.threshold;
    }

    /** Returns the configured minimum silence duration. */
    public float getMinSilenceDuration() {
        return this.minSilenceDuration;
    }

    /** Returns the configured minimum speech duration. */
    public float getMinSpeechDuration() {
        return this.minSpeechDuration;
    }

    /** Returns the configured window size. */
    public int getWindowSize() {
        return this.windowSize;
    }

    /**
     * Fluent builder for {@link SileroVadModelConfig}.
     *
     * <p>Defaults: empty model path, threshold 0.5, minimum silence duration
     * 0.25, minimum speech duration 0.5, window size 512.
     */
    public static class Builder {
        private String model = "";
        private float threshold = 0.5f;
        private float minSilenceDuration = 0.25f;
        private float minSpeechDuration = 0.5f;
        private int windowSize = 512;

        /** Sets the model path and returns this builder. */
        public Builder setModel(String model) {
            this.model = model;
            return this;
        }

        /** Sets the threshold and returns this builder. */
        public Builder setThreshold(float threshold) {
            this.threshold = threshold;
            return this;
        }

        /** Sets the minimum silence duration and returns this builder. */
        public Builder setMinSilenceDuration(float minSilenceDuration) {
            this.minSilenceDuration = minSilenceDuration;
            return this;
        }

        /** Sets the minimum speech duration and returns this builder. */
        public Builder setMinSpeechDuration(float minSpeechDuration) {
            this.minSpeechDuration = minSpeechDuration;
            return this;
        }

        /** Sets the window size and returns this builder. */
        public Builder setWindowSize(int windowSize) {
            this.windowSize = windowSize;
            return this;
        }

        /** Creates an immutable config from the builder's current values. */
        public SileroVadModelConfig build() {
            return new SileroVadModelConfig(this);
        }
    }
}
...
...
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/SpeechSegment.java
0 → 100644
查看文件 @
bcaa6df
package
com
.
k2fsa
.
sherpa
.
onnx
;
/**
 * A speech segment: a start offset together with the segment's samples.
 *
 * <p>The example programs divide {@link #getStart()} by the sample rate to get
 * the start time in seconds, i.e. they treat it as a sample index.
 */
public class SpeechSegment {
    private final int start;
    private final float[] samples;

    /**
     * @param start start offset of the segment
     * @param samples samples of the segment; the array is stored as-is, not copied
     */
    public SpeechSegment(int start, float[] samples) {
        this.samples = samples;
        this.start = start;
    }

    /** Returns the same array instance that was passed to the constructor. */
    public float[] getSamples() {
        return this.samples;
    }

    /** Returns the start offset passed to the constructor. */
    public int getStart() {
        return this.start;
    }
}
...
...
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/Vad.java
0 → 100644
查看文件 @
bcaa6df
// Copyright 2024 Xiaomi Corporation
package
com
.
k2fsa
.
sherpa
.
onnx
;
/**
 * Java wrapper around a native voice activity detector.
 *
 * <p>The native object is identified by an opaque handle. Call
 * {@link #release()} when the detector is no longer needed; {@link #finalize()}
 * releases it as a fallback. After {@code release()} every other operation
 * throws {@link IllegalStateException} instead of handing a zero handle to
 * native code.
 */
public class Vad {
    static {
        System.loadLibrary("sherpa-onnx-jni");
    }

    // Opaque native handle; 0 means "no native object".
    private long ptr = 0;

    /**
     * Creates the native detector from the given configuration.
     *
     * @param config the VAD configuration passed to native code
     */
    public Vad(VadModelConfig config) {
        ptr = newFromFile(config);
    }

    /** Releases the native detector if {@link #release()} was not called explicitly. */
    @Override
    protected void finalize() throws Throwable {
        try {
            release();
        } finally {
            super.finalize();
        }
    }

    /** Destroys the native detector. Calling this more than once is a no-op. */
    public void release() {
        if (this.ptr == 0) {
            return;
        }
        delete(this.ptr);
        this.ptr = 0;
    }

    /**
     * Feeds samples to the detector.
     *
     * @param samples the samples to process
     * @throws IllegalStateException if there is no native detector
     */
    public void acceptWaveform(float[] samples) {
        acceptWaveform(handle(), samples);
    }

    /**
     * @return whether the native detector reports that it has no queued segment
     * @throws IllegalStateException if there is no native detector
     */
    public boolean empty() {
        return empty(handle());
    }

    /**
     * Removes the segment at the front of the queue.
     *
     * @throws IllegalStateException if there is no native detector
     */
    public void pop() {
        pop(handle());
    }

    /**
     * Forwards to the native {@code clear} operation.
     *
     * @throws IllegalStateException if there is no native detector
     */
    public void clear() {
        clear(handle());
    }

    /**
     * Forwards to the native {@code reset} operation.
     *
     * @throws IllegalStateException if there is no native detector
     */
    public void reset() {
        reset(handle());
    }

    /**
     * Returns the segment at the front of the queue without removing it.
     *
     * @return the front segment
     * @throws IllegalStateException if there is no native detector
     */
    public SpeechSegment front() {
        // The native side returns a two-element array: [0] is the start as an
        // Integer, [1] is the float[] of samples.
        Object[] arr = front(handle());
        int start = (int) arr[0];
        float[] samples = (float[]) arr[1];

        return new SpeechSegment(start, samples);
    }

    /**
     * @return whether the native detector currently reports speech
     * @throws IllegalStateException if there is no native detector
     */
    public boolean isSpeechDetected() {
        return isSpeechDetected(handle());
    }

    /**
     * Returns the native handle, failing fast when it is zero.
     *
     * <p>NOTE(review): a zero handle is presumably invalid on the native side;
     * throwing here keeps such misuse a Java exception rather than a native fault.
     */
    private long handle() {
        if (this.ptr == 0) {
            throw new IllegalStateException(
                "Vad has no native handle (it was released or never created)");
        }
        return this.ptr;
    }

    private native void delete(long ptr);

    private native long newFromFile(VadModelConfig config);

    private native void acceptWaveform(long ptr, float[] samples);

    private native boolean empty(long ptr);

    private native void pop(long ptr);

    private native void clear(long ptr);

    private native Object[] front(long ptr);

    private native boolean isSpeechDetected(long ptr);

    private native void reset(long ptr);
}
...
...
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/VadModelConfig.java
0 → 100644
查看文件 @
bcaa6df
// Copyright 2024 Xiaomi Corporation
package
com
.
k2fsa
.
sherpa
.
onnx
;
/**
 * Immutable top-level VAD configuration: the silero model settings plus the
 * sample rate, thread count, debug flag and provider.
 *
 * <p>Instances are created through {@link #builder()}; every value has a
 * default in {@link Builder}.
 */
public class VadModelConfig {
    // NOTE(review): native code presumably looks these fields up by name;
    // confirm against the JNI layer before renaming any of them.
    private final SileroVadModelConfig sileroVadModelConfig;
    private final int sampleRate;
    private final int numThreads;
    private final boolean debug;
    private final String provider;

    /** Copies every setting out of the given builder. */
    private VadModelConfig(Builder source) {
        sileroVadModelConfig = source.sileroVadModelConfig;
        sampleRate = source.sampleRate;
        numThreads = source.numThreads;
        debug = source.debug;
        provider = source.provider;
    }

    /** Returns a fresh builder pre-populated with the default values. */
    public static Builder builder() {
        return new Builder();
    }

    /** Returns the silero VAD model settings. */
    public SileroVadModelConfig getSileroVadModelConfig() {
        return this.sileroVadModelConfig;
    }

    /** Returns the configured sample rate. */
    public int getSampleRate() {
        return this.sampleRate;
    }

    /** Returns the configured number of threads. */
    public int getNumThreads() {
        return this.numThreads;
    }

    /** Returns the configured provider name. */
    public String getProvider() {
        return this.provider;
    }

    /** Returns the debug flag. */
    public boolean getDebug() {
        return this.debug;
    }

    /**
     * Fluent builder for {@link VadModelConfig}.
     *
     * <p>Defaults: a default-built {@link SileroVadModelConfig}, sample rate
     * 16000, one thread, debug enabled, provider "cpu".
     */
    public static class Builder {
        private SileroVadModelConfig sileroVadModelConfig = SileroVadModelConfig.builder().build();
        private int sampleRate = 16000;
        private int numThreads = 1;
        private boolean debug = true;
        private String provider = "cpu";

        /** Sets the silero VAD model settings and returns this builder. */
        public Builder setSileroVadModelConfig(SileroVadModelConfig sileroVadModelConfig) {
            this.sileroVadModelConfig = sileroVadModelConfig;
            return this;
        }

        /** Sets the sample rate and returns this builder. */
        public Builder setSampleRate(int sampleRate) {
            this.sampleRate = sampleRate;
            return this;
        }

        /** Sets the number of threads and returns this builder. */
        public Builder setNumThreads(int numThreads) {
            this.numThreads = numThreads;
            return this;
        }

        /** Sets the debug flag and returns this builder. */
        public Builder setDebug(boolean debug) {
            this.debug = debug;
            return this;
        }

        /** Sets the provider name and returns this builder. */
        public Builder setProvider(String provider) {
            this.provider = provider;
            return this;
        }

        /** Creates an immutable config from the builder's current values. */
        public VadModelConfig build() {
            return new VadModelConfig(this);
        }
    }
}
...
...
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/WaveWriter.java
0 → 100644
查看文件 @
bcaa6df
// Copyright 2024 Xiaomi Corporation
package
com
.
k2fsa
.
sherpa
.
onnx
;
/**
 * Writes float samples to a wave file through native code.
 *
 * <p>Use the static {@link #write(String, float[], int)} helper.
 */
public class WaveWriter {
    // Load the JNI library here as well (Vad does the same in its static
    // initializer) so that WaveWriter also works when it is the first class
    // of this API to be used. Loading an already loaded library is a no-op.
    static {
        System.loadLibrary("sherpa-onnx-jni");
    }

    public WaveWriter() {
    }

    /**
     * Writes {@code samples} to {@code filename}.
     *
     * @param filename path of the wave file to write
     * @param samples the samples to write
     * @param sampleRate sample rate to store in the file
     * @return the success flag reported by the native writer
     */
    public static boolean write(String filename, float[] samples, int sampleRate) {
        WaveWriter w = new WaveWriter();
        return w.writeWaveToFile(filename, samples, sampleRate);
    }

    private native boolean writeWaveToFile(String filename, float[] samples, int sampleRate);
}
...
...
sherpa-onnx/jni/CMakeLists.txt
查看文件 @
bcaa6df
...
...
@@ -24,6 +24,7 @@ set(sources
spoken-language-identification.cc
voice-activity-detector.cc
wave-reader.cc
wave-writer.cc
)
if
(
SHERPA_ONNX_ENABLE_TTS
)
...
...
sherpa-onnx/jni/wave-writer.cc
0 → 100644
查看文件 @
bcaa6df
// sherpa-onnx/jni/wave-writer.cc
//
// Copyright (c) 2024 Xiaomi Corporation
#include "sherpa-onnx/csrc/wave-writer.h"
#include "sherpa-onnx/jni/common.h"
// JNI implementation of com.k2fsa.sherpa.onnx.WaveWriter.writeWaveToFile().
//
// Writes the given float samples to `filename` via sherpa_onnx::WriteWave and
// returns its success flag. Returns JNI_FALSE when an argument is null or when
// the JVM cannot provide access to the array/string contents.
//
// The Java method is an instance native, so the second parameter is the
// receiver object (jobject); the return type is jboolean as required for a
// Java `boolean` native.
SHERPA_ONNX_EXTERN_C
JNIEXPORT jboolean JNICALL
Java_com_k2fsa_sherpa_onnx_WaveWriter_writeWaveToFile(JNIEnv *env,
                                                      jobject /*obj*/,
                                                      jstring filename,
                                                      jfloatArray samples,
                                                      jint sample_rate) {
  if (filename == nullptr || samples == nullptr) {
    return JNI_FALSE;
  }

  jfloat *p = env->GetFloatArrayElements(samples, nullptr);
  if (p == nullptr) {
    // The JVM could not give access to the elements.
    return JNI_FALSE;
  }
  jsize n = env->GetArrayLength(samples);

  const char *p_filename = env->GetStringUTFChars(filename, nullptr);
  if (p_filename == nullptr) {
    env->ReleaseFloatArrayElements(samples, p, JNI_ABORT);
    return JNI_FALSE;
  }

  bool ok = sherpa_onnx::WriteWave(p_filename, sample_rate, p, n);

  // JNI_ABORT: the samples were only read, so nothing needs copying back.
  env->ReleaseFloatArrayElements(samples, p, JNI_ABORT);
  env->ReleaseStringUTFChars(filename, p_filename);

  return ok ? JNI_TRUE : JNI_FALSE;
}
...
...
请
注册
或
登录
后发表评论