Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-06-23 19:43:53 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-06-23 19:43:53 +0800
Commit
7a663afb7667917b6521ff19270e41769e194b54
7a663afb
1 parent
29abf242
Add streaming ASR example from a microphone for Java API (#1047)
显示空白字符变更
内嵌
并排对比
正在显示
3 个修改的文件
包含
173 行增加
和
0 行删除
java-api-examples/README.md
java-api-examples/StreamingAsrFromMicTransducer.java
java-api-examples/run-streaming-asr-from-mic-transducer.sh
java-api-examples/README.md
查看文件 @
7a663af
...
...
@@ -7,6 +7,7 @@ This directory contains examples for the JAVA API of sherpa-onnx.
## Streaming Speech recognition
```
./run-streaming-asr-from-mic-transducer.sh
./run-streaming-decode-file-ctc.sh
./run-streaming-decode-file-ctc-hlg.sh
./run-streaming-decode-file-paraformer.sh
...
...
java-api-examples/StreamingAsrFromMicTransducer.java
0 → 100644
查看文件 @
7a663af
// Copyright 2022-2023 by zhaoming
// Copyright 2024 Xiaomi Corporation
// This file shows how to use an online transducer, i.e., streaming transducer,
// for real-time speech recognition with a microphone.
import
com.k2fsa.sherpa.onnx.*
;
import
javax.sound.sampled.*
;
/**
 * Real-time speech recognition from a microphone with an online (streaming) transducer.
 *
 * <p>The program opens a capture line delivering 16 kHz, 16-bit, mono, signed,
 * little-endian linear PCM, feeds the audio to an {@code OnlineRecognizer} in chunks
 * of 0.1 seconds and prints the partial result of the current segment on a single
 * console line. When the recognizer reports an endpoint, the line is terminated, the
 * segment index is incremented and the stream is reset for the next segment.
 */
public class StreamingAsrFromMicTransducer {
  /** Sample rate in Hz, used for both the capture format and the recognizer input. */
  private static final int SAMPLE_RATE = 16000;

  /** Samples fetched per read. You can choose an arbitrary number; 1600 is 0.1 seconds at 16000Hz. */
  private static final int SAMPLES_PER_READ = 1600;

  /**
   * Entry point. Command line arguments are ignored; model paths are hard-coded in
   * {@link #createConfig()}.
   *
   * <p>The stream and the recognizer are always released, and the microphone line is
   * stopped and closed, even when recognition is aborted by an exception.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    OnlineRecognizer recognizer = new OnlineRecognizer(createConfig());
    OnlineStream stream = recognizer.createStream();
    TargetDataLine targetDataLine = null;

    try {
      targetDataLine = openMicrophone();
      System.out.println("Started! Please speak");
      recognize(recognizer, stream, targetDataLine);
    } catch (LineUnavailableException e) {
      System.out.println("Failed to open target data line: " + e.getMessage());
    } finally {
      if (targetDataLine != null) {
        targetDataLine.stop();
        targetDataLine.close();
      }
      // Same order on every exit path: the stream first, then the recognizer that created it.
      stream.release();
      recognizer.release();
    }
  }

  /** Builds the recognizer configuration from the hard-coded model files. */
  private static OnlineRecognizerConfig createConfig() {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
    // to download model files
    String encoder =
        "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx";
    String decoder =
        "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx";
    String joiner =
        "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx";
    String tokens = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt";

    // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
    String ruleFsts = "./itn_zh_number.fst";

    OnlineTransducerModelConfig transducer =
        OnlineTransducerModelConfig.builder()
            .setEncoder(encoder)
            .setDecoder(decoder)
            .setJoiner(joiner)
            .build();

    OnlineModelConfig modelConfig =
        OnlineModelConfig.builder()
            .setTransducer(transducer)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    return OnlineRecognizerConfig.builder()
        .setOnlineModelConfig(modelConfig)
        .setDecodingMethod("greedy_search")
        .setRuleFsts(ruleFsts)
        .build();
  }

  /**
   * Obtains, opens and starts a capture line for the format used by this example.
   *
   * @return a started line delivering linear PCM, 16000Hz, 16-bit, 1 channel, signed,
   *     little endian
   * @throws LineUnavailableException if the line cannot be obtained or opened
   */
  private static TargetDataLine openMicrophone() throws LineUnavailableException {
    // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/AudioFormat.html
    // Linear PCM, 16000Hz, 16-bit, 1 channel, signed, little endian
    AudioFormat format = new AudioFormat(SAMPLE_RATE, 16, 1, true, false);

    // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/DataLine.Info.html#Info-java.lang.Class-javax.sound.sampled.AudioFormat-int-
    DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);

    TargetDataLine line = (TargetDataLine) AudioSystem.getLine(info);
    line.open(format);
    line.start();
    return line;
  }

  /**
   * Reads audio from {@code line} for as long as it is open, decodes it and prints the
   * recognition results.
   *
   * @param recognizer recognizer used for decoding and endpoint detection
   * @param stream stream created by {@code recognizer} that receives the audio
   * @param line an opened and started capture line
   */
  private static void recognize(
      OnlineRecognizer recognizer, OnlineStream stream, TargetDataLine line) {
    byte[] buffer = new byte[SAMPLES_PER_READ * 2]; // a short has 2 bytes
    float[] samples = new float[SAMPLES_PER_READ];

    String lastText = "";
    int segmentIndex = 0;

    while (line.isOpen()) {
      int n = line.read(buffer, 0, buffer.length);
      if (n <= 0) {
        System.out.printf("Got %d bytes. Expected %d bytes.\n", n, buffer.length);
        continue;
      }

      int numSamples = pcm16LeToFloat(buffer, n, samples);

      // A short read must not push stale samples from a previous read into the
      // recognizer, so hand over exactly what was captured this time.
      float[] chunk = samples;
      if (numSamples != samples.length) {
        chunk = new float[numSamples];
        System.arraycopy(samples, 0, chunk, 0, numSamples);
      }
      stream.acceptWaveform(chunk, SAMPLE_RATE);

      while (recognizer.isReady(stream)) {
        recognizer.decode(stream);
      }

      String text = recognizer.getResult(stream).getText();
      boolean isEndpoint = recognizer.isEndpoint(stream);

      // Compare string contents, not references: only reprint when the text changed.
      if (!text.isEmpty() && !" ".equals(text) && !lastText.equals(text)) {
        lastText = text;
        System.out.printf("%d: %s\r", segmentIndex, text);
      }

      if (isEndpoint) {
        if (!text.isEmpty()) {
          System.out.println();
          segmentIndex += 1;
        }
        recognizer.reset(stream);
        // A new segment starts; forget the old text so that an identical utterance
        // in the next segment is still printed.
        lastText = "";
      }
    } // while (line.isOpen())
  }

  /**
   * Converts signed 16-bit little-endian PCM bytes to floats obtained by dividing each
   * sample by 32768.
   *
   * @param pcm raw bytes, two per sample, low byte first
   * @param numBytes number of valid bytes at the start of {@code pcm}
   * @param out destination for the converted samples
   * @return the number of samples written to the start of {@code out}
   */
  private static int pcm16LeToFloat(byte[] pcm, int numBytes, float[] out) {
    int numSamples = Math.min(numBytes / 2, out.length);
    for (int i = 0; i < numSamples; ++i) {
      // The low byte is unsigned payload: mask it so that values >= 0x80 are not
      // sign-extended. Only the high byte carries the sign of the sample.
      int low = pcm[2 * i] & 0xff;
      int high = pcm[2 * i + 1];
      out[i] = ((high << 8) | low) / 32768.0f;
    }
    return numSamples;
  }
}
...
...
java-api-examples/run-streaming-asr-from-mic-transducer.sh
0 → 100755
查看文件 @
7a663af
#!/usr/bin/env bash
#
# Builds the sherpa-onnx JNI library and the Java API jar if they are missing,
# downloads the streaming zipformer model and the inverse text normalization
# FST if they are missing, and then runs StreamingAsrFromMicTransducer.java.
#
# All paths are relative to the current directory, so run this script from the
# directory that contains it.

set -ex

# Build the JNI shared library unless a .dylib or a .so build already exists.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the jar containing the Java API classes.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Download and unpack the model; tokens.txt is used as the marker file.
if [ ! -f ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
fi

# Download the rule FST referenced by the Java example.
if [ ! -f ./itn_zh_number.fst ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi

# Quote the library path so that a working directory containing spaces still works.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./StreamingAsrFromMicTransducer.java
...
...
请
注册
或
登录
后发表评论