Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2025-05-13 19:08:09 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2025-05-13 19:08:09 +0800
Commit
fdda292d5a351cf2789e8ab5efaa7344e138842c
fdda292d
1 parent
9a0e16f0
Add alsa-based streaming ASR example for sense voice. (#2207)
隐藏空白字符变更
内嵌
并排对比
正在显示
6 个修改的文件
包含
273 行增加
和
6 行删除
.github/workflows/aarch64-linux-gnu-shared.yaml
.github/workflows/aarch64-linux-gnu-static.yaml
.github/workflows/android.yaml
cxx-api-examples/CMakeLists.txt
cxx-api-examples/sense-voice-simulate-streaming-alsa-cxx-api.cc
cxx-api-examples/sherpa-display.h
.github/workflows/aarch64-linux-gnu-shared.yaml
查看文件 @
fdda292
...
...
@@ -82,6 +82,8 @@ jobs:
..
make -j4 install
cp -v bin/sense-voice-simulate-streaming-alsa-cxx-api install/bin
rm -rf install/lib/pkgconfig
rm -fv install/lib/cargs.h
rm -fv install/lib/libcargs.so
...
...
@@ -126,6 +128,8 @@ jobs:
make -j4 install
cp -v bin/sense-voice-simulate-streaming-alsa-cxx-api install/bin
rm -rf install/lib/pkgconfig
rm -fv install/lib/cargs.h
rm -fv install/lib/libcargs.so
...
...
@@ -242,7 +246,7 @@ jobs:
file
:
sherpa-onnx-*linux-aarch64*.tar.bz2
# repo_name: k2-fsa/sherpa-onnx
# repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
# tag: v1.11.
1
# tag: v1.11.
5
-
name
:
Test offline Moonshine
if
:
matrix.build_type != 'Debug'
...
...
.github/workflows/aarch64-linux-gnu-static.yaml
查看文件 @
fdda292
...
...
@@ -83,6 +83,8 @@ jobs:
make install
cp bin/sense-voice-simulate-streaming-alsa-cxx-api install/bin
ls -lh install/lib
rm -rf install/lib/pkgconfig
...
...
@@ -164,7 +166,7 @@ jobs:
file
:
sherpa-onnx-*linux-aarch64*.tar.bz2
# repo_name: k2-fsa/sherpa-onnx
# repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
# tag: v1.1
0.42
# tag: v1.1
1.5
-
name
:
Test offline Moonshine
if
:
matrix.build_type != 'Debug'
...
...
.github/workflows/android.yaml
查看文件 @
fdda292
...
...
@@ -168,7 +168,7 @@ jobs:
file
:
sherpa-onnx-*-android.tar.bz2
# repo_name: k2-fsa/sherpa-onnx
# repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
# tag: v1.11.
3
# tag: v1.11.
5
build-android-aar
:
needs
:
[
build-android-libs
]
...
...
@@ -297,7 +297,7 @@ jobs:
file
:
./*.aar
# repo_name: k2-fsa/sherpa-onnx
# repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
# tag: v1.11.
3
# tag: v1.11.
5
-
name
:
Release android aar
if
:
github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
...
...
cxx-api-examples/CMakeLists.txt
查看文件 @
fdda292
...
...
@@ -47,6 +47,23 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO)
)
endif
()
# Build the ALSA-based simulated-streaming SenseVoice example only when
# SHERPA_ONNX_HAS_ALSA is set. The executable is compiled from the example
# source plus sherpa-onnx/csrc/alsa.cc and linked against sherpa-onnx-cxx-api
# and portaudio_static.
if
(
SHERPA_ONNX_HAS_ALSA
)
add_executable
(
sense-voice-simulate-streaming-alsa-cxx-api
./sense-voice-simulate-streaming-alsa-cxx-api.cc
${
CMAKE_CURRENT_LIST_DIR
}
/../sherpa-onnx/csrc/alsa.cc
)
target_link_libraries
(
sense-voice-simulate-streaming-alsa-cxx-api
sherpa-onnx-cxx-api
portaudio_static
)
# Link libasound. If the environment variable SHERPA_ONNX_ALSA_LIB_DIR is
# defined, it is added as a library search path (-L); otherwise asound is
# resolved through the default search paths.
if
(
DEFINED ENV{SHERPA_ONNX_ALSA_LIB_DIR}
)
target_link_libraries
(
sense-voice-simulate-streaming-alsa-cxx-api -L$ENV{SHERPA_ONNX_ALSA_LIB_DIR} -lasound
)
else
()
target_link_libraries
(
sense-voice-simulate-streaming-alsa-cxx-api asound
)
endif
()
endif
()
# Build the sense-voice-with-hr-cxx-api example from its single source file
# and link it against sherpa-onnx-cxx-api.
add_executable
(
sense-voice-with-hr-cxx-api ./sense-voice-with-hr-cxx-api.cc
)
target_link_libraries
(
sense-voice-with-hr-cxx-api sherpa-onnx-cxx-api
)
...
...
cxx-api-examples/sense-voice-simulate-streaming-alsa-cxx-api.cc
0 → 100644
查看文件 @
fdda292
// cxx-api-examples/sense-voice-simulate-streaming-alsa-cxx-api.cc
// Copyright (c) 2025 Xiaomi Corporation
//
// This file demonstrates how to use sense voice with sherpa-onnx's C++ API
// for streaming speech recognition from a microphone.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
// tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
// rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
//
// clang-format on
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <atomic>
#include <chrono>              // NOLINT
#include <condition_variable>  // NOLINT
#include <iostream>
#include <mutex>  // NOLINT
#include <queue>
#include <string>
#include <thread>
#include <vector>

#include "portaudio.h"       // NOLINT
#include "sherpa-display.h"  // NOLINT
#include "sherpa-onnx/c-api/cxx-api.h"
#include "sherpa-onnx/csrc/alsa.h"
// State shared between the SIGINT handler, the recording thread and main().

// Audio chunks produced by the recording thread and consumed by main().
// Access is guarded by `mutex`.
std::queue<std::vector<float>> samples_queue;

// Notified whenever a chunk is pushed to `samples_queue` and when the SIGINT
// handler requests shutdown.
std::condition_variable condition_variable;

// Protects `samples_queue`.
std::mutex mutex;

// Shutdown request flag. It is written from the signal handler and read by
// both the recording thread and the main thread without holding `mutex`, so
// it must be atomic: a plain bool here is a data race.
std::atomic<bool> stop{false};
// Signal handler installed for SIGINT. It raises the global `stop` flag,
// wakes one thread waiting on `condition_variable`, and writes a notice to
// stderr. The signal number is ignored.
//
// NOTE(review): notifying a condition variable and writing to a stdio stream
// are not on the POSIX async-signal-safe list; confirm this is acceptable for
// an example program.
static void Handler(int32_t /*sig*/) {
  stop = true;
  condition_variable.notify_one();
  fputs("\nCaught Ctrl + C. Exiting...\n", stderr);
}
// Entry point of the recording thread.
//
// Repeatedly reads a fixed-size chunk of audio from `alsa` and appends it to
// the global `samples_queue`, notifying `condition_variable` after each push.
// The loop ends once the global `stop` flag is observed to be set.
//
// @param alsa  Capture device to read from; must outlive this function.
static void RecordCallback(sherpa_onnx::Alsa *alsa) {
  // Samples per read: 0.1 times the device's actual sample rate, truncated to
  // an integer.
  const int32_t samples_per_read = 0.1 * alsa->GetActualSampleRate();

  for (;;) {
    if (stop) {
      break;
    }

    // The read happens before the lock is taken, so the mutex is held only
    // for the queue push and the notification.
    std::vector<float> chunk = alsa->Read(samples_per_read);

    std::lock_guard<std::mutex> guard(mutex);
    samples_queue.push(std::move(chunk));
    condition_variable.notify_one();
  }
}
// Creates the voice activity detector used by this example.
//
// The detector is configured for the silero VAD model expected at
// ./silero_vad.onnx with a 16000 Hz sample rate. If creation fails, an error
// is printed to stderr and the process terminates via exit(-1).
//
// @return The constructed VoiceActivityDetector.
static sherpa_onnx::cxx::VoiceActivityDetector CreateVad() {
  namespace api = sherpa_onnx::cxx;

  api::VadModelConfig vad_config;

  auto &silero = vad_config.silero_vad;
  silero.model = "./silero_vad.onnx";
  silero.threshold = 0.5;
  silero.min_silence_duration = 0.1;
  silero.min_speech_duration = 0.25;
  silero.max_speech_duration = 8;

  vad_config.sample_rate = 16000;
  vad_config.debug = false;

  // NOTE(review): the second argument (20) is presumably the detector's
  // internal buffer size in seconds -- confirm against the cxx-api header.
  api::VoiceActivityDetector detector =
      api::VoiceActivityDetector::Create(vad_config, 20);

  if (!detector.Get()) {
    std::cerr << "Failed to create VAD. Please check your config\n";
    exit(-1);
  }

  return detector;
}
// Creates the offline (non-streaming) SenseVoice recognizer used by this
// example.
//
// The model and tokens files are expected under
// ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/. Progress messages
// are written to stdout. If creation fails, an error is printed to stderr and
// the process terminates via exit(-1).
//
// @return The constructed OfflineRecognizer.
static sherpa_onnx::cxx::OfflineRecognizer CreateOfflineRecognizer() {
  namespace api = sherpa_onnx::cxx;

  api::OfflineRecognizerConfig recognizer_config;

  auto &model_config = recognizer_config.model_config;
  auto &sense_voice = model_config.sense_voice;

  sense_voice.model =
      "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx";
  sense_voice.use_itn = false;
  sense_voice.language = "auto";

  model_config.tokens =
      "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt";
  model_config.num_threads = 2;
  model_config.debug = false;

  std::cout << "Loading model\n";

  api::OfflineRecognizer asr = api::OfflineRecognizer::Create(recognizer_config);

  if (!asr.Get()) {
    std::cerr << "Please check your config\n";
    exit(-1);
  }

  std::cout << "Loading model done\n";

  return asr;
}
// Simulated streaming recognition from an ALSA capture device.
//
// Usage: ./sense-voice-simulate-streaming-alsa-cxx-api device_name
//
// A background thread records audio into `samples_queue`. The main loop
// appends each chunk to `buffer`, feeds it to the VAD in fixed-size windows,
// periodically re-decodes the in-progress buffer to show a partial result,
// and decodes every finished VAD segment as a finalized sentence.
//
// @return -1 if the number of arguments is wrong, 0 after a clean shutdown
//         triggered by SIGINT. The process exits with -1 if the device does
//         not report a 16000 Hz expected sample rate.
int32_t main(int32_t argc, const char *argv[]) {
  const char *kUsageMessage = R"usage(
Usage:
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
./sense-voice-simulate-streaming-alsa-cxx-api device_name
The device name specifies which microphone to use in case there are several
on your system. You can use
arecord -l
to find all available microphones on your computer. For instance, if it outputs
**** List of CAPTURE Hardware Devices ****
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
Subdevices: 1/1
Subdevice #0: subdevice #0
and if you want to select card 3 and device 0 on that card, please use:
plughw:3,0
as the device_name.
)usage";

  if (argc != 2) {
    fprintf(stderr, "%s\n", kUsageMessage);
    return -1;
  }

  signal(SIGINT, Handler);

  using namespace sherpa_onnx::cxx;  // NOLINT

  auto vad = CreateVad();
  auto recognizer = CreateOfflineRecognizer();

  int32_t expected_sample_rate = 16000;

  std::string device_name = argv[1];
  sherpa_onnx::Alsa alsa(device_name.c_str());
  fprintf(stderr, "Use recording device: %s\n", device_name.c_str());

  if (alsa.GetExpectedSampleRate() != expected_sample_rate) {
    fprintf(stderr, "sample rate: %d != %d\n", alsa.GetExpectedSampleRate(),
            expected_sample_rate);
    exit(-1);
  }

  int32_t window_size = 512;  // samples, please don't change

  // Index into `buffer` of the first sample not yet given to the VAD.
  int32_t offset = 0;

  // Audio accumulated for the utterance currently in progress.
  std::vector<float> buffer;

  bool speech_started = false;

  // Time of the last speech-start detection or partial decode.
  auto started_time = std::chrono::steady_clock::now();

  SherpaDisplay display;

  std::thread record_thread(RecordCallback, &alsa);

  std::cout << "Started! Please speak\n";

  while (!stop) {
    {
      std::unique_lock<std::mutex> lock(mutex);

      // The SIGINT handler sets `stop` and notifies without holding `mutex`,
      // so a wake-up can be missed. A bounded wait guarantees `stop` is
      // re-checked even in that case.
      while (samples_queue.empty() && !stop) {
        condition_variable.wait_for(lock, std::chrono::milliseconds(100));
      }

      // The wait can end because of `stop` with nothing queued; calling
      // front()/pop() on an empty queue is undefined behavior.
      if (samples_queue.empty()) {
        break;
      }

      const auto &s = samples_queue.front();
      buffer.insert(buffer.end(), s.begin(), s.end());
      samples_queue.pop();
    }

    // Feed the VAD one window at a time.
    for (; offset + window_size < static_cast<int32_t>(buffer.size());
         offset += window_size) {
      vad.AcceptWaveform(buffer.data() + offset, window_size);

      if (!speech_started && vad.IsDetected()) {
        speech_started = true;
        started_time = std::chrono::steady_clock::now();
      }
    }

    // While no speech has started, keep only the most recent 10 windows and
    // shift `offset` by the number of samples dropped from the front.
    if (!speech_started) {
      const int32_t max_kept = 10 * window_size;
      const int32_t num_samples = static_cast<int32_t>(buffer.size());

      if (num_samples > max_kept) {
        const int32_t num_dropped = num_samples - max_kept;
        offset -= num_dropped;
        buffer.erase(buffer.begin(), buffer.begin() + num_dropped);
      }
    }

    auto current_time = std::chrono::steady_clock::now();
    const float elapsed_seconds =
        std::chrono::duration_cast<std::chrono::milliseconds>(current_time -
                                                              started_time)
            .count() /
        1000.;

    // Partial result: re-decode the whole in-progress buffer whenever more
    // than 0.2 seconds have passed since `started_time`.
    if (speech_started && elapsed_seconds > 0.2) {
      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(expected_sample_rate, buffer.data(),
                            buffer.size());
      recognizer.Decode(&stream);

      OfflineRecognizerResult result = recognizer.GetResult(&stream);
      display.UpdateText(result.text);
      display.Display();

      started_time = std::chrono::steady_clock::now();
    }

    // Final result: decode every segment the VAD has finished, then reset
    // the per-utterance state.
    while (!vad.IsEmpty()) {
      auto segment = vad.Front();
      vad.Pop();

      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(expected_sample_rate, segment.samples.data(),
                            segment.samples.size());
      recognizer.Decode(&stream);

      OfflineRecognizerResult result = recognizer.GetResult(&stream);
      display.UpdateText(result.text);
      display.FinalizeCurrentSentence();
      display.Display();

      buffer.clear();
      offset = 0;
      speech_started = false;
    }
  }

  record_thread.join();

  return 0;
}
...
...
cxx-api-examples/sherpa-display.h
查看文件 @
fdda292
...
...
@@ -45,10 +45,11 @@ class SherpaDisplay {
private
:
// Clears the terminal by running the platform's clear-screen shell command:
// "cls" when _MSC_VER is defined, "clear" otherwise. The value returned by
// system() is stored in `ret` and then discarded with a void cast.
//
// NOTE(review): this listing appears to interleave the removed and the added
// lines of a diff, so each branch shows both a bare system() call and the
// `auto ret = system()` form -- confirm against the actual file.
static
void
ClearScreen
()
{
#ifdef _MSC_VER
system
(
"cls"
);
auto
ret
=
system
(
"cls"
);
#else
system
(
"clear"
);
auto
ret
=
system
(
"clear"
);
#endif
(
void
)
ret
;
}
static
std
::
string
GetCurrentDateTime
()
{
...
...
请
注册
或
登录
后发表评论