Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-02-26 21:17:26 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-02-26 21:17:26 +0800
Commit
87a7030c087b4cc7da3d90195f34965d040a7e5d
87a7030c
1 parent
fb043661
Support using alsa to access the microphone with non-streaming ASR models (#517)
隐藏空白字符变更
内嵌
并排对比
正在显示
4 个修改的文件
包含
206 行增加
和
0 行删除
cmake/cmake_extension.py
setup.py
sherpa-onnx/csrc/CMakeLists.txt
sherpa-onnx/csrc/sherpa-onnx-alsa-offline.cc
cmake/cmake_extension.py
查看文件 @
87a7030
...
...
@@ -166,6 +166,7 @@ class BuildExtension(build_ext):
if
enable_alsa
():
binaries
+=
[
"sherpa-onnx-alsa"
]
binaries
+=
[
"sherpa-onnx-alsa-offline"
]
binaries
+=
[
"sherpa-onnx-offline-tts-play-alsa"
]
binaries
+=
[
"sherpa-onnx-alsa-offline-speaker-identification"
]
...
...
setup.py
查看文件 @
87a7030
...
...
@@ -59,6 +59,7 @@ def get_binaries_to_install():
if
enable_alsa
():
binaries
+=
[
"sherpa-onnx-alsa"
]
binaries
+=
[
"sherpa-onnx-alsa-offline"
]
binaries
+=
[
"sherpa-onnx-offline-tts-play-alsa"
]
binaries
+=
[
"sherpa-onnx-alsa-offline-speaker-identification"
]
...
...
sherpa-onnx/csrc/CMakeLists.txt
查看文件 @
87a7030
...
...
@@ -231,10 +231,12 @@ endif()
if
(
SHERPA_ONNX_HAS_ALSA AND SHERPA_ONNX_ENABLE_BINARY
)
add_executable
(
sherpa-onnx-alsa sherpa-onnx-alsa.cc alsa.cc
)
add_executable
(
sherpa-onnx-offline-tts-play-alsa sherpa-onnx-offline-tts-play-alsa.cc alsa-play.cc
)
add_executable
(
sherpa-onnx-alsa-offline sherpa-onnx-alsa-offline.cc alsa.cc
)
add_executable
(
sherpa-onnx-alsa-offline-speaker-identification sherpa-onnx-alsa-offline-speaker-identification.cc alsa.cc
)
set
(
exes
sherpa-onnx-alsa
sherpa-onnx-alsa-offline
sherpa-onnx-offline-tts-play-alsa
sherpa-onnx-alsa-offline-speaker-identification
)
...
...
sherpa-onnx/csrc/sherpa-onnx-alsa-offline.cc
0 → 100644
查看文件 @
87a7030
// sherpa-onnx/csrc/sherpa-onnx-alsa-offline.cc
//
// Copyright (c) 2022-2024 Xiaomi Corporation
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include <algorithm>
#include <atomic>
#include <cctype>  // std::tolower
#include <chrono>  // NOLINT
#include <mutex>   // NOLINT
#include <thread>  // NOLINT

#include "sherpa-onnx/csrc/alsa.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
// States of the push-to-talk loop. DetectKeyPress() advances
// kIdle -> kRecording -> kDecoding on each Enter key; main() resets the state
// to kIdle after a decode has finished.
enum class State {
  kIdle,
  kRecording,
  kDecoding,
};

// Current state of the loop. It is written by the key-press thread and by the
// main thread, so it is atomic to avoid a data race between them.
std::atomic<State> state{State::kIdle};

// true to stop the program and exit.
// It is set from the SIGINT handler and polled by every thread; an atomic
// makes that store/load pair well defined.
std::atomic<bool> stop{false};

// Audio captured from the microphone. Every access in this file is made with
// samples_mutex held.
std::vector<float> samples;
std::mutex samples_mutex;
static
void
DetectKeyPress
()
{
SHERPA_ONNX_LOGE
(
"Press Enter to start"
);
int32_t
key
;
while
(
!
stop
&&
(
key
=
getchar
()))
{
if
(
key
!=
0x0a
)
{
continue
;
}
switch
(
state
)
{
case
State
:
:
kIdle
:
SHERPA_ONNX_LOGE
(
"Start recording. Press Enter to stop recording"
);
state
=
State
::
kRecording
;
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
samples_mutex
);
samples
.
clear
();
}
break
;
case
State
:
:
kRecording
:
SHERPA_ONNX_LOGE
(
"Stop recording. Decoding ..."
);
state
=
State
::
kDecoding
;
break
;
case
State
:
:
kDecoding
:
break
;
}
}
}
// Thread entry point: opens the ALSA capture device and keeps appending the
// audio it reads to the global `samples` buffer until `stop` is set.
//
// @param device_name           Name passed to the sherpa_onnx::Alsa
//                              constructor, e.g. "plughw:3,0".
// @param expected_sample_rate  Sample rate required by the recognizer. The
//                              process exits with -1 if it differs from
//                              alsa.GetExpectedSampleRate().
static void Record(const char *device_name, int32_t expected_sample_rate) {
  sherpa_onnx::Alsa alsa(device_name);

  if (alsa.GetExpectedSampleRate() != expected_sample_rate) {
    fprintf(stderr, "sample rate: %d != %d\n", alsa.GetExpectedSampleRate(),
            expected_sample_rate);
    exit(-1);
  }

  // Request 0.1 second worth of samples per read.
  const int32_t chunk = static_cast<int32_t>(0.1 * alsa.GetActualSampleRate());

  while (!stop) {
    // Read from the device *before* taking the lock. `alsa` is private to this
    // thread, and Read() presumably blocks until `chunk` samples are available
    // (TODO confirm). Holding samples_mutex across the read, and re-acquiring
    // it right away in the next iteration, can keep DetectKeyPress() and
    // main() waiting for the buffer.
    const std::vector<float> &s = alsa.Read(chunk);

    // NOTE(review): samples are appended in every state, not only while
    // recording; the buffer is emptied when a recording starts and when a
    // decode takes it.
    std::lock_guard<std::mutex> lock(samples_mutex);
    samples.insert(samples.end(), s.begin(), s.end());
  }
}
// SIGINT handler: asks every thread to stop and tells the user how to unblock
// the key-press thread, which is waiting in getchar().
//
// Only async-signal-safe operations are used here: stdio functions such as
// fprintf() may take locks or touch buffers that the interrupted code was in
// the middle of using, so the message is emitted with write(2) instead.
static void Handler(int32_t sig) {
  (void)sig;  // the handler is installed for SIGINT only

  stop = true;

  static const char kMessage[] = "\nCaught Ctrl + C. Press Enter to exit\n";
  // sizeof - 1: do not write the terminating NUL. The result is stored only to
  // avoid an unused-result warning; nothing useful can be done on failure.
  const ssize_t written = write(STDERR_FILENO, kMessage, sizeof(kMessage) - 1);
  (void)written;
}
int32_t
main
(
int32_t
argc
,
char
*
argv
[])
{
signal
(
SIGINT
,
Handler
);
const
char
*
kUsageMessage
=
R"usage(
This program uses non-streaming models with microphone for speech recognition.
Usage:
(1) Transducer from icefall
./bin/sherpa-onnx-alsa-offline \
--tokens=/path/to/tokens.txt \
--encoder=/path/to/encoder.onnx \
--decoder=/path/to/decoder.onnx \
--joiner=/path/to/joiner.onnx \
--num-threads=2 \
--decoding-method=greedy_search \
device_name
(2) Paraformer from FunASR
./bin/sherpa-onnx-alsa-offline \
--tokens=/path/to/tokens.txt \
--paraformer=/path/to/model.onnx \
--num-threads=1 \
device_name
(3) Whisper models
./bin/sherpa-onnx-alsa-offline \
--whisper-encoder=./sherpa-onnx-whisper-base.en/base.en-encoder.int8.onnx \
--whisper-decoder=./sherpa-onnx-whisper-base.en/base.en-decoder.int8.onnx \
--tokens=./sherpa-onnx-whisper-base.en/base.en-tokens.txt \
--num-threads=1 \
device_name
Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
for a list of pre-trained models to download.
The device name specifies which microphone to use in case there are several
on you system. You can use
arecord -l
to find all available microphones on your computer. For instance, if it outputs
**** List of CAPTURE Hardware Devices ****
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
Subdevices: 1/1
Subdevice #0: subdevice #0
and if you want to select card 3 and the device 0 on that card, please use:
plughw:3,0
as the device_name.
)usage"
;
sherpa_onnx
::
ParseOptions
po
(
kUsageMessage
);
sherpa_onnx
::
OfflineRecognizerConfig
config
;
config
.
Register
(
&
po
);
po
.
Read
(
argc
,
argv
);
if
(
po
.
NumArgs
()
!=
1
)
{
fprintf
(
stderr
,
"Please provide only 1 argument: the device name
\n
"
);
po
.
PrintUsage
();
exit
(
EXIT_FAILURE
);
}
fprintf
(
stderr
,
"%s
\n
"
,
config
.
ToString
().
c_str
());
if
(
!
config
.
Validate
())
{
fprintf
(
stderr
,
"Errors in config!
\n
"
);
return
-
1
;
}
SHERPA_ONNX_LOGE
(
"Creating recognizer ..."
);
sherpa_onnx
::
OfflineRecognizer
recognizer
(
config
);
SHERPA_ONNX_LOGE
(
"Recognizer created!"
);
std
::
string
device_name
=
po
.
GetArg
(
1
);
fprintf
(
stderr
,
"Use recording device: %s
\n
"
,
device_name
.
c_str
());
int32_t
sample_rate
=
config
.
feat_config
.
sampling_rate
;
std
::
thread
t
(
DetectKeyPress
);
std
::
thread
t2
(
Record
,
device_name
.
c_str
(),
sample_rate
);
while
(
!
stop
)
{
switch
(
state
)
{
case
State
:
:
kIdle
:
break
;
case
State
:
:
kRecording
:
break
;
case
State
:
:
kDecoding
:
{
std
::
vector
<
float
>
buf
;
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
samples_mutex
);
buf
=
std
::
move
(
samples
);
}
auto
s
=
recognizer
.
CreateStream
();
s
->
AcceptWaveform
(
sample_rate
,
buf
.
data
(),
buf
.
size
());
recognizer
.
DecodeStream
(
s
.
get
());
SHERPA_ONNX_LOGE
(
"Decoding Done! Result is:"
);
SHERPA_ONNX_LOGE
(
"%s"
,
s
->
GetResult
().
text
.
c_str
());
state
=
State
::
kIdle
;
SHERPA_ONNX_LOGE
(
"Press Enter to start"
);
break
;
}
}
using
namespace
std
::
chrono_literals
;
std
::
this_thread
::
sleep_for
(
20
ms
);
// sleep for 20ms
}
t
.
join
();
t2
.
join
();
return
0
;
}
...
...
请
注册
或
登录
后发表评论