Authored by Fangjun Kuang on 2024-03-08 11:34:48 +0800
Committed by GitHub on 2024-03-08 11:34:48 +0800
Commit d3287f94940f38d656d1be46b8644dcea337b195 (short: d3287f94)
1 parent: e9e8d755

Add Python ASR examples with alsa (#646)
Showing 12 changed files with 326 additions and 10 deletions.
CMakeLists.txt
python-api-examples/speech-recognition-from-microphone-with-endpoint-detection-alsa.py
sherpa-onnx/csrc/session.cc
sherpa-onnx/csrc/sherpa-onnx-alsa-offline-speaker-identification.cc
sherpa-onnx/csrc/sherpa-onnx-alsa-offline.cc
sherpa-onnx/csrc/sherpa-onnx-keyword-spotter-alsa.cc
sherpa-onnx/python/csrc/CMakeLists.txt
sherpa-onnx/python/csrc/alsa.cc
sherpa-onnx/python/csrc/alsa.h
sherpa-onnx/python/csrc/faked-alsa.cc
sherpa-onnx/python/csrc/sherpa-onnx.cc
sherpa-onnx/python/sherpa_onnx/__init__.py
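The centerpiece of the commit is a new Alsa class exposed to Python (see sherpa-onnx/python/csrc/alsa.cc and sherpa_onnx/__init__.py below). A minimal sketch of the resulting Python-side API, assuming an ALSA-enabled Linux build; the device name plughw:3,0 is only a placeholder taken from the help text and must match a capture device listed by arecord -l:

# Sketch of the Alsa binding added in this commit.
# "plughw:3,0" is a placeholder; pick a real capture device from `arecord -l`.
import sherpa_onnx

alsa = sherpa_onnx.Alsa("plughw:3,0")  # open an ALSA capture device
print(alsa.expected_sample_rate)       # sample rate the recognizer pipeline expects
print(alsa.actual_sample_rate)         # sample rate the device was actually opened with
samples = alsa.read(1600)              # blocking read; 1600 samples is ~0.1 s at 16 kHz
print(len(samples))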
CMakeLists.txt
...
...
@@ -146,6 +146,7 @@ include(CheckIncludeFileCXX)
if(UNIX AND NOT APPLE AND NOT SHERPA_ONNX_ENABLE_WASM AND NOT CMAKE_SYSTEM_NAME STREQUAL Android)
  check_include_file_cxx(alsa/asoundlib.h SHERPA_ONNX_HAS_ALSA)
  if(SHERPA_ONNX_HAS_ALSA)
    message(STATUS "With Alsa")
    add_definitions(-DSHERPA_ONNX_ENABLE_ALSA=1)
  else()
    message(WARNING "\
...
...
python-api-examples/speech-recognition-from-microphone-with-endpoint-detection-alsa.py
0 → 100755
#!/usr/bin/env python3

# Real-time speech recognition from a microphone with sherpa-onnx Python API
# with endpoint detection.
#
# Note: This script uses ALSA and works only on Linux systems, especially
# for embedding Linux systems and for running Linux on Windows using WSL.
#
# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
# to download pre-trained models

import argparse
import sys
from pathlib import Path

import sherpa_onnx


def assert_file_exists(filename: str):
    assert Path(filename).is_file(), (
        f"{filename} does not exist!\n"
        "Please refer to "
        "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html to download it"
    )


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--tokens",
        type=str,
        required=True,
        help="Path to tokens.txt",
    )

    parser.add_argument(
        "--encoder",
        type=str,
        required=True,
        help="Path to the encoder model",
    )

    parser.add_argument(
        "--decoder",
        type=str,
        required=True,
        help="Path to the decoder model",
    )

    parser.add_argument(
        "--joiner",
        type=str,
        required=True,
        help="Path to the joiner model",
    )

    parser.add_argument(
        "--decoding-method",
        type=str,
        default="greedy_search",
        help="Valid values are greedy_search and modified_beam_search",
    )

    parser.add_argument(
        "--provider",
        type=str,
        default="cpu",
        help="Valid values: cpu, cuda, coreml",
    )

    parser.add_argument(
        "--hotwords-file",
        type=str,
        default="",
        help="""
        The file containing hotwords, one words/phrases per line, and for each
        phrase the bpe/cjkchar are separated by a space. For example:

        ▁HE LL O ▁WORLD
        你 好 世 界
        """,
    )

    parser.add_argument(
        "--hotwords-score",
        type=float,
        default=1.5,
        help="""
        The hotword score of each token for biasing word/phrase. Used only if
        --hotwords-file is given.
        """,
    )

    parser.add_argument(
        "--blank-penalty",
        type=float,
        default=0.0,
        help="""
        The penalty applied on blank symbol during decoding.
        Note: It is a positive value that would be applied to logits like
        this `logits[:, 0] -= blank_penalty` (suppose logits.shape is
        [batch_size, vocab] and blank id is 0).
        """,
    )

    parser.add_argument(
        "--device-name",
        type=str,
        required=True,
        help="""
        The device name specifies which microphone to use in case there are several
        on your system. You can use

          arecord -l

        to find all available microphones on your computer. For instance, if it outputs

        **** List of CAPTURE Hardware Devices ****
        card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
          Subdevices: 1/1
          Subdevice #0: subdevice #0

        and if you want to select card 3 and the device 0 on that card, please use:

          plughw:3,0

        as the device_name.
        """,
    )

    return parser.parse_args()


def create_recognizer(args):
    assert_file_exists(args.encoder)
    assert_file_exists(args.decoder)
    assert_file_exists(args.joiner)
    assert_file_exists(args.tokens)
    # Please replace the model files if needed.
    # See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
    # for download links.
    recognizer = sherpa_onnx.OnlineRecognizer.from_transducer(
        tokens=args.tokens,
        encoder=args.encoder,
        decoder=args.decoder,
        joiner=args.joiner,
        num_threads=1,
        sample_rate=16000,
        feature_dim=80,
        enable_endpoint_detection=True,
        rule1_min_trailing_silence=2.4,
        rule2_min_trailing_silence=1.2,
        rule3_min_utterance_length=300,  # it essentially disables this rule
        decoding_method=args.decoding_method,
        provider=args.provider,
        hotwords_file=args.hotwords_file,
        hotwords_score=args.hotwords_score,
        blank_penalty=args.blank_penalty,
    )
    return recognizer


def main():
    args = get_args()
    device_name = args.device_name
    print(f"device_name: {device_name}")
    alsa = sherpa_onnx.Alsa(device_name)

    print("Creating recognizer")
    recognizer = create_recognizer(args)
    print("Started! Please speak")

    sample_rate = 16000
    samples_per_read = int(0.1 * sample_rate)  # 0.1 second = 100 ms

    stream = recognizer.create_stream()

    last_result = ""
    segment_id = 0
    while True:
        samples = alsa.read(samples_per_read)  # a blocking read
        stream.accept_waveform(sample_rate, samples)
        while recognizer.is_ready(stream):
            recognizer.decode_stream(stream)

        is_endpoint = recognizer.is_endpoint(stream)

        result = recognizer.get_result(stream)

        if result and (last_result != result):
            last_result = result
            print("\r{}:{}".format(segment_id, result), end="", flush=True)

        if is_endpoint:
            if result:
                print("\r{}:{}".format(segment_id, result), flush=True)
                segment_id += 1
            recognizer.reset(stream)


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Exiting")
...
...
sherpa-onnx/csrc/session.cc
...
...
@@ -16,7 +16,7 @@
#endif

#if __ANDROID_API__ >= 27
-#include "nnapi_provider_factory.h"
+#include "nnapi_provider_factory.h"  // NOLINT
#endif

namespace sherpa_onnx {
...
...
sherpa-onnx/csrc/sherpa-onnx-alsa-offline-speaker-identification.cc
...
...
@@ -276,8 +276,8 @@ as the device_name.
      }
    }

-    using namespace std::chrono_literals;
-    std::this_thread::sleep_for(20ms);  // sleep for 20ms
+    using namespace std::chrono_literals;  // NOLINT
+    std::this_thread::sleep_for(20ms);     // sleep for 20ms
  }
  t.join();
...
...
sherpa-onnx/csrc/sherpa-onnx-alsa-offline.cc
...
...
@@ -192,8 +192,8 @@ as the device_name.
      }
    }

-    using namespace std::chrono_literals;
-    std::this_thread::sleep_for(20ms);  // sleep for 20ms
+    using namespace std::chrono_literals;  // NOLINT
+    std::this_thread::sleep_for(20ms);     // sleep for 20ms
  }
  t.join();
  t2.join();
...
...
sherpa-onnx/csrc/sherpa-onnx-keyword-spotter-alsa.cc
...
...
@@ -53,10 +53,6 @@ card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
and if you want to select card 3 and the device 0 on that card, please use:

-  hw:3,0
-
-or
-
  plughw:3,0

as the device_name.
...
...
sherpa-onnx/python/csrc/CMakeLists.txt
include_directories(${CMAKE_SOURCE_DIR})

-pybind11_add_module(_sherpa_onnx
+set(srcs
  circular-buffer.cc
  display.cc
  endpoint.cc
...
...
@@ -37,6 +37,13 @@ pybind11_add_module(_sherpa_onnx
  vad-model.cc
  voice-activity-detector.cc
)

+if(SHERPA_ONNX_HAS_ALSA)
+  list(APPEND srcs ${CMAKE_SOURCE_DIR}/sherpa-onnx/csrc/alsa.cc alsa.cc)
+else()
+  list(APPEND srcs faked-alsa.cc)
+endif()
+
+pybind11_add_module(_sherpa_onnx ${srcs})

if(APPLE)
  execute_process(
...
...
@@ -54,6 +61,14 @@ endif()
target_link_libraries(_sherpa_onnx PRIVATE sherpa-onnx-core)

+if(SHERPA_ONNX_HAS_ALSA)
+  if(DEFINED ENV{SHERPA_ONNX_ALSA_LIB_DIR})
+    target_link_libraries(_sherpa_onnx PRIVATE -L$ENV{SHERPA_ONNX_ALSA_LIB_DIR} -lasound)
+  else()
+    target_link_libraries(_sherpa_onnx PRIVATE asound)
+  endif()
+endif()
+
install(TARGETS _sherpa_onnx
  DESTINATION ../
)
...
...
sherpa-onnx/python/csrc/alsa.cc
0 → 100644
// sherpa-onnx/python/csrc/alsa.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/alsa.h"

#include <vector>

#include "sherpa-onnx/csrc/alsa.h"

namespace sherpa_onnx {

void PybindAlsa(py::module *m) {
  using PyClass = Alsa;
  py::class_<PyClass>(*m, "Alsa")
      .def(py::init<const char *>(), py::arg("device_name"),
           py::call_guard<py::gil_scoped_release>())
      .def(
          "read",
          [](PyClass &self, int32_t num_samples) -> std::vector<float> {
            return self.Read(num_samples);
          },
          py::arg("num_samples"), py::call_guard<py::gil_scoped_release>())
      .def_property_readonly("expected_sample_rate",
                             &PyClass::GetExpectedSampleRate)
      .def_property_readonly("actual_sample_rate",
                             &PyClass::GetActualSampleRate);
}

}  // namespace sherpa_onnx
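Both the constructor and read() are wrapped in py::call_guard<py::gil_scoped_release>, so the blocking ALSA read does not hold the GIL and other Python threads keep running while it waits for samples. A small sketch of what that allows; plughw:3,0 is again a placeholder device name:

# Sketch: because Alsa.read() releases the GIL, audio capture can run on a
# background thread without stalling the rest of the program.
# "plughw:3,0" is a placeholder device name.
import queue
import threading

import sherpa_onnx

alsa = sherpa_onnx.Alsa("plughw:3,0")
audio_queue = queue.Queue()


def capture():
    while True:
        # Blocking C++ read; the GIL is released while it waits.
        audio_queue.put(alsa.read(1600))


threading.Thread(target=capture, daemon=True).start()

while True:
    samples = audio_queue.get()  # the main thread stays free to do other work
    print(f"got {len(samples)} samples")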
...
...
sherpa-onnx/python/csrc/alsa.h
0 → 100644
// sherpa-onnx/python/csrc/alsa.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_ALSA_H_
#define SHERPA_ONNX_PYTHON_CSRC_ALSA_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindAlsa(py::module *m);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_PYTHON_CSRC_ALSA_H_
...
...
sherpa-onnx/python/csrc/faked-alsa.cc
0 → 100644
// sherpa-onnx/python/csrc/faked-alsa.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/python/csrc/alsa.h"

namespace sherpa_onnx {

class FakedAlsa {
 public:
  explicit FakedAlsa(const char *) {
    SHERPA_ONNX_LOGE("This function is for Linux only.");
#if (SHERPA_ONNX_ENABLE_ALSA == 0) && (defined(__unix__) || defined(__unix))
    SHERPA_ONNX_LOGE(R"doc(
sherpa-onnx is compiled without alsa support. To enable that, please run
  (1) sudo apt-get install alsa-utils libasound2-dev
  (2) rebuild sherpa-onnx
)doc");
#endif
    exit(-1);
  }

  std::vector<float> Read(int32_t) const { return {}; }

  int32_t GetExpectedSampleRate() const { return -1; }

  int32_t GetActualSampleRate() const { return -1; }
};

void PybindAlsa(py::module *m) {
  using PyClass = FakedAlsa;
  py::class_<PyClass>(*m, "Alsa")
      .def(py::init<const char *>(), py::arg("device_name"))
      .def(
          "read",
          [](PyClass &self, int32_t num_samples) -> std::vector<float> {
            return self.Read(num_samples);
          },
          py::arg("num_samples"), py::call_guard<py::gil_scoped_release>())
      .def_property_readonly("expected_sample_rate",
                             &PyClass::GetExpectedSampleRate)
      .def_property_readonly("actual_sample_rate",
                             &PyClass::GetActualSampleRate);
}

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_PYTHON_CSRC_FAKED_ALSA_H_
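Note that this fallback logs an error and then calls exit(-1), so on a build without ALSA support, constructing sherpa_onnx.Alsa terminates the whole Python process rather than raising an exception that try/except could catch. A small user-side guard (a sketch; it cannot detect a Linux build compiled without ALSA, and the device name is a placeholder):

# Sketch: avoid the ALSA-less fallback binding, which exits the process
# instead of raising a Python exception.
import sys

import sherpa_onnx

if sys.platform != "linux":
    sys.exit("This example needs an ALSA-enabled Linux build of sherpa-onnx")

alsa = sherpa_onnx.Alsa("plughw:3,0")  # placeholder device name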
...
...
sherpa-onnx/python/csrc/sherpa-onnx.cc
...
...
@@ -4,6 +4,7 @@
#include "sherpa-onnx/python/csrc/sherpa-onnx.h"
+#include "sherpa-onnx/python/csrc/alsa.h"
#include "sherpa-onnx/python/csrc/circular-buffer.h"
#include "sherpa-onnx/python/csrc/display.h"
#include "sherpa-onnx/python/csrc/endpoint.h"
...
...
@@ -54,6 +55,8 @@ PYBIND11_MODULE(_sherpa_onnx, m) {
  PybindOfflineTts(&m);
  PybindSpeakerEmbeddingExtractor(&m);
  PybindSpeakerEmbeddingManager(&m);
+
+  PybindAlsa(&m);
}

}  // namespace sherpa_onnx
...
...
sherpa-onnx/python/sherpa_onnx/__init__.py
from _sherpa_onnx import (
+    Alsa,
    CircularBuffer,
    Display,
    OfflineStream,
...
...
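Since Alsa is now re-exported from the sherpa_onnx package, a quick way to check whether an installed wheel already contains this commit is an attribute check (a sketch; it only confirms the symbol exists, not that the wheel was built with real ALSA support):

# Sketch: check that the installed sherpa_onnx exposes the new Alsa binding.
import sherpa_onnx

if hasattr(sherpa_onnx, "Alsa"):
    print("sherpa_onnx.Alsa is available")
else:
    print("this sherpa_onnx build predates the Alsa binding")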