Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-03-08 19:31:46 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-03-08 19:31:46 +0800
Commit
4b708e055cb59c5be2c38f7e2c7a3b7ec2010768
4b708e05
1 parent
d3287f94
Add microphone streaming ASR example for C API (#650)
隐藏空白字符变更
内嵌
并排对比
正在显示
8 个修改的文件
包含
285 行增加
和
1 行删除
c-api-examples/CMakeLists.txt
c-api-examples/asr-microphone-example/CMakeLists.txt
c-api-examples/asr-microphone-example/CPPLINT.cfg
c-api-examples/asr-microphone-example/README.md
c-api-examples/asr-microphone-example/alsa.cc
c-api-examples/asr-microphone-example/alsa.h
c-api-examples/asr-microphone-example/c-api-alsa.cc
c-api-examples/decode-file-c-api.c
c-api-examples/CMakeLists.txt
查看文件 @
4b708e0
...
...
@@ -6,3 +6,9 @@ target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs)
# Offline TTS example built on top of the sherpa-onnx C API.
add_executable(offline-tts-c-api offline-tts-c-api.c)
target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs)

# The microphone example is only added to the build when
# SHERPA_ONNX_HAS_ALSA is true; otherwise a warning is emitted at
# configure time and the sub-directory is skipped.
if(SHERPA_ONNX_HAS_ALSA)
  add_subdirectory(./asr-microphone-example)
else()
  message(WARNING "Not include ./asr-microphone-example since alsa is not available")
endif()
...
...
c-api-examples/asr-microphone-example/CMakeLists.txt
0 → 100644
查看文件 @
4b708e0
# Executable for the microphone streaming ASR example. It is built from
# c-api-alsa.cc and alsa.cc and links the sherpa-onnx C API plus cargs.
add_executable(c-api-alsa c-api-alsa.cc alsa.cc)
target_link_libraries(c-api-alsa sherpa-onnx-c-api cargs)

# Link against libasound. When the environment variable
# SHERPA_ONNX_ALSA_LIB_DIR is defined, its value is passed to the linker
# as an extra library search directory (-L...); otherwise the library is
# looked up by its plain name.
if(DEFINED ENV{SHERPA_ONNX_ALSA_LIB_DIR})
  target_link_libraries(c-api-alsa -L$ENV{SHERPA_ONNX_ALSA_LIB_DIR} -lasound)
else()
  target_link_libraries(c-api-alsa asound)
endif()
...
...
c-api-examples/asr-microphone-example/CPPLINT.cfg
0 → 100644
查看文件 @
4b708e0
exclude_files=alsa.cc|alsa.h
...
...
c-api-examples/asr-microphone-example/README.md
0 → 100644
查看文件 @
4b708e0
# Introduction
This folder contains examples for real-time speech recognition from a microphone
using the sherpa-onnx C API.
**Note**
: You can call C API from C++ files.
## ./c-api-alsa.cc
This file uses ALSA to read audio from a microphone. It runs only on Linux;
macOS and Windows are not supported.
...
...
c-api-examples/asr-microphone-example/alsa.cc
0 → 120000
查看文件 @
4b708e0
..
/
..
/
sherpa
-
onnx
/
csrc
/
alsa
.
cc
\ No newline at end of file
...
...
c-api-examples/asr-microphone-example/alsa.h
0 → 120000
查看文件 @
4b708e0
..
/
..
/
sherpa
-
onnx
/
csrc
/
alsa
.
h
\ No newline at end of file
...
...
c-api-examples/asr-microphone-example/c-api-alsa.cc
0 → 100644
查看文件 @
4b708e0
// c-api-examples/asr-microphone-example/c-api-alsa.cc
// Copyright (c) 2022-2024 Xiaomi Corporation
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <algorithm>
#include <cctype>  // std::tolower
#include <cstdint>
#include <string>
#include <vector>

#include "c-api-examples/asr-microphone-example/alsa.h"

// NOTE: You don't need to use cargs.h in your own project.
// We use it in this file to parse commandline arguments
#include "cargs.h"  // NOLINT
#include "sherpa-onnx/c-api/c-api.h"
static
struct
cag_option
options
[]
=
{
{.
identifier
=
'h'
,
.
access_letters
=
"h"
,
.
access_name
=
"help"
,
.
description
=
"Show help"
},
{.
identifier
=
't'
,
.
access_letters
=
NULL
,
.
access_name
=
"tokens"
,
.
value_name
=
"tokens"
,
.
description
=
"Tokens file"
},
{.
identifier
=
'e'
,
.
access_letters
=
NULL
,
.
access_name
=
"encoder"
,
.
value_name
=
"encoder"
,
.
description
=
"Encoder ONNX file"
},
{.
identifier
=
'd'
,
.
access_letters
=
NULL
,
.
access_name
=
"decoder"
,
.
value_name
=
"decoder"
,
.
description
=
"Decoder ONNX file"
},
{.
identifier
=
'j'
,
.
access_letters
=
NULL
,
.
access_name
=
"joiner"
,
.
value_name
=
"joiner"
,
.
description
=
"Joiner ONNX file"
},
{.
identifier
=
'n'
,
.
access_letters
=
NULL
,
.
access_name
=
"num-threads"
,
.
value_name
=
"num-threads"
,
.
description
=
"Number of threads"
},
{.
identifier
=
'p'
,
.
access_letters
=
NULL
,
.
access_name
=
"provider"
,
.
value_name
=
"provider"
,
.
description
=
"Provider: cpu (default), cuda, coreml"
},
{.
identifier
=
'm'
,
.
access_letters
=
NULL
,
.
access_name
=
"decoding-method"
,
.
value_name
=
"decoding-method"
,
.
description
=
"Decoding method: greedy_search (default), modified_beam_search"
},
{.
identifier
=
'f'
,
.
access_letters
=
NULL
,
.
access_name
=
"hotwords-file"
,
.
value_name
=
"hotwords-file"
,
.
description
=
"The file containing hotwords, one words/phrases per line, "
"and for each phrase the bpe/cjkchar are separated by a "
"space. For example: ▁HE LL O ▁WORLD, 你 好 世 界"
},
{.
identifier
=
's'
,
.
access_letters
=
NULL
,
.
access_name
=
"hotwords-score"
,
.
value_name
=
"hotwords-score"
,
.
description
=
"The bonus score for each token in hotwords. Used only "
"when decoding_method is modified_beam_search"
},
};
// Help text printed when too few arguments are given or --help is passed.
// The --joiner example path names joiner.onnx (it previously repeated
// decoder.onnx by mistake).
const char *kUsage = R"(
Usage:
./bin/c-api-alsa \
--tokens=/path/to/tokens.txt \
--encoder=/path/to/encoder.onnx \
--decoder=/path/to/decoder.onnx \
--joiner=/path/to/joiner.onnx \
device_name
The device name specifies which microphone to use in case there are several
on your system. You can use
arecord -l
to find all available microphones on your computer. For instance, if it outputs
**** List of CAPTURE Hardware Devices ****
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
Subdevices: 1/1
Subdevice #0: subdevice #0
and if you want to select card 3 and the device 0 on that card, please use:
plughw:3,0
as the device_name.
)";
// Set to non-zero by Handler() when SIGINT arrives; polled by the
// recording loop in main(). `volatile sig_atomic_t` is the type the
// C/C++ standards guarantee can be written safely from a signal handler.
volatile sig_atomic_t stop = 0;

// SIGINT handler: requests shutdown of the recording loop and prints a
// short notice on stderr.
//
// Only async-signal-safe operations are used here, hence write(2)
// instead of fprintf().
static void Handler(int /*sig*/) {
  stop = 1;

  static const char kMessage[] = "\nCaught Ctrl + C. Exiting...\n";
  // Nothing useful can be done if the write fails inside a signal handler.
  const ssize_t ignored = write(STDERR_FILENO, kMessage, sizeof(kMessage) - 1);
  (void)ignored;
}
int32_t
main
(
int32_t
argc
,
char
*
argv
[])
{
if
(
argc
<
6
)
{
fprintf
(
stderr
,
"%s
\n
"
,
kUsage
);
exit
(
0
);
}
signal
(
SIGINT
,
Handler
);
SherpaOnnxOnlineRecognizerConfig
config
;
memset
(
&
config
,
0
,
sizeof
(
config
));
config
.
model_config
.
debug
=
0
;
config
.
model_config
.
num_threads
=
1
;
config
.
model_config
.
provider
=
"cpu"
;
config
.
decoding_method
=
"greedy_search"
;
config
.
max_active_paths
=
4
;
config
.
feat_config
.
sample_rate
=
16000
;
config
.
feat_config
.
feature_dim
=
80
;
config
.
enable_endpoint
=
1
;
config
.
rule1_min_trailing_silence
=
2.4
;
config
.
rule2_min_trailing_silence
=
1.2
;
config
.
rule3_min_utterance_length
=
300
;
cag_option_context
context
;
char
identifier
;
const
char
*
value
;
cag_option_prepare
(
&
context
,
options
,
CAG_ARRAY_SIZE
(
options
),
argc
,
argv
);
while
(
cag_option_fetch
(
&
context
))
{
identifier
=
cag_option_get
(
&
context
);
value
=
cag_option_get_value
(
&
context
);
switch
(
identifier
)
{
case
't'
:
config
.
model_config
.
tokens
=
value
;
break
;
case
'e'
:
config
.
model_config
.
transducer
.
encoder
=
value
;
break
;
case
'd'
:
config
.
model_config
.
transducer
.
decoder
=
value
;
break
;
case
'j'
:
config
.
model_config
.
transducer
.
joiner
=
value
;
break
;
case
'n'
:
config
.
model_config
.
num_threads
=
atoi
(
value
);
break
;
case
'p'
:
config
.
model_config
.
provider
=
value
;
break
;
case
'm'
:
config
.
decoding_method
=
value
;
break
;
case
'f'
:
config
.
hotwords_file
=
value
;
break
;
case
's'
:
config
.
hotwords_score
=
atof
(
value
);
break
;
case
'h'
:
{
fprintf
(
stderr
,
"%s
\n
"
,
kUsage
);
exit
(
0
);
break
;
}
default:
// do nothing as config already has valid default values
break
;
}
}
SherpaOnnxOnlineRecognizer
*
recognizer
=
CreateOnlineRecognizer
(
&
config
);
SherpaOnnxOnlineStream
*
stream
=
CreateOnlineStream
(
recognizer
);
SherpaOnnxDisplay
*
display
=
CreateDisplay
(
50
);
int32_t
segment_id
=
0
;
const
char
*
device_name
=
argv
[
context
.
index
];
sherpa_onnx
::
Alsa
alsa
(
device_name
);
fprintf
(
stderr
,
"Use recording device: %s
\n
"
,
device_name
);
fprintf
(
stderr
,
"Please
\033
[32m
\033
[1mspeak
\033
[0m! Press
\033
[31m
\033
[1mCtrl + "
"C
\033
[0m to exit
\n
"
);
int32_t
expected_sample_rate
=
16000
;
if
(
alsa
.
GetExpectedSampleRate
()
!=
expected_sample_rate
)
{
fprintf
(
stderr
,
"sample rate: %d != %d
\n
"
,
alsa
.
GetExpectedSampleRate
(),
expected_sample_rate
);
exit
(
-
1
);
}
int32_t
chunk
=
0.1
*
alsa
.
GetActualSampleRate
();
std
::
string
last_text
;
int32_t
segment_index
=
0
;
while
(
!
stop
)
{
const
std
::
vector
<
float
>
&
samples
=
alsa
.
Read
(
chunk
);
AcceptWaveform
(
stream
,
expected_sample_rate
,
samples
.
data
(),
samples
.
size
());
while
(
IsOnlineStreamReady
(
recognizer
,
stream
))
{
DecodeOnlineStream
(
recognizer
,
stream
);
}
const
SherpaOnnxOnlineRecognizerResult
*
r
=
GetOnlineStreamResult
(
recognizer
,
stream
);
std
::
string
text
=
r
->
text
;
DestroyOnlineRecognizerResult
(
r
);
if
(
!
text
.
empty
()
&&
last_text
!=
text
)
{
last_text
=
text
;
std
::
transform
(
text
.
begin
(),
text
.
end
(),
text
.
begin
(),
[](
auto
c
)
{
return
std
::
tolower
(
c
);
});
SherpaOnnxPrint
(
display
,
segment_index
,
text
.
c_str
());
fflush
(
stderr
);
}
if
(
IsEndpoint
(
recognizer
,
stream
))
{
if
(
!
text
.
empty
())
{
++
segment_index
;
}
Reset
(
recognizer
,
stream
);
}
}
// free allocated resources
DestroyDisplay
(
display
);
DestroyOnlineStream
(
stream
);
DestroyOnlineRecognizer
(
recognizer
);
fprintf
(
stderr
,
"
\n
"
);
return
0
;
}
...
...
c-api-examples/decode-file-c-api.c
查看文件 @
4b708e0
...
...
@@ -157,7 +157,7 @@ int32_t main(int32_t argc, char *argv[]) {
break
;
}
default:
// do nothing as config already ha
ve
valid default values
// do nothing as config already ha
s
valid default values
break
;
}
}
...
...
请
注册
或
登录
后发表评论