Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-03-25 15:16:47 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-03-25 15:16:47 +0800
Commit
ab7cff2513c956e705c4bd5fd061de9c5c5f65e3
ab7cff25
1 parent
0d258dd1
Add C API for spoken language identification. (#695)
隐藏空白字符变更
内嵌
并排对比
正在显示
18 个修改的文件
包含
363 行增加
和
67 行删除
.github/scripts/test-c-api.sh
.github/scripts/test-spoken-language-identification.sh
.github/workflows/android.yaml
.github/workflows/build-xcframework.yaml
.github/workflows/linux.yaml
.github/workflows/macos.yaml
.github/workflows/windows-x64.yaml
.github/workflows/windows-x86.yaml
.gitignore
c-api-examples/CMakeLists.txt
c-api-examples/Makefile
c-api-examples/decode-file-c-api.c
c-api-examples/spoken-language-identification-c-api.c
dotnet-examples/offline-decode-files/run-hotwords.sh
dotnet-examples/offline-decode-files/run-zipformer.sh
dotnet-examples/online-decode-files/run-transducer.sh
sherpa-onnx/c-api/c-api.cc
sherpa-onnx/c-api/c-api.h
.github/scripts/test-c-api.sh
0 → 100755
查看文件 @
ab7cff2
#!/usr/bin/env bash
#
# CI smoke test for the spoken language identification C API example.
#
# Expects the environment variable SLID_EXE to name the example executable
# (it must be resolvable through PATH). The script downloads the whisper tiny
# model, runs the executable once, and removes the downloaded files again.

set -e

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

# An unset or empty SLID_EXE would make the bare `$SLID_EXE` line below expand
# to nothing, which is a no-op: the test would "pass" without running anything.
if [ -z "$SLID_EXE" ]; then
  echo "SLID_EXE is not set" >&2
  exit 1
fi

echo "SLID_EXE is $SLID_EXE"
echo "PATH: $PATH"

log "------------------------------------------------------------"
log "Download whisper tiny for spoken language identification "
log "------------------------------------------------------------"

# Start from a clean state: drop any leftover model directory or archive.
rm -rf sherpa-onnx-whisper-tiny*

curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.tar.bz2
rm sherpa-onnx-whisper-tiny.tar.bz2

# Left unquoted on purpose so the original word-splitting behaviour is kept.
$SLID_EXE

rm -rf sherpa-onnx-whisper-tiny*
...
...
.github/scripts/test-spoken-language-identification.sh
查看文件 @
ab7cff2
...
...
@@ -28,32 +28,32 @@ ar-arabic.wav
bg
-bulgarian.wav
cs-czech.wav
da-danish.wav
de-german.wav
el-greek.wav
en-english.wav
es-spanish.wav
fa-persian.wav
fi
-finnish.wav
fr-french.wav
hi-hindi.wav
hr-croatian.wav
id-indonesian.wav
it-italian.wav
ja-japanese.wav
ko-korean.wav
nl-dutch.wav
no-norwegian.wav
po-polish.wav
pt-portuguese.wav
ro-romanian.wav
ru-russian.wav
sk-slovak.wav
sv-swedish.wav
ta-tamil.wav
tl-tagalog.wav
tr-turkish.wav
uk-ukrainian.wav
zh-chinese.wav
# de-german.wav
# el-greek.wav
# en-english.wav
# es-spanish.wav
# fa-persian.wav
# fi-finnish.wav
# fr-french.wav
# hi-hindi.wav
# hr-croatian.wav
# id-indonesian.wav
# it-italian.wav
# ja-japanese.wav
# ko-korean.wav
# nl-dutch.wav
# no-norwegian.wav
# po-polish.wav
# pt-portuguese.wav
# ro-romanian.wav
# ru-russian.wav
# sk-slovak.wav
# sv-swedish.wav
# ta-tamil.wav
# tl-tagalog.wav
# tr-turkish.wav
# uk-ukrainian.wav
# zh-chinese.wav
)
for
wav
in
${
waves
[@]
}
;
do
...
...
.github/workflows/android.yaml
查看文件 @
ab7cff2
...
...
@@ -113,6 +113,7 @@ jobs:
git config --global user.email "csukuangfj@gmail.com"
git config --global user.name "Fangjun Kuang"
rm -rf huggingface
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
cd huggingface
...
...
.github/workflows/build-xcframework.yaml
查看文件 @
ab7cff2
...
...
@@ -90,6 +90,7 @@ jobs:
git config --global user.email "csukuangfj@gmail.com"
git config --global user.name "Fangjun Kuang"
rm -rf huggingface
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
cd huggingface
...
...
.github/workflows/linux.yaml
查看文件 @
ab7cff2
...
...
@@ -123,8 +123,15 @@ jobs:
name
:
release-${{ matrix.build_type }}-${{ matrix.shared_lib }}
path
:
build/bin/*
-
name
:
Test spoken language identification
if
:
matrix.build_type != 'Debug'
-
name
:
Test spoken language identification (C API)
shell
:
bash
run
:
|
export PATH=$PWD/build/bin:$PATH
export SLID_EXE=spoken-language-identification-c-api
.github/scripts/test-c-api.sh
-
name
:
Test spoken language identification (C++ API)
shell
:
bash
run
:
|
export PATH=$PWD/build/bin:$PATH
...
...
@@ -243,6 +250,7 @@ jobs:
git config --global user.email "csukuangfj@gmail.com"
git config --global user.name "Fangjun Kuang"
rm -rf huggingface
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
cd huggingface
...
...
.github/workflows/macos.yaml
查看文件 @
ab7cff2
...
...
@@ -102,8 +102,15 @@ jobs:
otool -L build/bin/sherpa-onnx
otool -l build/bin/sherpa-onnx
-
name
:
Test spoken language identification
if
:
matrix.build_type != 'Debug'
-
name
:
Test spoken language identification (C API)
shell
:
bash
run
:
|
export PATH=$PWD/build/bin:$PATH
export SLID_EXE=spoken-language-identification-c-api
.github/scripts/test-c-api.sh
-
name
:
Test spoken language identification (C++ API)
shell
:
bash
run
:
|
export PATH=$PWD/build/bin:$PATH
...
...
.github/workflows/windows-x64.yaml
查看文件 @
ab7cff2
...
...
@@ -68,7 +68,15 @@ jobs:
ls -lh ./bin/Release/sherpa-onnx.exe
-
name
:
Test spoken language identification
-
name
:
Test spoken language identification (C API)
shell
:
bash
run
:
|
export PATH=$PWD/build/bin/Release:$PATH
export SLID_EXE=spoken-language-identification-c-api.exe
.github/scripts/test-c-api.sh
-
name
:
Test spoken language identification (C++ API)
shell
:
bash
run
:
|
export PATH=$PWD/build/bin/Release:$PATH
...
...
.github/workflows/windows-x86.yaml
查看文件 @
ab7cff2
...
...
@@ -69,6 +69,14 @@ jobs:
ls -lh ./bin/Release/sherpa-onnx.exe
-
name
:
Test spoken language identification (C API)
shell
:
bash
run
:
|
export PATH=$PWD/build/bin/Release:$PATH
export SLID_EXE=spoken-language-identification-c-api.exe
.github/scripts/test-c-api.sh
# - name: Test spoken language identification
# shell: bash
# run: |
...
...
.gitignore
查看文件 @
ab7cff2
...
...
@@ -85,3 +85,4 @@ log
vits-piper-*
vits-coqui-*
vits-mms-*
*.tar.bz2
...
...
c-api-examples/CMakeLists.txt
查看文件 @
ab7cff2
...
...
@@ -7,8 +7,11 @@ target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs)
add_executable
(
offline-tts-c-api offline-tts-c-api.c
)
target_link_libraries
(
offline-tts-c-api sherpa-onnx-c-api cargs
)
add_executable
(
spoken-language-identification-c-api spoken-language-identification-c-api.c
)
target_link_libraries
(
spoken-language-identification-c-api sherpa-onnx-c-api
)
if
(
SHERPA_ONNX_HAS_ALSA
)
add_subdirectory
(
./asr-microphone-example
)
else
(
)
else
if
((
UNIX AND NOT APPLE
)
OR LINUX
)
message
(
WARNING
"Not include ./asr-microphone-example since alsa is not available"
)
endif
()
...
...
c-api-examples/Makefile
查看文件 @
ab7cff2
...
...
@@ -4,7 +4,7 @@ CUR_DIR :=$(shell pwd)
CFLAGS
:=
-I ../ -I ../build/_deps/cargs-src/include/
LDFLAGS
:=
-L ../build/lib
LDFLAGS
+=
-L ../build/_deps/onnxruntime-src/lib
LDFLAGS
+=
-lsherpa-onnx-c-api -lsherpa-onnx-core -l
onnxruntime -lkaldi-native-fbank-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lcargs
LDFLAGS
+=
-lsherpa-onnx-c-api -lsherpa-onnx-core -l
kaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime
LDFLAGS
+=
-framework Foundation
LDFLAGS
+=
-lc++
LDFLAGS
+=
-Wl,-rpath,
${
CUR_DIR
}
/../build/lib
...
...
c-api-examples/decode-file-c-api.c
查看文件 @
ab7cff2
...
...
@@ -169,55 +169,56 @@ int32_t main(int32_t argc, char *argv[]) {
int32_t
segment_id
=
0
;
const
char
*
wav_filename
=
argv
[
context
.
index
];
FILE
*
fp
=
fopen
(
wav_filename
,
"rb"
);
if
(
!
fp
)
{
fprintf
(
stderr
,
"Failed to open %s
\n
"
,
wav_filename
);
const
SherpaOnnxWave
*
wave
=
SherpaOnnxReadWave
(
wav_filename
);
if
(
wave
==
NULL
)
{
fprintf
(
stderr
,
"Failed to read %s
\n
"
,
wav_filename
);
return
-
1
;
}
// Assume the wave header occupies 44 bytes.
fseek
(
fp
,
44
,
SEEK_SET
);
// simulate streaming
#define N 3200 // 0.2 s. Sample rate is fixed to 16 kHz
int16_t
buffer
[
N
];
float
samples
[
N
];
fprintf
(
stderr
,
"sample rate: %d, num samples: %d, duration: %.2f s
\n
"
,
wave
->
sample_rate
,
wave
->
num_samples
,
(
float
)
wave
->
num_samples
/
wave
->
sample_rate
);
int32_t
k
=
0
;
while
(
k
<
wave
->
num_samples
)
{
int32_t
start
=
k
;
int32_t
end
=
(
start
+
N
>
wave
->
num_samples
)
?
wave
->
num_samples
:
(
start
+
N
);
k
+=
N
;
AcceptWaveform
(
stream
,
wave
->
sample_rate
,
wave
->
samples
+
start
,
end
-
start
);
while
(
IsOnlineStreamReady
(
recognizer
,
stream
))
{
DecodeOnlineStream
(
recognizer
,
stream
);
}
while
(
!
feof
(
fp
))
{
size_t
n
=
fread
((
void
*
)
buffer
,
sizeof
(
int16_t
),
N
,
fp
);
if
(
n
>
0
)
{
for
(
size_t
i
=
0
;
i
!=
n
;
++
i
)
{
samples
[
i
]
=
buffer
[
i
]
/
32768
.;
}
AcceptWaveform
(
stream
,
16000
,
samples
,
n
);
while
(
IsOnlineStreamReady
(
recognizer
,
stream
))
{
DecodeOnlineStream
(
recognizer
,
stream
);
}
const
SherpaOnnxOnlineRecognizerResult
*
r
=
GetOnlineStreamResult
(
recognizer
,
stream
);
const
SherpaOnnxOnlineRecognizerResult
*
r
=
GetOnlineStreamResult
(
recognizer
,
stream
);
if
(
strlen
(
r
->
text
))
{
SherpaOnnxPrint
(
display
,
segment_id
,
r
->
text
);
}
if
(
IsEndpoint
(
recognizer
,
stream
))
{
if
(
strlen
(
r
->
text
))
{
SherpaOnnxPrint
(
display
,
segment_id
,
r
->
text
)
;
++
segment_id
;
}
if
(
IsEndpoint
(
recognizer
,
stream
))
{
if
(
strlen
(
r
->
text
))
{
++
segment_id
;
}
Reset
(
recognizer
,
stream
);
}
DestroyOnlineRecognizerResult
(
r
);
Reset
(
recognizer
,
stream
);
}
DestroyOnlineRecognizerResult
(
r
);
}
fclose
(
fp
);
// add some tail padding
float
tail_paddings
[
4800
]
=
{
0
};
// 0.3 seconds at 16 kHz sample rate
AcceptWaveform
(
stream
,
16000
,
tail_paddings
,
4800
);
AcceptWaveform
(
stream
,
wave
->
sample_rate
,
tail_paddings
,
4800
);
SherpaOnnxFreeWave
(
wave
);
InputFinished
(
stream
);
while
(
IsOnlineStreamReady
(
recognizer
,
stream
))
{
...
...
c-api-examples/spoken-language-identification-c-api.c
0 → 100644
查看文件 @
ab7cff2
// We assume you have pre-downloaded the whisper multi-lingual models
// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
// An example command to download the "tiny" whisper model is given below:
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
// tar xvf sherpa-onnx-whisper-tiny.tar.bz2
// rm sherpa-onnx-whisper-tiny.tar.bz2
//
// clang-format on
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sherpa-onnx/c-api/c-api.h"
// Runs spoken language identification on one bundled test wave using the
// sherpa-onnx C API and prints the detected language to stderr.
//
// The whisper tiny model is expected in ./sherpa-onnx-whisper-tiny (see the
// download instructions at the top of this file).
//
// Returns 0 on success and -1 if the identifier cannot be created or the wave
// file cannot be read. Every object created here is released before returning.
int32_t main() {
  SherpaOnnxSpokenLanguageIdentificationConfig config;
  // Zero every field first so that anything not set below is 0/NULL.
  memset(&config, 0, sizeof(config));

  config.whisper.encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx";
  config.whisper.decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx";
  config.num_threads = 1;
  config.debug = 1;
  config.provider = "cpu";

  const SherpaOnnxSpokenLanguageIdentification *slid =
      SherpaOnnxCreateSpokenLanguageIdentification(&config);
  if (!slid) {
    fprintf(stderr, "Failed to create spoken language identifier\n");
    return -1;
  }

  // You can find more test waves from
  // https://hf-mirror.com/spaces/k2-fsa/spoken-language-identification/tree/main/test_wavs
  const char *wav_filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav";
  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    // The identifier already exists at this point; release it so the error
    // path does not leak it.
    SherpaOnnxDestroySpokenLanguageIdentification(slid);
    return -1;
  }

  SherpaOnnxOfflineStream *stream =
      SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(slid);

  AcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
                        wave->num_samples);

  const SherpaOnnxSpokenLanguageIdentificationResult *result =
      SherpaOnnxSpokenLanguageIdentificationCompute(slid, stream);

  fprintf(stderr, "wav_filename: %s\n", wav_filename);
  fprintf(stderr, "Detected language: %s\n", result->lang);

  // Release everything in the reverse order of creation.
  SherpaOnnxDestroySpokenLanguageIdentificationResult(result);
  DestroyOfflineStream(stream);
  SherpaOnnxFreeWave(wave);
  SherpaOnnxDestroySpokenLanguageIdentification(slid);

  return 0;
}
...
...
dotnet-examples/offline-decode-files/run-hotwords.sh
查看文件 @
ab7cff2
...
...
@@ -3,7 +3,7 @@
set
-ex
if
[
! -d ./sherpa-onnx-zipformer-en-2023-04-01
]
;
then
wget -q
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
curl -SL -O
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
fi
...
...
dotnet-examples/offline-decode-files/run-zipformer.sh
查看文件 @
ab7cff2
...
...
@@ -3,7 +3,7 @@
set
-ex
if
[
! -d ./sherpa-onnx-zipformer-en-2023-04-01
]
;
then
wget -q
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
curl -SL -O
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
fi
...
...
dotnet-examples/online-decode-files/run-transducer.sh
查看文件 @
ab7cff2
...
...
@@ -6,7 +6,7 @@
set
-ex
if
[
! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
]
;
then
wget -q
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
curl -SL -O
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
fi
...
...
sherpa-onnx/c-api/c-api.cc
查看文件 @
ab7cff2
...
...
@@ -6,6 +6,7 @@
#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>
...
...
@@ -16,7 +17,9 @@
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/offline-tts.h"
#include "sherpa-onnx/csrc/online-recognizer.h"
#include "sherpa-onnx/csrc/spoken-language-identification.h"
#include "sherpa-onnx/csrc/voice-activity-detector.h"
#include "sherpa-onnx/csrc/wave-reader.h"
#include "sherpa-onnx/csrc/wave-writer.h"
struct
SherpaOnnxOnlineRecognizer
{
...
...
@@ -859,3 +862,97 @@ int32_t SherpaOnnxWriteWave(const float *samples, int32_t n,
int32_t
sample_rate
,
const
char
*
filename
)
{
return
sherpa_onnx
::
WriteWave
(
filename
,
sample_rate
,
samples
,
n
);
}
// Reads `filename` through sherpa_onnx::ReadWave() and repackages the decoded
// samples as a heap-allocated SherpaOnnxWave for C callers.
//
// Returns nullptr when ReadWave() reports failure. Otherwise the returned
// object owns a freshly allocated copy of the samples; release both with
// SherpaOnnxFreeWave().
const SherpaOnnxWave *SherpaOnnxReadWave(const char *filename) {
  bool ok = false;
  int32_t rate = -1;

  const std::vector<float> decoded =
      sherpa_onnx::ReadWave(filename, &rate, &ok);
  if (!ok) {
    return nullptr;
  }

  // The vector dies with this scope, so hand the caller its own copy.
  float *buffer = new float[decoded.size()];
  std::copy(decoded.begin(), decoded.end(), buffer);

  SherpaOnnxWave *result = new SherpaOnnxWave;
  result->num_samples = decoded.size();
  result->sample_rate = rate;
  result->samples = buffer;

  return result;
}
// Releases a wave returned by SherpaOnnxReadWave(): first the sample array,
// then the struct itself. A null `wave` is ignored.
void SherpaOnnxFreeWave(const SherpaOnnxWave *wave) {
  if (wave == nullptr) {
    return;
  }

  delete[] wave->samples;
  delete wave;
}
// Definition of the handle type exposed by the C API. It only wraps the C++
// implementation object, which is destroyed together with the handle.
struct SherpaOnnxSpokenLanguageIdentification {
  std::unique_ptr<sherpa_onnx::SpokenLanguageIdentification> impl;
};
// Builds a spoken language identifier from the C configuration struct.
//
// Each field except `debug` goes through SHERPA_ONNX_OR together with a
// fallback value ("" for the model paths, -1 for tail_paddings, 1 for
// num_threads, "cpu" for provider); `debug` is copied unchanged.
// NOTE(review): SHERPA_ONNX_OR is defined outside this chunk; presumably it
// picks the fallback when the field is 0/NULL -- confirm.
//
// Returns nullptr if the resulting configuration fails Validate(). Otherwise
// returns a heap-allocated handle.
const SherpaOnnxSpokenLanguageIdentification *
SherpaOnnxCreateSpokenLanguageIdentification(
    const SherpaOnnxSpokenLanguageIdentificationConfig *config) {
  sherpa_onnx::SpokenLanguageIdentificationConfig options;

  options.debug = config->debug;
  options.num_threads = SHERPA_ONNX_OR(config->num_threads, 1);
  options.provider = SHERPA_ONNX_OR(config->provider, "cpu");

  options.whisper.encoder = SHERPA_ONNX_OR(config->whisper.encoder, "");
  options.whisper.decoder = SHERPA_ONNX_OR(config->whisper.decoder, "");
  options.whisper.tail_paddings =
      SHERPA_ONNX_OR(config->whisper.tail_paddings, -1);

  // Dump the effective configuration before validating it, so that a rejected
  // configuration is still visible in the log.
  if (options.debug) {
    SHERPA_ONNX_LOGE("%s\n", options.ToString().c_str());
  }

  if (!options.Validate()) {
    SHERPA_ONNX_LOGE("Errors in config");
    return nullptr;
  }

  auto *handle = new SherpaOnnxSpokenLanguageIdentification;
  handle->impl =
      std::make_unique<sherpa_onnx::SpokenLanguageIdentification>(options);

  return handle;
}
// Destroys a handle returned by SherpaOnnxCreateSpokenLanguageIdentification().
// Deleting a null pointer is a no-op, so a null `slid` is accepted.
void SherpaOnnxDestroySpokenLanguageIdentification(
    const SherpaOnnxSpokenLanguageIdentification *slid) {
  delete slid;
}
// Wraps the stream produced by the identifier's CreateStream() in a
// heap-allocated SherpaOnnxOfflineStream and returns it to the caller.
// `slid` is dereferenced without a null check.
SherpaOnnxOfflineStream *
SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(
    const SherpaOnnxSpokenLanguageIdentification *slid) {
  return new SherpaOnnxOfflineStream(slid->impl->CreateStream());
}
// Runs language identification on stream `s` and returns the outcome as a
// heap-allocated result whose `lang` member is a NUL-terminated copy of the
// string returned by the underlying Compute() call.
//
// Release the result with
// SherpaOnnxDestroySpokenLanguageIdentificationResult().
const SherpaOnnxSpokenLanguageIdentificationResult *
SherpaOnnxSpokenLanguageIdentificationCompute(
    const SherpaOnnxSpokenLanguageIdentification *slid,
    const SherpaOnnxOfflineStream *s) {
  const std::string detected = slid->impl->Compute(s->impl.get());
  const size_t length = detected.size();

  // c_str() is NUL-terminated, so copying length + 1 characters transfers the
  // text together with its terminator.
  char *text = new char[length + 1];
  std::copy(detected.c_str(), detected.c_str() + length + 1, text);

  auto *result = new SherpaOnnxSpokenLanguageIdentificationResult;
  result->lang = text;

  return result;
}
// Releases a result returned by SherpaOnnxSpokenLanguageIdentificationCompute():
// first the language string, then the struct itself. A null `r` is ignored.
void SherpaOnnxDestroySpokenLanguageIdentificationResult(
    const SherpaOnnxSpokenLanguageIdentificationResult *r) {
  if (r == nullptr) {
    return;
  }

  delete[] r->lang;
  delete r;
}
...
...
sherpa-onnx/c-api/c-api.h
查看文件 @
ab7cff2
...
...
@@ -820,6 +820,76 @@ SHERPA_ONNX_API int32_t SherpaOnnxWriteWave(const float *samples, int32_t n,
int32_t
sample_rate
,
const
char
*
filename
);
// Audio data as returned by SherpaOnnxReadWave().
SHERPA_ONNX_API typedef struct SherpaOnnxWave {
  // samples normalized to the range [-1, 1]
  const float *samples;
  int32_t sample_rate;
  // Number of entries in `samples`.
  int32_t num_samples;
} SherpaOnnxWave;
// Read a wave file. Only standard WAVE files are supported: each sample
// must be 16-bit and there must be a single channel.
//
// Returns a NULL pointer on error.
//
// If the returned pointer is not NULL, the user has to invoke
// SherpaOnnxFreeWave() to free the returned pointer to avoid memory leak.
SHERPA_ONNX_API const SherpaOnnxWave *SherpaOnnxReadWave(const char *filename);
// Free a wave returned by SherpaOnnxReadWave().
SHERPA_ONNX_API void SherpaOnnxFreeWave(const SherpaOnnxWave *wave);
// Spoken language identification

// Whisper model settings used for spoken language identification.
SHERPA_ONNX_API typedef struct
    SherpaOnnxSpokenLanguageIdentificationWhisperConfig {
  // Path to the whisper encoder model, e.g. tiny-encoder.int8.onnx
  const char *encoder;
  // Path to the whisper decoder model, e.g. tiny-decoder.int8.onnx
  const char *decoder;
  int32_t tail_paddings;
} SherpaOnnxSpokenLanguageIdentificationWhisperConfig;
// Configuration passed to SherpaOnnxCreateSpokenLanguageIdentification().
SHERPA_ONNX_API typedef struct SherpaOnnxSpokenLanguageIdentificationConfig {
  SherpaOnnxSpokenLanguageIdentificationWhisperConfig whisper;
  int32_t num_threads;
  // Non-zero makes the create function log the effective configuration.
  int32_t debug;
  // Execution provider name, e.g. "cpu"
  const char *provider;
} SherpaOnnxSpokenLanguageIdentificationConfig;
// Opaque handle; this header only ever uses pointers to it.
SHERPA_ONNX_API typedef struct SherpaOnnxSpokenLanguageIdentification
    SherpaOnnxSpokenLanguageIdentification;
// Create an instance of SpokenLanguageIdentification from `config`.
//
// The user has to invoke SherpaOnnxDestroySpokenLanguageIdentification()
// to free the returned pointer to avoid memory leak.
SHERPA_ONNX_API const SherpaOnnxSpokenLanguageIdentification *
SherpaOnnxCreateSpokenLanguageIdentification(
    const SherpaOnnxSpokenLanguageIdentificationConfig *config);
// Free an instance returned by SherpaOnnxCreateSpokenLanguageIdentification().
SHERPA_ONNX_API void SherpaOnnxDestroySpokenLanguageIdentification(
    const SherpaOnnxSpokenLanguageIdentification *slid);
// Create an offline stream for use with `slid`.
//
// The user has to invoke DestroyOfflineStream()
// to free the returned pointer to avoid memory leak
SHERPA_ONNX_API SherpaOnnxOfflineStream *
SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(
    const SherpaOnnxSpokenLanguageIdentification *slid);
// Result of SherpaOnnxSpokenLanguageIdentificationCompute().
SHERPA_ONNX_API typedef struct SherpaOnnxSpokenLanguageIdentificationResult {
  // Language code of the detected language, for example:
  //   en for English
  //   de for German
  //   zh for Chinese
  //   es for Spanish
  //   ...
  const char *lang;
} SherpaOnnxSpokenLanguageIdentificationResult;
// Identify the language of the audio held by stream `s`.
//
// The user has to invoke SherpaOnnxDestroySpokenLanguageIdentificationResult()
// to free the returned pointer to avoid memory leak
SHERPA_ONNX_API const SherpaOnnxSpokenLanguageIdentificationResult *
SherpaOnnxSpokenLanguageIdentificationCompute(
    const SherpaOnnxSpokenLanguageIdentification *slid,
    const SherpaOnnxOfflineStream *s);
// Free a result returned by SherpaOnnxSpokenLanguageIdentificationCompute().
SHERPA_ONNX_API void SherpaOnnxDestroySpokenLanguageIdentificationResult(
    const SherpaOnnxSpokenLanguageIdentificationResult *r);
#if defined(__GNUC__)
#pragma GCC diagnostic pop
#endif
...
...
请
注册
或
登录
后发表评论