Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-04-11 14:18:43 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-04-11 14:18:43 +0800
Commit
f204e62b44147c8eb18a5a00d6043e8d1d70e6d7
f204e62b
1 parent
34d70a25
Add C API for audio tagging (#754)
隐藏空白字符变更
内嵌
并排对比
正在显示
9 个修改的文件
包含
285 行增加
和
32 行删除
.github/scripts/test-c-api.sh
.github/workflows/linux.yaml
.github/workflows/macos.yaml
.github/workflows/windows-x64.yaml
.github/workflows/windows-x86.yaml
c-api-examples/CMakeLists.txt
c-api-examples/audio-tagging-c-api.c
sherpa-onnx/c-api/c-api.cc
sherpa-onnx/c-api/c-api.h
.github/scripts/test-c-api.sh
查看文件 @
f204e62
...
...
@@ -10,8 +10,21 @@ log() {
echo
"SLID_EXE is
$SLID_EXE
"
echo
"SID_EXE is
$SID_EXE
"
echo
"AT_EXE is
$AT_EXE
"
echo
"PATH:
$PATH
"
log
"------------------------------------------------------------"
log
"Test audio tagging "
log
"------------------------------------------------------------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
$AT_EXE
rm -rf sherpa-onnx-zipformer-audio-tagging-2024-04-09
log
"------------------------------------------------------------"
log
"Download whisper tiny for spoken language identification "
...
...
.github/workflows/linux.yaml
查看文件 @
f204e62
...
...
@@ -126,6 +126,16 @@ jobs:
name
:
release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
path
:
build/bin/*
-
name
:
Test C API
shell
:
bash
run
:
|
export PATH=$PWD/build/bin:$PATH
export SLID_EXE=spoken-language-identification-c-api
export SID_EXE=speaker-identification-c-api
export AT_EXE=audio-tagging-c-api
.github/scripts/test-c-api.sh
-
name
:
Test Audio tagging
shell
:
bash
run
:
|
...
...
@@ -142,14 +152,6 @@ jobs:
.github/scripts/test-online-ctc.sh
-
name
:
Test C API
shell
:
bash
run
:
|
export PATH=$PWD/build/bin:$PATH
export SLID_EXE=spoken-language-identification-c-api
export SID_EXE=speaker-identification-c-api
.github/scripts/test-c-api.sh
-
name
:
Test spoken language identification (C++ API)
shell
:
bash
...
...
.github/workflows/macos.yaml
查看文件 @
f204e62
...
...
@@ -105,22 +105,23 @@ jobs:
otool -L build/bin/sherpa-onnx
otool -l build/bin/sherpa-onnx
-
name
:
Test
Audio tagging
-
name
:
Test
C API
shell
:
bash
run
:
|
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-audio-tagging
export SLID_EXE=spoken-language-identification-c-api
export SID_EXE=speaker-identification-c-api
export AT_EXE=audio-tagging-c-api
.github/scripts/test-
audio-tagging
.sh
.github/scripts/test-
c-api
.sh
-
name
:
Test
C API
-
name
:
Test
Audio tagging
shell
:
bash
run
:
|
export PATH=$PWD/build/bin:$PATH
export SLID_EXE=spoken-language-identification-c-api
export SID_EXE=speaker-identification-c-api
export EXE=sherpa-onnx-offline-audio-tagging
.github/scripts/test-
c-api
.sh
.github/scripts/test-
audio-tagging
.sh
-
name
:
Test spoken language identification (C++ API)
shell
:
bash
...
...
.github/workflows/windows-x64.yaml
查看文件 @
f204e62
...
...
@@ -72,22 +72,24 @@ jobs:
ls -lh ./bin/Release/sherpa-onnx.exe
-
name
:
Test
Audio tagging
-
name
:
Test
C API
shell
:
bash
run
:
|
export PATH=$PWD/build/bin/Release:$PATH
export EXE=sherpa-onnx-offline-audio-tagging.exe
export SLID_EXE=spoken-language-identification-c-api.exe
export SID_EXE=speaker-identification-c-api.exe
export AT_EXE=audio-tagging-c-api.exe
.github/scripts/test-
audio-tagging
.sh
.github/scripts/test-
c-api
.sh
-
name
:
Test C API
-
name
:
Test Audio tagging
shell
:
bash
run
:
|
export PATH=$PWD/build/bin/Release:$PATH
export SLID_EXE=spoken-language-identification-c-api.exe
export SID_EXE=speaker-identification-c-api.exe
export EXE=sherpa-onnx-offline-audio-tagging.exe
.github/scripts/test-
c-api
.sh
.github/scripts/test-
audio-tagging
.sh
-
name
:
Test spoken language identification (C++ API)
shell
:
bash
...
...
.github/workflows/windows-x86.yaml
查看文件 @
f204e62
...
...
@@ -77,6 +77,8 @@ jobs:
run
:
|
export PATH=$PWD/build/bin/Release:$PATH
export SLID_EXE=spoken-language-identification-c-api.exe
export SID_EXE=speaker-identification-c-api.exe
export AT_EXE=audio-tagging-c-api.exe
.github/scripts/test-c-api.sh
...
...
c-api-examples/CMakeLists.txt
查看文件 @
f204e62
...
...
@@ -18,6 +18,9 @@ target_link_libraries(speaker-identification-c-api sherpa-onnx-c-api)
add_executable
(
streaming-hlg-decode-file-c-api streaming-hlg-decode-file-c-api.c
)
target_link_libraries
(
streaming-hlg-decode-file-c-api sherpa-onnx-c-api
)
add_executable
(
audio-tagging-c-api audio-tagging-c-api.c
)
target_link_libraries
(
audio-tagging-c-api sherpa-onnx-c-api
)
if
(
SHERPA_ONNX_HAS_ALSA
)
add_subdirectory
(
./asr-microphone-example
)
elseif
((
UNIX AND NOT APPLE
)
OR LINUX
)
...
...
c-api-examples/audio-tagging-c-api.c
0 → 100644
查看文件 @
f204e62
// c-api-examples/audio-tagging-c-api.c
//
// Copyright (c) 2024 Xiaomi Corporation
// We assume you have pre-downloaded the model files for testing
// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
//
// An example is given below:
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
// tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
// rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
//
// clang-format on
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sherpa-onnx/c-api/c-api.h"
int32_t
main
()
{
SherpaOnnxAudioTaggingConfig
config
;
memset
(
&
config
,
0
,
sizeof
(
config
));
config
.
model
.
zipformer
.
model
=
"./sherpa-onnx-zipformer-audio-tagging-2024-04-09/model.int8.onnx"
;
config
.
model
.
num_threads
=
1
;
config
.
model
.
debug
=
1
;
config
.
model
.
provider
=
"cpu"
;
// clang-format off
config
.
labels
=
"./sherpa-onnx-zipformer-audio-tagging-2024-04-09/class_labels_indices.csv"
;
// clang-format on
const
SherpaOnnxAudioTagging
*
tagger
=
SherpaOnnxCreateAudioTagging
(
&
config
);
if
(
!
tagger
)
{
fprintf
(
stderr
,
"Failed to create audio tagger. Please check your config"
);
return
-
1
;
}
// You can find more test waves from
// https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
const
char
*
wav_filename
=
"./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/1.wav"
;
const
SherpaOnnxWave
*
wave
=
SherpaOnnxReadWave
(
wav_filename
);
if
(
wave
==
NULL
)
{
fprintf
(
stderr
,
"Failed to read %s
\n
"
,
wav_filename
);
return
-
1
;
}
const
SherpaOnnxOfflineStream
*
stream
=
SherpaOnnxAudioTaggingCreateOfflineStream
(
tagger
);
AcceptWaveformOffline
(
stream
,
wave
->
sample_rate
,
wave
->
samples
,
wave
->
num_samples
);
int32_t
top_k
=
5
;
const
SherpaOnnxAudioEvent
*
const
*
results
=
SherpaOnnxAudioTaggingCompute
(
tagger
,
stream
,
top_k
);
fprintf
(
stderr
,
"--------------------------------------------------
\n
"
);
fprintf
(
stderr
,
"Index
\t\t
Probability
\t\t
Event name
\n
"
);
fprintf
(
stderr
,
"--------------------------------------------------
\n
"
);
for
(
int32_t
i
=
0
;
i
!=
top_k
;
++
i
)
{
fprintf
(
stderr
,
"%d
\t\t
%.3f
\t\t\t
%s
\n
"
,
i
,
results
[
i
]
->
prob
,
results
[
i
]
->
name
);
}
fprintf
(
stderr
,
"--------------------------------------------------
\n
"
);
SherpaOnnxAudioTaggingFreeResults
(
results
);
DestroyOfflineStream
(
stream
);
SherpaOnnxFreeWave
(
wave
);
SherpaOnnxDestroyAudioTagging
(
tagger
);
return
0
;
};
...
...
sherpa-onnx/c-api/c-api.cc
查看文件 @
f204e62
...
...
@@ -10,6 +10,7 @@
#include <utility>
#include <vector>
#include "sherpa-onnx/csrc/audio-tagging.h"
#include "sherpa-onnx/csrc/circular-buffer.h"
#include "sherpa-onnx/csrc/display.h"
#include "sherpa-onnx/csrc/keyword-spotter.h"
...
...
@@ -400,15 +401,18 @@ SherpaOnnxOfflineStream *CreateOfflineStream(
return
stream
;
}
void
DestroyOfflineStream
(
SherpaOnnxOfflineStream
*
stream
)
{
delete
stream
;
}
void
DestroyOfflineStream
(
const
SherpaOnnxOfflineStream
*
stream
)
{
delete
stream
;
}
void
AcceptWaveformOffline
(
SherpaOnnxOfflineStream
*
stream
,
int32_t
sample_rate
,
const
float
*
samples
,
int32_t
n
)
{
void
AcceptWaveformOffline
(
const
SherpaOnnxOfflineStream
*
stream
,
int32_t
sample_rate
,
const
float
*
samples
,
int32_t
n
)
{
stream
->
impl
->
AcceptWaveform
(
sample_rate
,
samples
,
n
);
}
void
DecodeOfflineStream
(
SherpaOnnxOfflineRecognizer
*
recognizer
,
SherpaOnnxOfflineStream
*
stream
)
{
void
DecodeOfflineStream
(
const
SherpaOnnxOfflineRecognizer
*
recognizer
,
const
SherpaOnnxOfflineStream
*
stream
)
{
recognizer
->
impl
->
DecodeStream
(
stream
->
impl
.
get
());
}
...
...
@@ -1209,3 +1213,89 @@ void SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(
delete
[]
names
;
}
// Definition of the handle type that the C header only forward-declares.
// It owns the wrapped C++ sherpa_onnx::AudioTagging object.
struct SherpaOnnxAudioTagging {
  std::unique_ptr<sherpa_onnx::AudioTagging> impl;
};
// Builds a sherpa_onnx::AudioTaggingConfig from the C config, validates it
// and wraps a new sherpa_onnx::AudioTagging in a heap-allocated handle.
//
// @param config  C-side configuration; it is dereferenced unconditionally.
// @return A handle to be released with SherpaOnnxDestroyAudioTagging(), or
//         nullptr if the translated config fails Validate().
const SherpaOnnxAudioTagging *SherpaOnnxCreateAudioTagging(
    const SherpaOnnxAudioTaggingConfig *config) {
  sherpa_onnx::AudioTaggingConfig cfg;

  // Model related options.
  cfg.model.zipformer.model = SHERPA_ONNX_OR(config->model.zipformer.model, "");
  cfg.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
  cfg.model.debug = config->model.debug;
  cfg.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");

  // Tagger level options.
  cfg.labels = SHERPA_ONNX_OR(config->labels, "");
  cfg.top_k = SHERPA_ONNX_OR(config->top_k, 5);

  // In debug mode the effective config is logged before it is validated.
  if (cfg.model.debug) {
    SHERPA_ONNX_LOGE("%s\n", cfg.ToString().c_str());
  }

  if (!cfg.Validate()) {
    SHERPA_ONNX_LOGE("Errors in config");
    return nullptr;
  }

  auto *handle = new SherpaOnnxAudioTagging;
  handle->impl = std::make_unique<sherpa_onnx::AudioTagging>(cfg);

  return handle;
}
// Frees a handle returned by SherpaOnnxCreateAudioTagging().
void SherpaOnnxDestroyAudioTagging(const SherpaOnnxAudioTagging *tagger) {
  delete tagger;
}
// Wraps the stream created by the tagger's CreateStream() in a newly
// allocated SherpaOnnxOfflineStream and returns it to the caller.
const SherpaOnnxOfflineStream *SherpaOnnxAudioTaggingCreateOfflineStream(
    const SherpaOnnxAudioTagging *tagger) {
  return new SherpaOnnxOfflineStream(tagger->impl->CreateStream());
}
// Runs the tagger on stream `s` and converts the resulting C++ events into a
// heap-allocated array of SherpaOnnxAudioEvent pointers.
//
// The array has one entry per event returned by Compute(), followed by a
// nullptr terminator. Every entry, and the name string inside it, is
// allocated with new / new[].
const SherpaOnnxAudioEvent *const *SherpaOnnxAudioTaggingCompute(
    const SherpaOnnxAudioTagging *tagger, const SherpaOnnxOfflineStream *s,
    int32_t top_k) {
  std::vector<sherpa_onnx::AudioEvent> events =
      tagger->impl->Compute(s->impl.get(), top_k);

  const int32_t num_events = static_cast<int32_t>(events.size());

  // One extra slot for the terminating nullptr.
  SherpaOnnxAudioEvent **ans = new SherpaOnnxAudioEvent *[num_events + 1];
  ans[num_events] = nullptr;

  for (int32_t k = 0; k != num_events; ++k) {
    const auto &src = events[k];

    SherpaOnnxAudioEvent *dst = new SherpaOnnxAudioEvent;

    // Copy the event name into a 0-terminated C string.
    const auto len = src.name.size();
    char *name_copy = new char[len + 1];
    std::copy(src.name.begin(), src.name.end(), name_copy);
    name_copy[len] = 0;

    dst->name = name_copy;
    dst->index = src.index;
    dst->prob = src.prob;

    ans[k] = dst;
  }

  return ans;
}
void
SherpaOnnxAudioTaggingFreeResults
(
const
SherpaOnnxAudioEvent
*
const
*
events
)
{
auto
p
=
events
;
while
(
p
&&
*
p
)
{
auto
e
=
*
p
;
delete
[]
e
->
name
;
delete
e
;
++
p
;
}
delete
[]
events
;
}
...
...
sherpa-onnx/c-api/c-api.h
查看文件 @
f204e62
...
...
@@ -427,7 +427,8 @@ SHERPA_ONNX_API SherpaOnnxOfflineStream *CreateOfflineStream(
/// Destroy an offline stream.
///
/// @param stream A pointer returned by CreateOfflineStream()
SHERPA_ONNX_API
void
DestroyOfflineStream
(
SherpaOnnxOfflineStream
*
stream
);
SHERPA_ONNX_API
void
DestroyOfflineStream
(
const
SherpaOnnxOfflineStream
*
stream
);
/// Accept input audio samples and compute the features.
/// The user has to invoke DecodeOfflineStream() to run the neural network and
...
...
@@ -442,9 +443,9 @@ SHERPA_ONNX_API void DestroyOfflineStream(SherpaOnnxOfflineStream *stream);
/// @param n Number of elements in the samples array.
///
/// @caution: For each offline stream, please invoke this function only once!
SHERPA_ONNX_API
void
AcceptWaveformOffline
(
SherpaOnnxOfflineStream
*
stream
,
int32_t
sample_rate
,
const
float
*
samples
,
int32_t
n
);
SHERPA_ONNX_API
void
AcceptWaveformOffline
(
const
SherpaOnnxOfflineStream
*
stream
,
int32_t
sample_rate
,
const
float
*
samples
,
int32_t
n
);
/// Decode an offline stream.
///
/// We assume you have invoked AcceptWaveformOffline() for the given stream
...
...
@@ -453,7 +454,8 @@ SHERPA_ONNX_API void AcceptWaveformOffline(SherpaOnnxOfflineStream *stream,
/// @param recognizer A pointer returned by CreateOfflineRecognizer().
/// @param stream A pointer returned by CreateOfflineStream()
SHERPA_ONNX_API
void
DecodeOfflineStream
(
SherpaOnnxOfflineRecognizer
*
recognizer
,
SherpaOnnxOfflineStream
*
stream
);
const
SherpaOnnxOfflineRecognizer
*
recognizer
,
const
SherpaOnnxOfflineStream
*
stream
);
/// Decode a list of offline streams in parallel.
///
...
...
@@ -1088,6 +1090,65 @@ SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(
SHERPA_ONNX_API
void
SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers
(
const
char
*
const
*
names
);
// ============================================================
// For audio tagging
// ============================================================
// Options specific to the zipformer audio tagging model.
SHERPA_ONNX_API
typedef
struct
SherpaOnnxOfflineZipformerAudioTaggingModelConfig
{
// Path to the model file. The C example passes the path of an .onnx file.
const
char
*
model
;
}
SherpaOnnxOfflineZipformerAudioTaggingModelConfig
;
// Model level options for audio tagging.
SHERPA_ONNX_API
typedef
struct
SherpaOnnxAudioTaggingModelConfig
{
SherpaOnnxOfflineZipformerAudioTaggingModelConfig
zipformer
;
// SherpaOnnxCreateAudioTagging() passes this through SHERPA_ONNX_OR(..., 1);
// presumably an unset (0) value falls back to 1 - confirm against the macro.
int32_t
num_threads
;
int32_t
debug
;
// true to print debug information of the model
// SherpaOnnxCreateAudioTagging() passes this through
// SHERPA_ONNX_OR(..., "cpu"); presumably NULL falls back to "cpu" - confirm
// against the macro.
const
char
*
provider
;
}
SherpaOnnxAudioTaggingModelConfig
;
// Top-level configuration passed to SherpaOnnxCreateAudioTagging().
SHERPA_ONNX_API
typedef
struct
SherpaOnnxAudioTaggingConfig
{
SherpaOnnxAudioTaggingModelConfig
model
;
// Path to the labels file. The C example passes class_labels_indices.csv
// from the model directory.
const
char
*
labels
;
// SherpaOnnxCreateAudioTagging() passes this through SHERPA_ONNX_OR(..., 5);
// presumably an unset (0) value falls back to 5 - confirm against the macro.
int32_t
top_k
;
}
SherpaOnnxAudioTaggingConfig
;
// One entry of the array returned by SherpaOnnxAudioTaggingCompute().
SHERPA_ONNX_API
typedef
struct
SherpaOnnxAudioEvent
{
// 0-terminated event name. It is allocated by
// SherpaOnnxAudioTaggingCompute() and released by
// SherpaOnnxAudioTaggingFreeResults().
const
char
*
name
;
// Copied from the index field of the underlying C++ event.
int32_t
index
;
// Copied from the prob field of the underlying C++ event; the C example
// prints it in its "Probability" column.
float
prob
;
}
SherpaOnnxAudioEvent
;
SHERPA_ONNX_API
typedef
struct
SherpaOnnxAudioTagging
SherpaOnnxAudioTagging
;
// The user has to invoke
// SherpaOnnxDestroyAudioTagging()
// to free the returned pointer to avoid memory leak
SHERPA_ONNX_API
const
SherpaOnnxAudioTagging
*
SherpaOnnxCreateAudioTagging
(
const
SherpaOnnxAudioTaggingConfig
*
config
);
SHERPA_ONNX_API
void
SherpaOnnxDestroyAudioTagging
(
const
SherpaOnnxAudioTagging
*
tagger
);
// The user has to invoke DestroyOfflineStream()
// to free the returned pointer to avoid memory leak
SHERPA_ONNX_API
const
SherpaOnnxOfflineStream
*
SherpaOnnxAudioTaggingCreateOfflineStream
(
const
SherpaOnnxAudioTagging
*
tagger
);
// Return an array of pointers holding the returned events followed by a
// terminating null pointer, i.e., top_k + 1 elements when top_k events are
// returned.
// If top_k is -1, then config.top_k is used, where config is the config
// used to create the input tagger.
//
// The ans[0]->prob has the largest probability among the array elements
// The last element of the array is a null pointer
//
// The user has to use SherpaOnnxAudioTaggingFreeResults()
// to free the returned pointer to avoid memory leak
SHERPA_ONNX_API
const
SherpaOnnxAudioEvent
*
const
*
SherpaOnnxAudioTaggingCompute
(
const
SherpaOnnxAudioTagging
*
tagger
,
const
SherpaOnnxOfflineStream
*
s
,
int32_t
top_k
);
SHERPA_ONNX_API
void
SherpaOnnxAudioTaggingFreeResults
(
const
SherpaOnnxAudioEvent
*
const
*
p
);
#if defined(__GNUC__)
#pragma GCC diagnostic pop
#endif
...
...
请
注册
或
登录
后发表评论