正在显示
9 个修改的文件
包含
285 行增加
和
32 行删除
| @@ -10,8 +10,21 @@ log() { | @@ -10,8 +10,21 @@ log() { | ||
| 10 | 10 | ||
| 11 | echo "SLID_EXE is $SLID_EXE" | 11 | echo "SLID_EXE is $SLID_EXE" |
| 12 | echo "SID_EXE is $SID_EXE" | 12 | echo "SID_EXE is $SID_EXE" |
| 13 | +echo "AT_EXE is $AT_EXE" | ||
| 13 | echo "PATH: $PATH" | 14 | echo "PATH: $PATH" |
| 14 | 15 | ||
| 16 | +log "------------------------------------------------------------" | ||
| 17 | +log "Test audio tagging " | ||
| 18 | +log "------------------------------------------------------------" | ||
| 19 | + | ||
| 20 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 | ||
| 21 | +tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 | ||
| 22 | +rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 | ||
| 23 | + | ||
| 24 | +$AT_EXE | ||
| 25 | + | ||
| 26 | +rm -rf sherpa-onnx-zipformer-audio-tagging-2024-04-09 | ||
| 27 | + | ||
| 15 | 28 | ||
| 16 | log "------------------------------------------------------------" | 29 | log "------------------------------------------------------------" |
| 17 | log "Download whisper tiny for spoken language identification " | 30 | log "Download whisper tiny for spoken language identification " |
| @@ -126,6 +126,16 @@ jobs: | @@ -126,6 +126,16 @@ jobs: | ||
| 126 | name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }} | 126 | name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }} |
| 127 | path: build/bin/* | 127 | path: build/bin/* |
| 128 | 128 | ||
| 129 | + - name: Test C API | ||
| 130 | + shell: bash | ||
| 131 | + run: | | ||
| 132 | + export PATH=$PWD/build/bin:$PATH | ||
| 133 | + export SLID_EXE=spoken-language-identification-c-api | ||
| 134 | + export SID_EXE=speaker-identification-c-api | ||
| 135 | + export AT_EXE=audio-tagging-c-api | ||
| 136 | + | ||
| 137 | + .github/scripts/test-c-api.sh | ||
| 138 | + | ||
| 129 | - name: Test Audio tagging | 139 | - name: Test Audio tagging |
| 130 | shell: bash | 140 | shell: bash |
| 131 | run: | | 141 | run: | |
| @@ -142,14 +152,6 @@ jobs: | @@ -142,14 +152,6 @@ jobs: | ||
| 142 | 152 | ||
| 143 | .github/scripts/test-online-ctc.sh | 153 | .github/scripts/test-online-ctc.sh |
| 144 | 154 | ||
| 145 | - - name: Test C API | ||
| 146 | - shell: bash | ||
| 147 | - run: | | ||
| 148 | - export PATH=$PWD/build/bin:$PATH | ||
| 149 | - export SLID_EXE=spoken-language-identification-c-api | ||
| 150 | - export SID_EXE=speaker-identification-c-api | ||
| 151 | - | ||
| 152 | - .github/scripts/test-c-api.sh | ||
| 153 | 155 | ||
| 154 | - name: Test spoken language identification (C++ API) | 156 | - name: Test spoken language identification (C++ API) |
| 155 | shell: bash | 157 | shell: bash |
| @@ -105,22 +105,23 @@ jobs: | @@ -105,22 +105,23 @@ jobs: | ||
| 105 | otool -L build/bin/sherpa-onnx | 105 | otool -L build/bin/sherpa-onnx |
| 106 | otool -l build/bin/sherpa-onnx | 106 | otool -l build/bin/sherpa-onnx |
| 107 | 107 | ||
| 108 | - - name: Test Audio tagging | 108 | + - name: Test C API |
| 109 | shell: bash | 109 | shell: bash |
| 110 | run: | | 110 | run: | |
| 111 | export PATH=$PWD/build/bin:$PATH | 111 | export PATH=$PWD/build/bin:$PATH |
| 112 | - export EXE=sherpa-onnx-offline-audio-tagging | 112 | + export SLID_EXE=spoken-language-identification-c-api |
| 113 | + export SID_EXE=speaker-identification-c-api | ||
| 114 | + export AT_EXE=audio-tagging-c-api | ||
| 113 | 115 | ||
| 114 | - .github/scripts/test-audio-tagging.sh | 116 | + .github/scripts/test-c-api.sh |
| 115 | 117 | ||
| 116 | - - name: Test C API | 118 | + - name: Test Audio tagging |
| 117 | shell: bash | 119 | shell: bash |
| 118 | run: | | 120 | run: | |
| 119 | export PATH=$PWD/build/bin:$PATH | 121 | export PATH=$PWD/build/bin:$PATH |
| 120 | - export SLID_EXE=spoken-language-identification-c-api | ||
| 121 | - export SID_EXE=speaker-identification-c-api | 122 | + export EXE=sherpa-onnx-offline-audio-tagging |
| 122 | 123 | ||
| 123 | - .github/scripts/test-c-api.sh | 124 | + .github/scripts/test-audio-tagging.sh |
| 124 | 125 | ||
| 125 | - name: Test spoken language identification (C++ API) | 126 | - name: Test spoken language identification (C++ API) |
| 126 | shell: bash | 127 | shell: bash |
| @@ -72,22 +72,24 @@ jobs: | @@ -72,22 +72,24 @@ jobs: | ||
| 72 | 72 | ||
| 73 | ls -lh ./bin/Release/sherpa-onnx.exe | 73 | ls -lh ./bin/Release/sherpa-onnx.exe |
| 74 | 74 | ||
| 75 | - - name: Test Audio tagging | 75 | + - name: Test C API |
| 76 | shell: bash | 76 | shell: bash |
| 77 | run: | | 77 | run: | |
| 78 | export PATH=$PWD/build/bin/Release:$PATH | 78 | export PATH=$PWD/build/bin/Release:$PATH |
| 79 | - export EXE=sherpa-onnx-offline-audio-tagging.exe | 79 | + export SLID_EXE=spoken-language-identification-c-api.exe |
| 80 | + export SID_EXE=speaker-identification-c-api.exe | ||
| 81 | + export AT_EXE=audio-tagging-c-api.exe | ||
| 80 | 82 | ||
| 81 | - .github/scripts/test-audio-tagging.sh | 83 | + .github/scripts/test-c-api.sh |
| 82 | 84 | ||
| 83 | - - name: Test C API | 85 | + |
| 86 | + - name: Test Audio tagging | ||
| 84 | shell: bash | 87 | shell: bash |
| 85 | run: | | 88 | run: | |
| 86 | export PATH=$PWD/build/bin/Release:$PATH | 89 | export PATH=$PWD/build/bin/Release:$PATH |
| 87 | - export SLID_EXE=spoken-language-identification-c-api.exe | ||
| 88 | - export SID_EXE=speaker-identification-c-api.exe | 90 | + export EXE=sherpa-onnx-offline-audio-tagging.exe |
| 89 | 91 | ||
| 90 | - .github/scripts/test-c-api.sh | 92 | + .github/scripts/test-audio-tagging.sh |
| 91 | 93 | ||
| 92 | - name: Test spoken language identification (C++ API) | 94 | - name: Test spoken language identification (C++ API) |
| 93 | shell: bash | 95 | shell: bash |
| @@ -77,6 +77,8 @@ jobs: | @@ -77,6 +77,8 @@ jobs: | ||
| 77 | run: | | 77 | run: | |
| 78 | export PATH=$PWD/build/bin/Release:$PATH | 78 | export PATH=$PWD/build/bin/Release:$PATH |
| 79 | export SLID_EXE=spoken-language-identification-c-api.exe | 79 | export SLID_EXE=spoken-language-identification-c-api.exe |
| 80 | + export SID_EXE=speaker-identification-c-api.exe | ||
| 81 | + export AT_EXE=audio-tagging-c-api.exe | ||
| 80 | 82 | ||
| 81 | .github/scripts/test-c-api.sh | 83 | .github/scripts/test-c-api.sh |
| 82 | 84 |
| @@ -18,6 +18,9 @@ target_link_libraries(speaker-identification-c-api sherpa-onnx-c-api) | @@ -18,6 +18,9 @@ target_link_libraries(speaker-identification-c-api sherpa-onnx-c-api) | ||
| 18 | add_executable(streaming-hlg-decode-file-c-api streaming-hlg-decode-file-c-api.c) | 18 | add_executable(streaming-hlg-decode-file-c-api streaming-hlg-decode-file-c-api.c) |
| 19 | target_link_libraries(streaming-hlg-decode-file-c-api sherpa-onnx-c-api) | 19 | target_link_libraries(streaming-hlg-decode-file-c-api sherpa-onnx-c-api) |
| 20 | 20 | ||
| 21 | +add_executable(audio-tagging-c-api audio-tagging-c-api.c) | ||
| 22 | +target_link_libraries(audio-tagging-c-api sherpa-onnx-c-api) | ||
| 23 | + | ||
| 21 | if(SHERPA_ONNX_HAS_ALSA) | 24 | if(SHERPA_ONNX_HAS_ALSA) |
| 22 | add_subdirectory(./asr-microphone-example) | 25 | add_subdirectory(./asr-microphone-example) |
| 23 | elseif((UNIX AND NOT APPLE) OR LINUX) | 26 | elseif((UNIX AND NOT APPLE) OR LINUX) |
c-api-examples/audio-tagging-c-api.c
0 → 100644
| 1 | +// c-api-examples/audio-tagging-c-api.c | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +// We assume you have pre-downloaded the model files for testing | ||
| 6 | +// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models | ||
| 7 | +// | ||
| 8 | +// An example is given below: | ||
| 9 | +// | ||
| 10 | +// clang-format off | ||
| 11 | +// | ||
| 12 | +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 | ||
| 13 | +// tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 | ||
| 14 | +// rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 | ||
| 15 | +// | ||
| 16 | +// clang-format on | ||
| 17 | + | ||
| 18 | +#include <stdio.h> | ||
| 19 | +#include <stdlib.h> | ||
| 20 | +#include <string.h> | ||
| 21 | + | ||
| 22 | +#include "sherpa-onnx/c-api/c-api.h" | ||
| 23 | + | ||
| 24 | +// Runs audio tagging on a test wave shipped with the model and prints the | ||
| 25 | +// top events to stderr. | ||
| 26 | +// | ||
| 27 | +// Returns 0 on success, or -1 if the tagger cannot be created or the wave | ||
| 28 | +// file cannot be read. | ||
| 29 | +int32_t main() { | ||
| 30 | +  SherpaOnnxAudioTaggingConfig config; | ||
| 31 | +  // Zero-initialize so that every field we do not set below is 0/NULL. | ||
| 32 | +  memset(&config, 0, sizeof(config)); | ||
| 33 | + | ||
| 34 | +  config.model.zipformer.model = | ||
| 35 | +      "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/model.int8.onnx"; | ||
| 36 | +  config.model.num_threads = 1; | ||
| 37 | +  config.model.debug = 1; | ||
| 38 | +  config.model.provider = "cpu"; | ||
| 39 | +  // clang-format off | ||
| 40 | +  config.labels = "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/class_labels_indices.csv"; | ||
| 41 | +  // clang-format on | ||
| 42 | + | ||
| 43 | +  const SherpaOnnxAudioTagging *tagger = SherpaOnnxCreateAudioTagging(&config); | ||
| 44 | +  if (!tagger) { | ||
| 45 | +    fprintf(stderr, | ||
| 46 | +            "Failed to create audio tagger. Please check your config\n"); | ||
| 47 | +    return -1; | ||
| 48 | +  } | ||
| 49 | + | ||
| 50 | +  // You can find more test waves from | ||
| 51 | +  // https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 | ||
| 52 | +  const char *wav_filename = | ||
| 53 | +      "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/1.wav"; | ||
| 54 | + | ||
| 55 | +  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename); | ||
| 56 | +  if (wave == NULL) { | ||
| 57 | +    fprintf(stderr, "Failed to read %s\n", wav_filename); | ||
| 58 | +    // Do not leak the tagger on this early exit. | ||
| 59 | +    SherpaOnnxDestroyAudioTagging(tagger); | ||
| 60 | +    return -1; | ||
| 61 | +  } | ||
| 62 | + | ||
| 63 | +  const SherpaOnnxOfflineStream *stream = | ||
| 64 | +      SherpaOnnxAudioTaggingCreateOfflineStream(tagger); | ||
| 65 | + | ||
| 66 | +  AcceptWaveformOffline(stream, wave->sample_rate, wave->samples, | ||
| 67 | +                        wave->num_samples); | ||
| 68 | + | ||
| 69 | +  int32_t top_k = 5; | ||
| 70 | +  const SherpaOnnxAudioEvent *const *results = | ||
| 71 | +      SherpaOnnxAudioTaggingCompute(tagger, stream, top_k); | ||
| 72 | + | ||
| 73 | +  fprintf(stderr, "--------------------------------------------------\n"); | ||
| 74 | +  fprintf(stderr, "Index\t\tProbability\t\tEvent name\n"); | ||
| 75 | +  fprintf(stderr, "--------------------------------------------------\n"); | ||
| 76 | +  // The result array ends with a null pointer, so also stop there in case | ||
| 77 | +  // fewer than top_k events are returned. | ||
| 78 | +  for (int32_t i = 0; i != top_k && results[i] != NULL; ++i) { | ||
| 79 | +    fprintf(stderr, "%d\t\t%.3f\t\t\t%s\n", i, results[i]->prob, | ||
| 80 | +            results[i]->name); | ||
| 81 | +  } | ||
| 82 | +  fprintf(stderr, "--------------------------------------------------\n"); | ||
| 83 | + | ||
| 84 | +  // Release everything in the reverse order of creation. | ||
| 85 | +  SherpaOnnxAudioTaggingFreeResults(results); | ||
| 86 | +  DestroyOfflineStream(stream); | ||
| 87 | +  SherpaOnnxFreeWave(wave); | ||
| 88 | +  SherpaOnnxDestroyAudioTagging(tagger); | ||
| 89 | + | ||
| 90 | +  return 0; | ||
| 91 | +} | ||
| @@ -10,6 +10,7 @@ | @@ -10,6 +10,7 @@ | ||
| 10 | #include <utility> | 10 | #include <utility> |
| 11 | #include <vector> | 11 | #include <vector> |
| 12 | 12 | ||
| 13 | +#include "sherpa-onnx/csrc/audio-tagging.h" | ||
| 13 | #include "sherpa-onnx/csrc/circular-buffer.h" | 14 | #include "sherpa-onnx/csrc/circular-buffer.h" |
| 14 | #include "sherpa-onnx/csrc/display.h" | 15 | #include "sherpa-onnx/csrc/display.h" |
| 15 | #include "sherpa-onnx/csrc/keyword-spotter.h" | 16 | #include "sherpa-onnx/csrc/keyword-spotter.h" |
| @@ -400,15 +401,18 @@ SherpaOnnxOfflineStream *CreateOfflineStream( | @@ -400,15 +401,18 @@ SherpaOnnxOfflineStream *CreateOfflineStream( | ||
| 400 | return stream; | 401 | return stream; |
| 401 | } | 402 | } |
| 402 | 403 | ||
| 403 | -void DestroyOfflineStream(SherpaOnnxOfflineStream *stream) { delete stream; } | 404 | +void DestroyOfflineStream(const SherpaOnnxOfflineStream *stream) { |
| 405 | + delete stream; | ||
| 406 | +} | ||
| 404 | 407 | ||
| 405 | -void AcceptWaveformOffline(SherpaOnnxOfflineStream *stream, int32_t sample_rate, | ||
| 406 | - const float *samples, int32_t n) { | 408 | +void AcceptWaveformOffline(const SherpaOnnxOfflineStream *stream, |
| 409 | + int32_t sample_rate, const float *samples, | ||
| 410 | + int32_t n) { | ||
| 407 | stream->impl->AcceptWaveform(sample_rate, samples, n); | 411 | stream->impl->AcceptWaveform(sample_rate, samples, n); |
| 408 | } | 412 | } |
| 409 | 413 | ||
| 410 | -void DecodeOfflineStream(SherpaOnnxOfflineRecognizer *recognizer, | ||
| 411 | - SherpaOnnxOfflineStream *stream) { | 414 | +void DecodeOfflineStream(const SherpaOnnxOfflineRecognizer *recognizer, |
| 415 | + const SherpaOnnxOfflineStream *stream) { | ||
| 412 | recognizer->impl->DecodeStream(stream->impl.get()); | 416 | recognizer->impl->DecodeStream(stream->impl.get()); |
| 413 | } | 417 | } |
| 414 | 418 | ||
| @@ -1209,3 +1213,89 @@ void SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers( | @@ -1209,3 +1213,89 @@ void SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers( | ||
| 1209 | 1213 | ||
| 1210 | delete[] names; | 1214 | delete[] names; |
| 1211 | } | 1215 | } |
| 1216 | + | ||
|  | +// C-API handle for audio tagging; it owns the wrapped C++ | ||
|  | +// sherpa_onnx::AudioTagging object through a unique_ptr. | ||
| 1217 | +struct SherpaOnnxAudioTagging { | ||
| 1218 | + std::unique_ptr<sherpa_onnx::AudioTagging> impl; | ||
| 1219 | +}; | ||
| 1220 | + | ||
|  | +// Copies the fields of the C config into a sherpa_onnx::AudioTaggingConfig | ||
|  | +// and creates a tagger from it. | ||
|  | +// | ||
|  | +// @param config Dereferenced without a null check, so it must not be NULL. | ||
|  | +// @return nullptr if the converted config fails Validate(); otherwise a | ||
|  | +//         newly allocated handle. | ||
| 1221 | +const SherpaOnnxAudioTagging *SherpaOnnxCreateAudioTagging( | ||
| 1222 | + const SherpaOnnxAudioTaggingConfig *config) { | ||
| 1223 | + sherpa_onnx::AudioTaggingConfig ac; | ||
|  | + // NOTE(review): SHERPA_ONNX_OR is defined outside this view; presumably it | ||
|  | + // yields the second argument when the first is 0/NULL - confirm. | ||
| 1224 | + ac.model.zipformer.model = SHERPA_ONNX_OR(config->model.zipformer.model, ""); | ||
| 1225 | + ac.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1); | ||
| 1226 | + ac.model.debug = config->model.debug; | ||
| 1227 | + ac.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu"); | ||
| 1228 | + ac.labels = SHERPA_ONNX_OR(config->labels, ""); | ||
| 1229 | + ac.top_k = SHERPA_ONNX_OR(config->top_k, 5); | ||
| 1230 | + | ||
|  | + // Log the converted config when debugging is enabled. | ||
| 1231 | + if (ac.model.debug) { | ||
| 1232 | + SHERPA_ONNX_LOGE("%s\n", ac.ToString().c_str()); | ||
| 1233 | + } | ||
| 1234 | + | ||
|  | + // Reject an invalid config before anything is allocated. | ||
| 1235 | + if (!ac.Validate()) { | ||
| 1236 | + SHERPA_ONNX_LOGE("Errors in config"); | ||
| 1237 | + return nullptr; | ||
| 1238 | + } | ||
| 1239 | + | ||
| 1240 | + SherpaOnnxAudioTagging *tagger = new SherpaOnnxAudioTagging; | ||
| 1241 | + tagger->impl = std::make_unique<sherpa_onnx::AudioTagging>(ac); | ||
| 1242 | + | ||
| 1243 | + return tagger; | ||
| 1244 | +} | ||
| 1245 | + | ||
|  | +// Deletes the handle; the wrapped tagger is released together with it | ||
|  | +// because the handle holds it in a unique_ptr. | ||
| 1246 | +void SherpaOnnxDestroyAudioTagging(const SherpaOnnxAudioTagging *tagger) { | ||
| 1247 | + delete tagger; | ||
| 1248 | +} | ||
| 1249 | + | ||
|  | +// Wraps the stream returned by the tagger's CreateStream() in a newly | ||
|  | +// allocated SherpaOnnxOfflineStream and returns it. | ||
|  | +// `tagger` is dereferenced without a null check. | ||
| 1250 | +const SherpaOnnxOfflineStream *SherpaOnnxAudioTaggingCreateOfflineStream( | ||
| 1251 | + const SherpaOnnxAudioTagging *tagger) { | ||
| 1252 | + const SherpaOnnxOfflineStream *stream = | ||
| 1253 | + new SherpaOnnxOfflineStream(tagger->impl->CreateStream()); | ||
| 1254 | + return stream; | ||
| 1255 | +} | ||
| 1256 | + | ||
|  | +// Runs the tagger on stream `s` and converts the resulting C++ events into | ||
|  | +// a heap-allocated array of C structs. | ||
|  | +// | ||
|  | +// @return An array with one pointer per event returned by Compute(), | ||
|  | +//         followed by a null pointer. The array, each event and each | ||
|  | +//         event name are allocated with new/new[]. | ||
| 1257 | +const SherpaOnnxAudioEvent *const *SherpaOnnxAudioTaggingCompute( | ||
| 1258 | + const SherpaOnnxAudioTagging *tagger, const SherpaOnnxOfflineStream *s, | ||
| 1259 | + int32_t top_k) { | ||
| 1260 | + std::vector<sherpa_onnx::AudioEvent> events = | ||
| 1261 | + tagger->impl->Compute(s->impl.get(), top_k); | ||
| 1262 | + | ||
| 1263 | + int32_t n = static_cast<int32_t>(events.size()); | ||
|  | + // One extra slot holds the terminating null pointer. | ||
| 1264 | + SherpaOnnxAudioEvent **ans = new SherpaOnnxAudioEvent *[n + 1]; | ||
| 1265 | + ans[n] = nullptr; | ||
| 1266 | + | ||
| 1267 | + int32_t i = 0; | ||
| 1268 | + for (const auto &e : events) { | ||
| 1269 | + SherpaOnnxAudioEvent *p = new SherpaOnnxAudioEvent; | ||
| 1270 | + | ||
|  | + // Copy the name into its own NUL-terminated buffer so that it outlives | ||
|  | + // the local `events` vector. | ||
| 1271 | + char *name = new char[e.name.size() + 1]; | ||
| 1272 | + std::copy(e.name.begin(), e.name.end(), name); | ||
| 1273 | + name[e.name.size()] = 0; | ||
| 1274 | + | ||
| 1275 | + p->name = name; | ||
| 1276 | + | ||
| 1277 | + p->index = e.index; | ||
| 1278 | + p->prob = e.prob; | ||
| 1279 | + | ||
| 1280 | + ans[i] = p; | ||
| 1281 | + i += 1; | ||
| 1282 | + } | ||
| 1283 | + | ||
| 1284 | + return ans; | ||
| 1285 | +} | ||
| 1286 | + | ||
|  | +// Frees a null-terminated array of events: each event's name, each event, | ||
|  | +// and finally the array itself. | ||
|  | +// A null `events` pointer is tolerated: the loop is skipped and delete[] | ||
|  | +// on a null pointer does nothing. | ||
| 1287 | +void SherpaOnnxAudioTaggingFreeResults( | ||
| 1288 | + const SherpaOnnxAudioEvent *const *events) { | ||
| 1289 | + auto p = events; | ||
| 1290 | + | ||
|  | + // Walk the entries until the null pointer that ends the array. | ||
| 1291 | + while (p && *p) { | ||
| 1292 | + auto e = *p; | ||
| 1293 | + | ||
| 1294 | + delete[] e->name; | ||
| 1295 | + delete e; | ||
| 1296 | + | ||
| 1297 | + ++p; | ||
| 1298 | + } | ||
| 1299 | + | ||
| 1300 | + delete[] events; | ||
| 1301 | +} | ||
| @@ -427,7 +427,8 @@ SHERPA_ONNX_API SherpaOnnxOfflineStream *CreateOfflineStream( | @@ -427,7 +427,8 @@ SHERPA_ONNX_API SherpaOnnxOfflineStream *CreateOfflineStream( | ||
| 427 | /// Destroy an offline stream. | 427 | /// Destroy an offline stream. |
| 428 | /// | 428 | /// |
| 429 | /// @param stream A pointer returned by CreateOfflineStream() | 429 | /// @param stream A pointer returned by CreateOfflineStream() |
| 430 | -SHERPA_ONNX_API void DestroyOfflineStream(SherpaOnnxOfflineStream *stream); | 430 | +SHERPA_ONNX_API void DestroyOfflineStream( |
| 431 | + const SherpaOnnxOfflineStream *stream); | ||
| 431 | 432 | ||
| 432 | /// Accept input audio samples and compute the features. | 433 | /// Accept input audio samples and compute the features. |
| 433 | /// The user has to invoke DecodeOfflineStream() to run the neural network and | 434 | /// The user has to invoke DecodeOfflineStream() to run the neural network and |
| @@ -442,9 +443,9 @@ SHERPA_ONNX_API void DestroyOfflineStream(SherpaOnnxOfflineStream *stream); | @@ -442,9 +443,9 @@ SHERPA_ONNX_API void DestroyOfflineStream(SherpaOnnxOfflineStream *stream); | ||
| 442 | /// @param n Number of elements in the samples array. | 443 | /// @param n Number of elements in the samples array. |
| 443 | /// | 444 | /// |
| 444 | /// @caution: For each offline stream, please invoke this function only once! | 445 | /// @caution: For each offline stream, please invoke this function only once! |
| 445 | -SHERPA_ONNX_API void AcceptWaveformOffline(SherpaOnnxOfflineStream *stream, | ||
| 446 | - int32_t sample_rate, | ||
| 447 | - const float *samples, int32_t n); | 446 | +SHERPA_ONNX_API void AcceptWaveformOffline( |
| 447 | + const SherpaOnnxOfflineStream *stream, int32_t sample_rate, | ||
| 448 | + const float *samples, int32_t n); | ||
| 448 | /// Decode an offline stream. | 449 | /// Decode an offline stream. |
| 449 | /// | 450 | /// |
| 450 | /// We assume you have invoked AcceptWaveformOffline() for the given stream | 451 | /// We assume you have invoked AcceptWaveformOffline() for the given stream |
| @@ -453,7 +454,8 @@ SHERPA_ONNX_API void AcceptWaveformOffline(SherpaOnnxOfflineStream *stream, | @@ -453,7 +454,8 @@ SHERPA_ONNX_API void AcceptWaveformOffline(SherpaOnnxOfflineStream *stream, | ||
| 453 | /// @param recognizer A pointer returned by CreateOfflineRecognizer(). | 454 | /// @param recognizer A pointer returned by CreateOfflineRecognizer(). |
| 454 | /// @param stream A pointer returned by CreateOfflineStream() | 455 | /// @param stream A pointer returned by CreateOfflineStream() |
| 455 | SHERPA_ONNX_API void DecodeOfflineStream( | 456 | SHERPA_ONNX_API void DecodeOfflineStream( |
| 456 | - SherpaOnnxOfflineRecognizer *recognizer, SherpaOnnxOfflineStream *stream); | 457 | + const SherpaOnnxOfflineRecognizer *recognizer, |
| 458 | + const SherpaOnnxOfflineStream *stream); | ||
| 457 | 459 | ||
| 458 | /// Decode a list offline streams in parallel. | 460 | /// Decode a list offline streams in parallel. |
| 459 | /// | 461 | /// |
| @@ -1088,6 +1090,65 @@ SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers( | @@ -1088,6 +1090,65 @@ SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers( | ||
| 1088 | SHERPA_ONNX_API void SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers( | 1090 | SHERPA_ONNX_API void SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers( |
| 1089 | const char *const *names); | 1091 | const char *const *names); |
| 1090 | 1092 | ||
| 1093 | +// ============================================================ | ||
| 1094 | +// For audio tagging | ||
| 1095 | +// ============================================================ | ||
| 1096 | +SHERPA_ONNX_API typedef struct | ||
| 1097 | + SherpaOnnxOfflineZipformerAudioTaggingModelConfig { | ||
| 1098 | + const char *model; | ||
| 1099 | +} SherpaOnnxOfflineZipformerAudioTaggingModelConfig; | ||
| 1100 | + | ||
| 1101 | +SHERPA_ONNX_API typedef struct SherpaOnnxAudioTaggingModelConfig { | ||
| 1102 | + SherpaOnnxOfflineZipformerAudioTaggingModelConfig zipformer; | ||
| 1103 | + int32_t num_threads; | ||
| 1104 | + int32_t debug; // true to print debug information of the model | ||
| 1105 | + const char *provider; | ||
| 1106 | +} SherpaOnnxAudioTaggingModelConfig; | ||
| 1107 | + | ||
| 1108 | +SHERPA_ONNX_API typedef struct SherpaOnnxAudioTaggingConfig { | ||
| 1109 | + SherpaOnnxAudioTaggingModelConfig model; | ||
| 1110 | + const char *labels; | ||
| 1111 | + int32_t top_k; | ||
| 1112 | +} SherpaOnnxAudioTaggingConfig; | ||
| 1113 | + | ||
| 1114 | +SHERPA_ONNX_API typedef struct SherpaOnnxAudioEvent { | ||
| 1115 | + const char *name; | ||
| 1116 | + int32_t index; | ||
| 1117 | + float prob; | ||
| 1118 | +} SherpaOnnxAudioEvent; | ||
| 1119 | + | ||
| 1120 | +SHERPA_ONNX_API typedef struct SherpaOnnxAudioTagging SherpaOnnxAudioTagging; | ||
| 1121 | + | ||
| 1122 | +// The user has to invoke | ||
| 1123 | +// SherpaOnnxDestroyAudioTagging() | ||
| 1124 | +// to free the returned pointer to avoid memory leak | ||
| 1125 | +SHERPA_ONNX_API const SherpaOnnxAudioTagging *SherpaOnnxCreateAudioTagging( | ||
| 1126 | + const SherpaOnnxAudioTaggingConfig *config); | ||
| 1127 | + | ||
| 1128 | +SHERPA_ONNX_API void SherpaOnnxDestroyAudioTagging( | ||
| 1129 | + const SherpaOnnxAudioTagging *tagger); | ||
| 1130 | + | ||
| 1131 | +// The user has to invoke DestroyOfflineStream() | ||
| 1132 | +// to free the returned pointer to avoid memory leak | ||
| 1133 | +SHERPA_ONNX_API const SherpaOnnxOfflineStream * | ||
| 1134 | +SherpaOnnxAudioTaggingCreateOfflineStream(const SherpaOnnxAudioTagging *tagger); | ||
| 1135 | + | ||
| 1136 | +// Return an array of pointers: one per returned event plus a null terminator. | ||
| 1137 | +// If top_k is -1, then config.top_k is used, where config is the config | ||
| 1138 | +// used to create the input tagger. | ||
| 1139 | +// | ||
| 1140 | +// The ans[0]->prob has the largest probability among the array elements | ||
| 1141 | +// The last element of the array is a null pointer | ||
| 1142 | +// | ||
| 1143 | +// The user has to use SherpaOnnxAudioTaggingFreeResults() | ||
| 1144 | +// to free the returned pointer to avoid memory leak | ||
| 1145 | +SHERPA_ONNX_API const SherpaOnnxAudioEvent *const * | ||
| 1146 | +SherpaOnnxAudioTaggingCompute(const SherpaOnnxAudioTagging *tagger, | ||
| 1147 | + const SherpaOnnxOfflineStream *s, int32_t top_k); | ||
| 1148 | + | ||
| 1149 | +SHERPA_ONNX_API void SherpaOnnxAudioTaggingFreeResults( | ||
| 1150 | + const SherpaOnnxAudioEvent *const *p); | ||
| 1151 | + | ||
| 1091 | #if defined(__GNUC__) | 1152 | #if defined(__GNUC__) |
| 1092 | #pragma GCC diagnostic pop | 1153 | #pragma GCC diagnostic pop |
| 1093 | #endif | 1154 | #endif |
-
请 注册 或 登录 后发表评论