Committed by GitHub
add vad+sense voice example for C API (#1291)
Showing 9 changed files with 267 additions and 43 deletions.
| @@ -99,6 +99,45 @@ jobs: | @@ -99,6 +99,45 @@ jobs: | ||
| 99 | ./run.sh | 99 | ./run.sh |
| 100 | rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 | 100 | rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 |
| 101 | 101 | ||
| 102 | + - name: Test vad + sense-voice | ||
| 103 | + shell: bash | ||
| 104 | + run: | | ||
| 105 | + gcc -o vad-sense-voice-c-api ./c-api-examples/vad-sense-voice-c-api.c \ | ||
| 106 | + -I ./build/install/include \ | ||
| 107 | + -L ./build/install/lib/ \ | ||
| 108 | + -l sherpa-onnx-c-api \ | ||
| 109 | + -l onnxruntime | ||
| 110 | + | ||
| 111 | + ls -lh vad-sense-voice-c-api | ||
| 112 | + | ||
| 113 | + if [[ ${{ matrix.os }} == ubuntu-latest ]]; then | ||
| 114 | + ldd ./vad-sense-voice-c-api | ||
| 115 | + echo "----" | ||
| 116 | + readelf -d ./vad-sense-voice-c-api | ||
| 117 | + fi | ||
| 118 | + | ||
| 119 | + # Now download models | ||
| 120 | + # | ||
| 121 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx | ||
| 122 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav | ||
| 123 | + | ||
| 124 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 125 | + tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 126 | + rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 127 | + | ||
| 128 | + ls -lh sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 | ||
| 129 | + echo "---" | ||
| 130 | + ls -lh sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs | ||
| 131 | + | ||
| 132 | + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH | ||
| 133 | + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 134 | + | ||
| 135 | + ./vad-sense-voice-c-api | ||
| 136 | + | ||
| 137 | + rm -rf sherpa-onnx-sense-voice-* | ||
| 138 | + rm -rf *.onnx | ||
| 139 | + rm *.wav | ||
| 140 | + | ||
| 102 | - name: Test sense-voice | 141 | - name: Test sense-voice |
| 103 | shell: bash | 142 | shell: bash |
| 104 | run: | | 143 | run: | |
| @@ -45,6 +45,9 @@ target_link_libraries(streaming-paraformer-c-api sherpa-onnx-c-api) | @@ -45,6 +45,9 @@ target_link_libraries(streaming-paraformer-c-api sherpa-onnx-c-api) | ||
| 45 | add_executable(telespeech-c-api telespeech-c-api.c) | 45 | add_executable(telespeech-c-api telespeech-c-api.c) |
| 46 | target_link_libraries(telespeech-c-api sherpa-onnx-c-api) | 46 | target_link_libraries(telespeech-c-api sherpa-onnx-c-api) |
| 47 | 47 | ||
| 48 | +add_executable(vad-sense-voice-c-api vad-sense-voice-c-api.c) | ||
| 49 | +target_link_libraries(vad-sense-voice-c-api sherpa-onnx-c-api) | ||
| 50 | + | ||
| 48 | if(SHERPA_ONNX_HAS_ALSA) | 51 | if(SHERPA_ONNX_HAS_ALSA) |
| 49 | add_subdirectory(./asr-microphone-example) | 52 | add_subdirectory(./asr-microphone-example) |
| 50 | elseif((UNIX AND NOT APPLE) OR LINUX) | 53 | elseif((UNIX AND NOT APPLE) OR LINUX) |
| @@ -3,7 +3,8 @@ | @@ -3,7 +3,8 @@ | ||
| 3 | // Copyright (c) 2024 Xiaomi Corporation | 3 | // Copyright (c) 2024 Xiaomi Corporation |
| 4 | 4 | ||
| 5 | // | 5 | // |
| 6 | -// This file demonstrates how to use non-streaming Paraformer with sherpa-onnx's C API. | 6 | +// This file demonstrates how to use non-streaming Paraformer with sherpa-onnx's |
| 7 | +// C API. | ||
| 7 | // clang-format off | 8 | // clang-format off |
| 8 | // | 9 | // |
| 9 | // wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-small-2024-03-09.tar.bz2 | 10 | // wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-small-2024-03-09.tar.bz2 |
| @@ -19,19 +20,20 @@ | @@ -19,19 +20,20 @@ | ||
| 19 | #include "sherpa-onnx/c-api/c-api.h" | 20 | #include "sherpa-onnx/c-api/c-api.h" |
| 20 | 21 | ||
| 21 | int32_t main() { | 22 | int32_t main() { |
| 22 | - | ||
| 23 | - const char *wav_filename = "sherpa-onnx-paraformer-zh-small-2024-03-09/test_wavs/0.wav"; | ||
| 24 | - const char *model_filename = "sherpa-onnx-paraformer-zh-small-2024-03-09/model.int8.onnx"; | ||
| 25 | - const char *tokens_filename = "sherpa-onnx-paraformer-zh-small-2024-03-09/tokens.txt"; | 23 | + const char *wav_filename = |
| 24 | + "sherpa-onnx-paraformer-zh-small-2024-03-09/test_wavs/0.wav"; | ||
| 25 | + const char *model_filename = | ||
| 26 | + "sherpa-onnx-paraformer-zh-small-2024-03-09/model.int8.onnx"; | ||
| 27 | + const char *tokens_filename = | ||
| 28 | + "sherpa-onnx-paraformer-zh-small-2024-03-09/tokens.txt"; | ||
| 26 | const char *provider = "cpu"; | 29 | const char *provider = "cpu"; |
| 27 | 30 | ||
| 28 | - | ||
| 29 | const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename); | 31 | const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename); |
| 30 | if (wave == NULL) { | 32 | if (wave == NULL) { |
| 31 | fprintf(stderr, "Failed to read %s\n", wav_filename); | 33 | fprintf(stderr, "Failed to read %s\n", wav_filename); |
| 32 | return -1; | 34 | return -1; |
| 33 | } | 35 | } |
| 34 | - | 36 | + |
| 35 | // Paraformer config | 37 | // Paraformer config |
| 36 | SherpaOnnxOfflineParaformerModelConfig paraformer_config; | 38 | SherpaOnnxOfflineParaformerModelConfig paraformer_config; |
| 37 | memset(¶former_config, 0, sizeof(paraformer_config)); | 39 | memset(¶former_config, 0, sizeof(paraformer_config)); |
| @@ -19,8 +19,6 @@ | @@ -19,8 +19,6 @@ | ||
| 19 | #include "sherpa-onnx/c-api/c-api.h" | 19 | #include "sherpa-onnx/c-api/c-api.h" |
| 20 | 20 | ||
| 21 | int32_t main() { | 21 | int32_t main() { |
| 22 | - // You can find more test waves from | ||
| 23 | - // https://hf-mirror.com/spaces/k2-fsa/spoken-language-identification/tree/main/test_wavs | ||
| 24 | const char *wav_filename = | 22 | const char *wav_filename = |
| 25 | "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/en.wav"; | 23 | "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/en.wav"; |
| 26 | const char *model_filename = | 24 | const char *model_filename = |
| @@ -3,7 +3,8 @@ | @@ -3,7 +3,8 @@ | ||
| 3 | // Copyright (c) 2024 Xiaomi Corporation | 3 | // Copyright (c) 2024 Xiaomi Corporation |
| 4 | 4 | ||
| 5 | // | 5 | // |
| 6 | -// This file demonstrates how to use streaming Paraformer with sherpa-onnx's C API. | 6 | +// This file demonstrates how to use streaming Paraformer with sherpa-onnx's C |
| 7 | +// API. | ||
| 7 | // clang-format off | 8 | // clang-format off |
| 8 | // | 9 | // |
| 9 | // wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 | 10 | // wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 |
| @@ -19,26 +20,27 @@ | @@ -19,26 +20,27 @@ | ||
| 19 | #include "sherpa-onnx/c-api/c-api.h" | 20 | #include "sherpa-onnx/c-api/c-api.h" |
| 20 | 21 | ||
| 21 | int32_t main() { | 22 | int32_t main() { |
| 22 | - | ||
| 23 | - const char *wav_filename = "sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav"; | ||
| 24 | - const char *encoder_filename = "sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx"; | ||
| 25 | - const char *decoder_filename = "sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx"; | ||
| 26 | - const char *tokens_filename = "sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt"; | 23 | + const char *wav_filename = |
| 24 | + "sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav"; | ||
| 25 | + const char *encoder_filename = | ||
| 26 | + "sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx"; | ||
| 27 | + const char *decoder_filename = | ||
| 28 | + "sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx"; | ||
| 29 | + const char *tokens_filename = | ||
| 30 | + "sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt"; | ||
| 27 | const char *provider = "cpu"; | 31 | const char *provider = "cpu"; |
| 28 | 32 | ||
| 29 | - | ||
| 30 | const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename); | 33 | const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename); |
| 31 | if (wave == NULL) { | 34 | if (wave == NULL) { |
| 32 | fprintf(stderr, "Failed to read %s\n", wav_filename); | 35 | fprintf(stderr, "Failed to read %s\n", wav_filename); |
| 33 | return -1; | 36 | return -1; |
| 34 | } | 37 | } |
| 35 | - | 38 | + |
| 36 | // Paraformer config | 39 | // Paraformer config |
| 37 | SherpaOnnxOnlineParaformerModelConfig paraformer_config; | 40 | SherpaOnnxOnlineParaformerModelConfig paraformer_config; |
| 38 | memset(¶former_config, 0, sizeof(paraformer_config)); | 41 | memset(¶former_config, 0, sizeof(paraformer_config)); |
| 39 | paraformer_config.encoder = encoder_filename; | 42 | paraformer_config.encoder = encoder_filename; |
| 40 | paraformer_config.decoder = decoder_filename; | 43 | paraformer_config.decoder = decoder_filename; |
| 41 | - | ||
| 42 | 44 | ||
| 43 | // Online model config | 45 | // Online model config |
| 44 | SherpaOnnxOnlineModelConfig online_model_config; | 46 | SherpaOnnxOnlineModelConfig online_model_config; |
| @@ -3,7 +3,8 @@ | @@ -3,7 +3,8 @@ | ||
| 3 | // Copyright (c) 2024 Xiaomi Corporation | 3 | // Copyright (c) 2024 Xiaomi Corporation |
| 4 | 4 | ||
| 5 | // | 5 | // |
| 6 | -// This file demonstrates how to use streaming Zipformer with sherpa-onnx's C API. | 6 | +// This file demonstrates how to use streaming Zipformer with sherpa-onnx's C |
| 7 | +// API. | ||
| 7 | // clang-format off | 8 | // clang-format off |
| 8 | // | 9 | // |
| 9 | // wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2 | 10 | // wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2 |
| @@ -19,28 +20,33 @@ | @@ -19,28 +20,33 @@ | ||
| 19 | #include "sherpa-onnx/c-api/c-api.h" | 20 | #include "sherpa-onnx/c-api/c-api.h" |
| 20 | 21 | ||
| 21 | int32_t main() { | 22 | int32_t main() { |
| 22 | - | ||
| 23 | - const char *wav_filename = "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/test_wavs/0.wav"; | ||
| 24 | - const char *encoder_filename = "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/encoder-epoch-99-avg-1.onnx"; | ||
| 25 | - const char *decoder_filename = "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/decoder-epoch-99-avg-1.onnx"; | ||
| 26 | - const char *joiner_filename = "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/joiner-epoch-99-avg-1.onnx"; | ||
| 27 | - const char *tokens_filename = "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/tokens.txt"; | 23 | + const char *wav_filename = |
| 24 | + "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/test_wavs/0.wav"; | ||
| 25 | + const char *encoder_filename = | ||
| 26 | + "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/" | ||
| 27 | + "encoder-epoch-99-avg-1.onnx"; | ||
| 28 | + const char *decoder_filename = | ||
| 29 | + "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/" | ||
| 30 | + "decoder-epoch-99-avg-1.onnx"; | ||
| 31 | + const char *joiner_filename = | ||
| 32 | + "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/" | ||
| 33 | + "joiner-epoch-99-avg-1.onnx"; | ||
| 34 | + const char *tokens_filename = | ||
| 35 | + "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/tokens.txt"; | ||
| 28 | const char *provider = "cpu"; | 36 | const char *provider = "cpu"; |
| 29 | 37 | ||
| 30 | - | ||
| 31 | const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename); | 38 | const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename); |
| 32 | if (wave == NULL) { | 39 | if (wave == NULL) { |
| 33 | fprintf(stderr, "Failed to read %s\n", wav_filename); | 40 | fprintf(stderr, "Failed to read %s\n", wav_filename); |
| 34 | return -1; | 41 | return -1; |
| 35 | } | 42 | } |
| 36 | - | 43 | + |
| 37 | // Zipformer config | 44 | // Zipformer config |
| 38 | SherpaOnnxOnlineTransducerModelConfig zipformer_config; | 45 | SherpaOnnxOnlineTransducerModelConfig zipformer_config; |
| 39 | memset(&zipformer_config, 0, sizeof(zipformer_config)); | 46 | memset(&zipformer_config, 0, sizeof(zipformer_config)); |
| 40 | zipformer_config.encoder = encoder_filename; | 47 | zipformer_config.encoder = encoder_filename; |
| 41 | zipformer_config.decoder = decoder_filename; | 48 | zipformer_config.decoder = decoder_filename; |
| 42 | zipformer_config.joiner = joiner_filename; | 49 | zipformer_config.joiner = joiner_filename; |
| 43 | - | ||
| 44 | 50 | ||
| 45 | // Online model config | 51 | // Online model config |
| 46 | SherpaOnnxOnlineModelConfig online_model_config; | 52 | SherpaOnnxOnlineModelConfig online_model_config; |
| @@ -3,7 +3,8 @@ | @@ -3,7 +3,8 @@ | ||
| 3 | // Copyright (c) 2024 Xiaomi Corporation | 3 | // Copyright (c) 2024 Xiaomi Corporation |
| 4 | 4 | ||
| 5 | // | 5 | // |
| 6 | -// This file demonstrates how to use TeleSpeech-ASR CTC model with sherpa-onnx's C API. | 6 | +// This file demonstrates how to use TeleSpeech-ASR CTC model with sherpa-onnx's |
| 7 | +// C API. | ||
| 7 | // clang-format off | 8 | // clang-format off |
| 8 | // | 9 | // |
| 9 | // wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2 | 10 | // wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2 |
| @@ -19,10 +20,12 @@ | @@ -19,10 +20,12 @@ | ||
| 19 | #include "sherpa-onnx/c-api/c-api.h" | 20 | #include "sherpa-onnx/c-api/c-api.h" |
| 20 | 21 | ||
| 21 | int32_t main() { | 22 | int32_t main() { |
| 22 | - | ||
| 23 | - const char *wav_filename = "sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/test_wavs/3-sichuan.wav"; | ||
| 24 | - const char *model_filename = "sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx"; | ||
| 25 | - const char *tokens_filename = "sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt"; | 23 | + const char *wav_filename = |
| 24 | + "sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/test_wavs/3-sichuan.wav"; | ||
| 25 | + const char *model_filename = | ||
| 26 | + "sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx"; | ||
| 27 | + const char *tokens_filename = | ||
| 28 | + "sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt"; | ||
| 26 | const char *provider = "cpu"; | 29 | const char *provider = "cpu"; |
| 27 | 30 | ||
| 28 | const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename); | 31 | const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename); |
c-api-examples/vad-sense-voice-c-api.c
0 → 100644
| 1 | +// c-api-examples/vad-sense-voice-c-api.c | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +// | ||
| 6 | +// This file demonstrates how to use VAD + SenseVoice with sherpa-onnx's C API. | ||
| 7 | +// clang-format off | ||
| 8 | +// | ||
| 9 | +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx | ||
| 10 | +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav | ||
| 11 | +// | ||
| 12 | +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 13 | +// tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 14 | +// rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 15 | +// | ||
| 16 | +// clang-format on | ||
| 17 | + | ||
| 18 | +#include <stdio.h> | ||
| 19 | +#include <stdlib.h> | ||
| 20 | +#include <string.h> | ||
| 21 | + | ||
| 22 | +#include "sherpa-onnx/c-api/c-api.h" | ||
| 23 | + | ||
| 24 | +int32_t main() { | ||
| 25 | + const char *wav_filename = "./lei-jun-test.wav"; | ||
| 26 | + const char *vad_filename = "./silero_vad.onnx"; | ||
| 27 | + const char *model_filename = | ||
| 28 | + "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx"; | ||
| 29 | + const char *tokens_filename = | ||
| 30 | + "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt"; | ||
| 31 | + const char *language = "auto"; | ||
| 32 | + const char *provider = "cpu"; | ||
| 33 | + int32_t use_inverse_text_normalization = 1; | ||
| 34 | + | ||
| 35 | + const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename); | ||
| 36 | + if (wave == NULL) { | ||
| 37 | + fprintf(stderr, "Failed to read %s\n", wav_filename); | ||
| 38 | + return -1; | ||
| 39 | + } | ||
| 40 | + | ||
| 41 | + if (wave->sample_rate != 16000) { | ||
| 42 | + fprintf(stderr, "Expect the sample rate to be 16000. Given: %d\n", | ||
| 43 | + wave->sample_rate); | ||
| 44 | + SherpaOnnxFreeWave(wave); | ||
| 45 | + return -1; | ||
| 46 | + } | ||
| 47 | + | ||
| 48 | + SherpaOnnxOfflineSenseVoiceModelConfig sense_voice_config; | ||
| 49 | + memset(&sense_voice_config, 0, sizeof(sense_voice_config)); | ||
| 50 | + sense_voice_config.model = model_filename; | ||
| 51 | + sense_voice_config.language = language; | ||
| 52 | + sense_voice_config.use_itn = use_inverse_text_normalization; | ||
| 53 | + | ||
| 54 | + // Offline model config | ||
| 55 | + SherpaOnnxOfflineModelConfig offline_model_config; | ||
| 56 | + memset(&offline_model_config, 0, sizeof(offline_model_config)); | ||
| 57 | + offline_model_config.debug = 0; | ||
| 58 | + offline_model_config.num_threads = 1; | ||
| 59 | + offline_model_config.provider = provider; | ||
| 60 | + offline_model_config.tokens = tokens_filename; | ||
| 61 | + offline_model_config.sense_voice = sense_voice_config; | ||
| 62 | + | ||
| 63 | + // Recognizer config | ||
| 64 | + SherpaOnnxOfflineRecognizerConfig recognizer_config; | ||
| 65 | + memset(&recognizer_config, 0, sizeof(recognizer_config)); | ||
| 66 | + recognizer_config.decoding_method = "greedy_search"; | ||
| 67 | + recognizer_config.model_config = offline_model_config; | ||
| 68 | + | ||
| 69 | + SherpaOnnxOfflineRecognizer *recognizer = | ||
| 70 | + SherpaOnnxCreateOfflineRecognizer(&recognizer_config); | ||
| 71 | + | ||
| 72 | + if (recognizer == NULL) { | ||
| 73 | + fprintf(stderr, "Please check your recognizer config!\n"); | ||
| 74 | + SherpaOnnxFreeWave(wave); | ||
| 75 | + return -1; | ||
| 76 | + } | ||
| 77 | + | ||
| 78 | + SherpaOnnxVadModelConfig vadConfig; | ||
| 79 | + memset(&vadConfig, 0, sizeof(vadConfig)); | ||
| 80 | + vadConfig.silero_vad.model = vad_filename; | ||
| 81 | + vadConfig.silero_vad.threshold = 0.5; | ||
| 82 | + vadConfig.silero_vad.min_silence_duration = 0.5; | ||
| 83 | + vadConfig.silero_vad.min_speech_duration = 0.5; | ||
| 84 | + vadConfig.silero_vad.window_size = 512; | ||
| 85 | + vadConfig.sample_rate = 16000; | ||
| 86 | + vadConfig.num_threads = 1; | ||
| 87 | + vadConfig.debug = 1; | ||
| 88 | + | ||
| 89 | + SherpaOnnxVoiceActivityDetector *vad = | ||
| 90 | + SherpaOnnxCreateVoiceActivityDetector(&vadConfig, 30); | ||
| 91 | + | ||
| 92 | + if (vad == NULL) { | ||
| 93 | + fprintf(stderr, "Please check your VAD config!\n"); | ||
| 94 | + SherpaOnnxFreeWave(wave); | ||
| 95 | + SherpaOnnxDestroyOfflineRecognizer(recognizer); | ||
| 96 | + return -1; | ||
| 97 | + } | ||
| 98 | + | ||
| 99 | + int32_t window_size = vadConfig.silero_vad.window_size; | ||
| 100 | + int32_t i = 0; | ||
| 101 | + | ||
| 102 | + while (i + window_size < wave->num_samples) { | ||
| 103 | + SherpaOnnxVoiceActivityDetectorAcceptWaveform(vad, wave->samples + i, | ||
| 104 | + window_size); | ||
| 105 | + i += window_size; | ||
| 106 | + | ||
| 107 | + while (!SherpaOnnxVoiceActivityDetectorEmpty(vad)) { | ||
| 108 | + const SherpaOnnxSpeechSegment *segment = | ||
| 109 | + SherpaOnnxVoiceActivityDetectorFront(vad); | ||
| 110 | + | ||
| 111 | + SherpaOnnxOfflineStream *stream = | ||
| 112 | + SherpaOnnxCreateOfflineStream(recognizer); | ||
| 113 | + SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, | ||
| 114 | + segment->samples, segment->n); | ||
| 115 | + | ||
| 116 | + SherpaOnnxDecodeOfflineStream(recognizer, stream); | ||
| 117 | + | ||
| 118 | + const SherpaOnnxOfflineRecognizerResult *result = | ||
| 119 | + SherpaOnnxGetOfflineStreamResult(stream); | ||
| 120 | + | ||
| 121 | + float start = segment->start / 16000.0f; | ||
| 122 | + float duration = segment->n / 16000.0f; | ||
| 123 | + float stop = start + duration; | ||
| 124 | + | ||
| 125 | + fprintf(stderr, "%.3f -- %.3f: %s\n", start, stop, result->text); | ||
| 126 | + | ||
| 127 | + SherpaOnnxDestroyOfflineRecognizerResult(result); | ||
| 128 | + SherpaOnnxDestroyOfflineStream(stream); | ||
| 129 | + | ||
| 130 | + SherpaOnnxDestroySpeechSegment(segment); | ||
| 131 | + SherpaOnnxVoiceActivityDetectorPop(vad); | ||
| 132 | + } | ||
| 133 | + } | ||
| 134 | + | ||
| 135 | + SherpaOnnxVoiceActivityDetectorFlush(vad); | ||
| 136 | + | ||
| 137 | + while (!SherpaOnnxVoiceActivityDetectorEmpty(vad)) { | ||
| 138 | + const SherpaOnnxSpeechSegment *segment = | ||
| 139 | + SherpaOnnxVoiceActivityDetectorFront(vad); | ||
| 140 | + | ||
| 141 | + SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer); | ||
| 142 | + SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, segment->samples, | ||
| 143 | + segment->n); | ||
| 144 | + | ||
| 145 | + SherpaOnnxDecodeOfflineStream(recognizer, stream); | ||
| 146 | + | ||
| 147 | + const SherpaOnnxOfflineRecognizerResult *result = | ||
| 148 | + SherpaOnnxGetOfflineStreamResult(stream); | ||
| 149 | + | ||
| 150 | + float start = segment->start / 16000.0f; | ||
| 151 | + float duration = segment->n / 16000.0f; | ||
| 152 | + float stop = start + duration; | ||
| 153 | + | ||
| 154 | + fprintf(stderr, "%.3f -- %.3f: %s\n", start, stop, result->text); | ||
| 155 | + | ||
| 156 | + SherpaOnnxDestroyOfflineRecognizerResult(result); | ||
| 157 | + SherpaOnnxDestroyOfflineStream(stream); | ||
| 158 | + | ||
| 159 | + SherpaOnnxDestroySpeechSegment(segment); | ||
| 160 | + SherpaOnnxVoiceActivityDetectorPop(vad); | ||
| 161 | + } | ||
| 162 | + | ||
| 163 | + SherpaOnnxDestroyOfflineRecognizer(recognizer); | ||
| 164 | + SherpaOnnxDestroyVoiceActivityDetector(vad); | ||
| 165 | + SherpaOnnxFreeWave(wave); | ||
| 166 | + | ||
| 167 | + return 0; | ||
| 168 | +} |
| @@ -3,7 +3,8 @@ | @@ -3,7 +3,8 @@ | ||
| 3 | // Copyright (c) 2024 Xiaomi Corporation | 3 | // Copyright (c) 2024 Xiaomi Corporation |
| 4 | 4 | ||
| 5 | // | 5 | // |
| 6 | -// This file demonstrates how to use non-streaming Zipformer with sherpa-onnx's C API. | 6 | +// This file demonstrates how to use non-streaming Zipformer with sherpa-onnx's |
| 7 | +// C API. | ||
| 7 | // clang-format off | 8 | // clang-format off |
| 8 | // | 9 | // |
| 9 | // wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-small-en-2023-06-26.tar.bz2 | 10 | // wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-small-en-2023-06-26.tar.bz2 |
| @@ -19,28 +20,30 @@ | @@ -19,28 +20,30 @@ | ||
| 19 | #include "sherpa-onnx/c-api/c-api.h" | 20 | #include "sherpa-onnx/c-api/c-api.h" |
| 20 | 21 | ||
| 21 | int32_t main() { | 22 | int32_t main() { |
| 22 | - | ||
| 23 | - const char *wav_filename = "sherpa-onnx-zipformer-small-en-2023-06-26/test_wavs/0.wav"; | ||
| 24 | - const char *encoder_filename = "sherpa-onnx-zipformer-small-en-2023-06-26/encoder-epoch-99-avg-1.onnx"; | ||
| 25 | - const char *decoder_filename = "sherpa-onnx-zipformer-small-en-2023-06-26/decoder-epoch-99-avg-1.onnx"; | ||
| 26 | - const char *joiner_filename = "sherpa-onnx-zipformer-small-en-2023-06-26/joiner-epoch-99-avg-1.onnx"; | ||
| 27 | - const char *tokens_filename = "sherpa-onnx-zipformer-small-en-2023-06-26/tokens.txt"; | 23 | + const char *wav_filename = |
| 24 | + "sherpa-onnx-zipformer-small-en-2023-06-26/test_wavs/0.wav"; | ||
| 25 | + const char *encoder_filename = | ||
| 26 | + "sherpa-onnx-zipformer-small-en-2023-06-26/encoder-epoch-99-avg-1.onnx"; | ||
| 27 | + const char *decoder_filename = | ||
| 28 | + "sherpa-onnx-zipformer-small-en-2023-06-26/decoder-epoch-99-avg-1.onnx"; | ||
| 29 | + const char *joiner_filename = | ||
| 30 | + "sherpa-onnx-zipformer-small-en-2023-06-26/joiner-epoch-99-avg-1.onnx"; | ||
| 31 | + const char *tokens_filename = | ||
| 32 | + "sherpa-onnx-zipformer-small-en-2023-06-26/tokens.txt"; | ||
| 28 | const char *provider = "cpu"; | 33 | const char *provider = "cpu"; |
| 29 | 34 | ||
| 30 | - | ||
| 31 | const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename); | 35 | const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename); |
| 32 | if (wave == NULL) { | 36 | if (wave == NULL) { |
| 33 | fprintf(stderr, "Failed to read %s\n", wav_filename); | 37 | fprintf(stderr, "Failed to read %s\n", wav_filename); |
| 34 | return -1; | 38 | return -1; |
| 35 | } | 39 | } |
| 36 | - | 40 | + |
| 37 | // Zipformer config | 41 | // Zipformer config |
| 38 | SherpaOnnxOfflineTransducerModelConfig zipformer_config; | 42 | SherpaOnnxOfflineTransducerModelConfig zipformer_config; |
| 39 | memset(&zipformer_config, 0, sizeof(zipformer_config)); | 43 | memset(&zipformer_config, 0, sizeof(zipformer_config)); |
| 40 | zipformer_config.encoder = encoder_filename; | 44 | zipformer_config.encoder = encoder_filename; |
| 41 | zipformer_config.decoder = decoder_filename; | 45 | zipformer_config.decoder = decoder_filename; |
| 42 | zipformer_config.joiner = joiner_filename; | 46 | zipformer_config.joiner = joiner_filename; |
| 43 | - | ||
| 44 | 47 | ||
| 45 | // Offline model config | 48 | // Offline model config |
| 46 | SherpaOnnxOfflineModelConfig offline_model_config; | 49 | SherpaOnnxOfflineModelConfig offline_model_config; |