Committed by
GitHub
Add C and CXX API for Dolphin CTC models (#2088)
正在显示
12 个修改的文件
包含
231 行增加
和
5 行删除
| @@ -79,6 +79,36 @@ jobs: | @@ -79,6 +79,36 @@ jobs: | ||
| 79 | otool -L ./install/lib/libsherpa-onnx-c-api.dylib | 79 | otool -L ./install/lib/libsherpa-onnx-c-api.dylib |
| 80 | fi | 80 | fi |
| 81 | 81 | ||
| 82 | + - name: Test Dolphin CTC | ||
| 83 | + shell: bash | ||
| 84 | + run: | | ||
| 85 | + name=dolphin-ctc-c-api | ||
| 86 | + gcc -o $name ./c-api-examples/$name.c \ | ||
| 87 | + -I ./build/install/include \ | ||
| 88 | + -L ./build/install/lib/ \ | ||
| 89 | + -l sherpa-onnx-c-api \ | ||
| 90 | + -l onnxruntime | ||
| 91 | + | ||
| 92 | + ls -lh $name | ||
| 93 | + | ||
| 94 | + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then | ||
| 95 | + ldd ./$name | ||
| 96 | + echo "----" | ||
| 97 | + readelf -d ./$name | ||
| 98 | + fi | ||
| 99 | + | ||
| 100 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | |
| 101 | + tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | ||
| 102 | + rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | ||
| 103 | + | ||
| 104 | + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH | ||
| 105 | + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 106 | + | ||
| 107 | + ./$name | ||
| 108 | + | ||
| 109 | + rm $name | ||
| 110 | + rm -rf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02 | ||
| 111 | + | ||
| 82 | - name: Test speech enhancement (GTCRN) | 112 | - name: Test speech enhancement (GTCRN) |
| 83 | shell: bash | 113 | shell: bash |
| 84 | run: | | 114 | run: | |
| @@ -81,6 +81,38 @@ jobs: | @@ -81,6 +81,38 @@ jobs: | ||
| 81 | otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib | 81 | otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib |
| 82 | fi | 82 | fi |
| 83 | 83 | ||
| 84 | + - name: Test Dolphin CTC | ||
| 85 | + shell: bash | ||
| 86 | + run: | | ||
| 87 | + name=dolphin-ctc-cxx-api | ||
| 88 | + g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \ | ||
| 89 | + -I ./build/install/include \ | ||
| 90 | + -L ./build/install/lib/ \ | ||
| 91 | + -l sherpa-onnx-cxx-api \ | ||
| 92 | + -l sherpa-onnx-c-api \ | ||
| 93 | + -l onnxruntime | ||
| 94 | + | ||
| 95 | + ls -lh $name | ||
| 96 | + | ||
| 97 | + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH | ||
| 98 | + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 99 | + | ||
| 100 | + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then | ||
| 101 | + ldd ./$name | ||
| 102 | + echo "----" | ||
| 103 | + readelf -d ./$name | ||
| 104 | + fi | ||
| 105 | + | ||
| 106 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | |
| 107 | + tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | ||
| 108 | + rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | ||
| 109 | + | ||
| 110 | + ./$name | ||
| 111 | + | ||
| 112 | + rm -rf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02 | ||
| 113 | + | ||
| 114 | + rm $name | ||
| 115 | + | ||
| 84 | - name: Test VAD | 116 | - name: Test VAD |
| 85 | shell: bash | 117 | shell: bash |
| 86 | run: | | 118 | run: | |
c-api-examples/dolphin-ctc-c-api.c
0 → 100644
| 1 | +// c-api-examples/dolphin-ctc-c-api.c | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +// | ||
| 6 | +// This file demonstrates how to use Dolphin CTC model with sherpa-onnx's C API. | ||
| 7 | +// clang-format off | ||
| 8 | +// | ||
| 9 | +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | |
| 10 | +// tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | ||
| 11 | +// rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | ||
| 12 | +// | ||
| 13 | +// clang-format on | ||
| 14 | + | ||
| 15 | +#include <stdio.h> | ||
| 16 | +#include <stdlib.h> | ||
| 17 | +#include <string.h> | ||
| 18 | + | ||
| 19 | +#include "sherpa-onnx/c-api/c-api.h" | ||
| 20 | + | ||
| 21 | +int32_t main() { | ||
| 22 | + // clang-format off | ||
| 23 | + const char *wav_filename = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav"; | ||
| 24 | + const char *model_filename = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx"; | ||
| 25 | + const char *tokens_filename = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt"; | ||
| 26 | + // clang-format on | ||
| 27 | + | ||
| 28 | + const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename); | ||
| 29 | + if (wave == NULL) { | ||
| 30 | + fprintf(stderr, "Failed to read %s\n", wav_filename); | ||
| 31 | + return -1; | ||
| 32 | + } | ||
| 33 | + | ||
| 34 | + SherpaOnnxOfflineModelConfig offline_model_config; | ||
| 35 | + memset(&offline_model_config, 0, sizeof(offline_model_config)); | ||
| 36 | + offline_model_config.debug = 1; | ||
| 37 | + offline_model_config.num_threads = 1; | ||
| 38 | + offline_model_config.provider = "cpu"; | ||
| 39 | + offline_model_config.tokens = tokens_filename; | ||
| 40 | + offline_model_config.dolphin.model = model_filename; | ||
| 41 | + | ||
| 42 | + // Recognizer config | ||
| 43 | + SherpaOnnxOfflineRecognizerConfig recognizer_config; | ||
| 44 | + memset(&recognizer_config, 0, sizeof(recognizer_config)); | ||
| 45 | + recognizer_config.decoding_method = "greedy_search"; | ||
| 46 | + recognizer_config.model_config = offline_model_config; | ||
| 47 | + | ||
| 48 | + const SherpaOnnxOfflineRecognizer *recognizer = | ||
| 49 | + SherpaOnnxCreateOfflineRecognizer(&recognizer_config); | ||
| 50 | + | ||
| 51 | + if (recognizer == NULL) { | ||
| 52 | + fprintf(stderr, "Please check your config!\n"); | ||
| 53 | + SherpaOnnxFreeWave(wave); | ||
| 54 | + return -1; | ||
| 55 | + } | ||
| 56 | + | ||
| 57 | + const SherpaOnnxOfflineStream *stream = | ||
| 58 | + SherpaOnnxCreateOfflineStream(recognizer); | ||
| 59 | + | ||
| 60 | + SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples, | ||
| 61 | + wave->num_samples); | ||
| 62 | + SherpaOnnxDecodeOfflineStream(recognizer, stream); | ||
| 63 | + const SherpaOnnxOfflineRecognizerResult *result = | ||
| 64 | + SherpaOnnxGetOfflineStreamResult(stream); | ||
| 65 | + | ||
| 66 | + fprintf(stderr, "Decoded text: %s\n", result->text); | ||
| 67 | + | ||
| 68 | + SherpaOnnxDestroyOfflineRecognizerResult(result); | ||
| 69 | + SherpaOnnxDestroyOfflineStream(stream); | ||
| 70 | + SherpaOnnxDestroyOfflineRecognizer(recognizer); | ||
| 71 | + SherpaOnnxFreeWave(wave); | ||
| 72 | + | ||
| 73 | + return 0; | ||
| 74 | +} |
| @@ -24,6 +24,9 @@ target_link_libraries(moonshine-cxx-api sherpa-onnx-cxx-api) | @@ -24,6 +24,9 @@ target_link_libraries(moonshine-cxx-api sherpa-onnx-cxx-api) | ||
| 24 | add_executable(sense-voice-cxx-api ./sense-voice-cxx-api.cc) | 24 | add_executable(sense-voice-cxx-api ./sense-voice-cxx-api.cc) |
| 25 | target_link_libraries(sense-voice-cxx-api sherpa-onnx-cxx-api) | 25 | target_link_libraries(sense-voice-cxx-api sherpa-onnx-cxx-api) |
| 26 | 26 | ||
| 27 | +add_executable(dolphin-ctc-cxx-api ./dolphin-ctc-cxx-api.cc) | ||
| 28 | +target_link_libraries(dolphin-ctc-cxx-api sherpa-onnx-cxx-api) | ||
| 29 | + | ||
| 27 | add_executable(vad-cxx-api ./vad-cxx-api.cc) | 30 | add_executable(vad-cxx-api ./vad-cxx-api.cc) |
| 28 | target_link_libraries(vad-cxx-api sherpa-onnx-cxx-api) | 31 | target_link_libraries(vad-cxx-api sherpa-onnx-cxx-api) |
| 29 | 32 |
cxx-api-examples/dolphin-ctc-cxx-api.cc
0 → 100644
| 1 | +// cxx-api-examples/dolphin-ctc-cxx-api.cc | ||
| 2 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 3 | + | ||
| 4 | +// | ||
| 5 | +// This file demonstrates how to use Dolphin CTC model with sherpa-onnx's C++ | |
| 6 | +// API. | ||
| 7 | +// | ||
| 8 | +// clang-format off | ||
| 9 | +// | ||
| 10 | +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | |
| 11 | +// tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | ||
| 12 | +// rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | ||
| 13 | +// | ||
| 14 | +// clang-format on | ||
| 15 | + | ||
| 16 | +#include <chrono> // NOLINT | ||
| 17 | +#include <iostream> | ||
| 18 | +#include <string> | ||
| 19 | + | ||
| 20 | +#include "sherpa-onnx/c-api/cxx-api.h" | ||
| 21 | + | ||
| 22 | +int32_t main() { | ||
| 23 | + using namespace sherpa_onnx::cxx; // NOLINT | ||
| 24 | + OfflineRecognizerConfig config; | ||
| 25 | + | ||
| 26 | + // clang-format off | ||
| 27 | + config.model_config.dolphin.model = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx"; | ||
| 28 | + config.model_config.tokens = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt"; | ||
| 29 | + | ||
| 30 | + std::string wave_filename = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav"; | ||
| 31 | + // clang-format on | ||
| 32 | + | ||
| 33 | + config.model_config.num_threads = 1; | ||
| 34 | + | ||
| 35 | + std::cout << "Loading model\n"; | ||
| 36 | + OfflineRecognizer recongizer = OfflineRecognizer::Create(config); | ||
| 37 | + if (!recongizer.Get()) { | ||
| 38 | + std::cerr << "Please check your config\n"; | ||
| 39 | + return -1; | ||
| 40 | + } | ||
| 41 | + std::cout << "Loading model done\n"; | ||
| 42 | + | ||
| 43 | + Wave wave = ReadWave(wave_filename); | ||
| 44 | + if (wave.samples.empty()) { | ||
| 45 | + std::cerr << "Failed to read: '" << wave_filename << "'\n"; | ||
| 46 | + return -1; | ||
| 47 | + } | ||
| 48 | + | ||
| 49 | + std::cout << "Start recognition\n"; | ||
| 50 | + const auto begin = std::chrono::steady_clock::now(); | ||
| 51 | + | ||
| 52 | + OfflineStream stream = recongizer.CreateStream(); | ||
| 53 | + stream.AcceptWaveform(wave.sample_rate, wave.samples.data(), | ||
| 54 | + wave.samples.size()); | ||
| 55 | + | ||
| 56 | + recongizer.Decode(&stream); | ||
| 57 | + | ||
| 58 | + OfflineRecognizerResult result = recongizer.GetResult(&stream); | ||
| 59 | + | ||
| 60 | + const auto end = std::chrono::steady_clock::now(); | ||
| 61 | + const float elapsed_seconds = | ||
| 62 | + std::chrono::duration_cast<std::chrono::milliseconds>(end - begin) | ||
| 63 | + .count() / | ||
| 64 | + 1000.; | ||
| 65 | + float duration = wave.samples.size() / static_cast<float>(wave.sample_rate); | ||
| 66 | + float rtf = elapsed_seconds / duration; | ||
| 67 | + | ||
| 68 | + std::cout << "text: " << result.text << "\n"; | ||
| 69 | + printf("Number of threads: %d\n", config.model_config.num_threads); | ||
| 70 | + printf("Duration: %.3fs\n", duration); | ||
| 71 | + printf("Elapsed seconds: %.3fs\n", elapsed_seconds); | ||
| 72 | + printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds, | ||
| 73 | + duration, rtf); | ||
| 74 | + | ||
| 75 | + return 0; | ||
| 76 | +} |
| @@ -333,7 +333,6 @@ def get_1st_models(): | @@ -333,7 +333,6 @@ def get_1st_models(): | ||
| 333 | rm -f bpe.model | 333 | rm -f bpe.model |
| 334 | 334 | ||
| 335 | rm -rf test_wavs | 335 | rm -rf test_wavs |
| 336 | - rm README.md | ||
| 337 | 336 | ||
| 338 | ls -lh | 337 | ls -lh |
| 339 | 338 | ||
| @@ -354,7 +353,6 @@ def get_1st_models(): | @@ -354,7 +353,6 @@ def get_1st_models(): | ||
| 354 | rm -f bpe.model | 353 | rm -f bpe.model |
| 355 | 354 | ||
| 356 | rm -rf test_wavs | 355 | rm -rf test_wavs |
| 357 | - rm README.md | ||
| 358 | 356 | ||
| 359 | ls -lh | 357 | ls -lh |
| 360 | 358 |
| @@ -277,7 +277,6 @@ def get_models(): | @@ -277,7 +277,6 @@ def get_models(): | ||
| 277 | rm -f bpe.model | 277 | rm -f bpe.model |
| 278 | 278 | ||
| 279 | rm -rf test_wavs | 279 | rm -rf test_wavs |
| 280 | - rm README.md | ||
| 281 | 280 | ||
| 282 | ls -lh | 281 | ls -lh |
| 283 | 282 | ||
| @@ -298,7 +297,6 @@ def get_models(): | @@ -298,7 +297,6 @@ def get_models(): | ||
| 298 | rm -f bpe.model | 297 | rm -f bpe.model |
| 299 | 298 | ||
| 300 | rm -rf test_wavs | 299 | rm -rf test_wavs |
| 301 | - rm README.md | ||
| 302 | 300 | ||
| 303 | ls -lh | 301 | ls -lh |
| 304 | 302 |
| @@ -448,7 +448,7 @@ def get_models(): | @@ -448,7 +448,7 @@ def get_models(): | ||
| 448 | idx=25, | 448 | idx=25, |
| 449 | lang="multi_lang", | 449 | lang="multi_lang", |
| 450 | lang2="multi_lang", | 450 | lang2="multi_lang", |
| 451 | - short_name="multi_lang", | 451 | + short_name="dolphin_base_ctc", |
| 452 | cmd=""" | 452 | cmd=""" |
| 453 | pushd $model_name | 453 | pushd $model_name |
| 454 | 454 |
| @@ -467,6 +467,9 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig( | @@ -467,6 +467,9 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig( | ||
| 467 | recognizer_config.model_config.fire_red_asr.decoder = | 467 | recognizer_config.model_config.fire_red_asr.decoder = |
| 468 | SHERPA_ONNX_OR(config->model_config.fire_red_asr.decoder, ""); | 468 | SHERPA_ONNX_OR(config->model_config.fire_red_asr.decoder, ""); |
| 469 | 469 | ||
| 470 | + recognizer_config.model_config.dolphin.model = | ||
| 471 | + SHERPA_ONNX_OR(config->model_config.dolphin.model, ""); | ||
| 472 | + | ||
| 470 | recognizer_config.lm_config.model = | 473 | recognizer_config.lm_config.model = |
| 471 | SHERPA_ONNX_OR(config->lm_config.model, ""); | 474 | SHERPA_ONNX_OR(config->lm_config.model, ""); |
| 472 | recognizer_config.lm_config.scale = | 475 | recognizer_config.lm_config.scale = |
| @@ -416,6 +416,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineSenseVoiceModelConfig { | @@ -416,6 +416,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineSenseVoiceModelConfig { | ||
| 416 | int32_t use_itn; | 416 | int32_t use_itn; |
| 417 | } SherpaOnnxOfflineSenseVoiceModelConfig; | 417 | } SherpaOnnxOfflineSenseVoiceModelConfig; |
| 418 | 418 | ||
| 419 | +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineDolphinModelConfig { | ||
| 420 | + const char *model; | ||
| 421 | +} SherpaOnnxOfflineDolphinModelConfig; | ||
| 422 | + | ||
| 419 | SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig { | 423 | SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig { |
| 420 | SherpaOnnxOfflineTransducerModelConfig transducer; | 424 | SherpaOnnxOfflineTransducerModelConfig transducer; |
| 421 | SherpaOnnxOfflineParaformerModelConfig paraformer; | 425 | SherpaOnnxOfflineParaformerModelConfig paraformer; |
| @@ -438,6 +442,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig { | @@ -438,6 +442,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig { | ||
| 438 | SherpaOnnxOfflineSenseVoiceModelConfig sense_voice; | 442 | SherpaOnnxOfflineSenseVoiceModelConfig sense_voice; |
| 439 | SherpaOnnxOfflineMoonshineModelConfig moonshine; | 443 | SherpaOnnxOfflineMoonshineModelConfig moonshine; |
| 440 | SherpaOnnxOfflineFireRedAsrModelConfig fire_red_asr; | 444 | SherpaOnnxOfflineFireRedAsrModelConfig fire_red_asr; |
| 445 | + SherpaOnnxOfflineDolphinModelConfig dolphin; | ||
| 441 | } SherpaOnnxOfflineModelConfig; | 446 | } SherpaOnnxOfflineModelConfig; |
| 442 | 447 | ||
| 443 | SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig { | 448 | SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig { |
| @@ -246,6 +246,8 @@ OfflineRecognizer OfflineRecognizer::Create( | @@ -246,6 +246,8 @@ OfflineRecognizer OfflineRecognizer::Create( | ||
| 246 | c.model_config.fire_red_asr.decoder = | 246 | c.model_config.fire_red_asr.decoder = |
| 247 | config.model_config.fire_red_asr.decoder.c_str(); | 247 | config.model_config.fire_red_asr.decoder.c_str(); |
| 248 | 248 | ||
| 249 | + c.model_config.dolphin.model = config.model_config.dolphin.model.c_str(); | ||
| 250 | + | ||
| 249 | c.lm_config.model = config.lm_config.model.c_str(); | 251 | c.lm_config.model = config.lm_config.model.c_str(); |
| 250 | c.lm_config.scale = config.lm_config.scale; | 252 | c.lm_config.scale = config.lm_config.scale; |
| 251 | 253 |
| @@ -229,6 +229,10 @@ struct SHERPA_ONNX_API OfflineSenseVoiceModelConfig { | @@ -229,6 +229,10 @@ struct SHERPA_ONNX_API OfflineSenseVoiceModelConfig { | ||
| 229 | bool use_itn = false; | 229 | bool use_itn = false; |
| 230 | }; | 230 | }; |
| 231 | 231 | ||
| 232 | +struct SHERPA_ONNX_API OfflineDolphinModelConfig { | ||
| 233 | + std::string model; | ||
| 234 | +}; | ||
| 235 | + | ||
| 232 | struct SHERPA_ONNX_API OfflineMoonshineModelConfig { | 236 | struct SHERPA_ONNX_API OfflineMoonshineModelConfig { |
| 233 | std::string preprocessor; | 237 | std::string preprocessor; |
| 234 | std::string encoder; | 238 | std::string encoder; |
| @@ -254,6 +258,7 @@ struct SHERPA_ONNX_API OfflineModelConfig { | @@ -254,6 +258,7 @@ struct SHERPA_ONNX_API OfflineModelConfig { | ||
| 254 | OfflineSenseVoiceModelConfig sense_voice; | 258 | OfflineSenseVoiceModelConfig sense_voice; |
| 255 | OfflineMoonshineModelConfig moonshine; | 259 | OfflineMoonshineModelConfig moonshine; |
| 256 | OfflineFireRedAsrModelConfig fire_red_asr; | 260 | OfflineFireRedAsrModelConfig fire_red_asr; |
| 261 | + OfflineDolphinModelConfig dolphin; | ||
| 257 | }; | 262 | }; |
| 258 | 263 | ||
| 259 | struct SHERPA_ONNX_API OfflineLMConfig { | 264 | struct SHERPA_ONNX_API OfflineLMConfig { |
-
请 注册 或 登录 后发表评论