Fangjun Kuang
Committed by GitHub

Add C and CXX API for homophone replacer (#2156)

@@ -20,7 +20,7 @@ jobs: @@ -20,7 +20,7 @@ jobs:
20 strategy: 20 strategy:
21 fail-fast: false 21 fail-fast: false
22 matrix: 22 matrix:
23 - os: [ubuntu-20.04] 23 + os: [ubuntu-latest]
24 python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312", "cp313"] 24 python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312", "cp313"]
25 manylinux: [manylinux2014] #, manylinux_2_28] 25 manylinux: [manylinux2014] #, manylinux_2_28]
26 26
@@ -79,6 +79,48 @@ jobs: @@ -79,6 +79,48 @@ jobs:
79 otool -L ./install/lib/libsherpa-onnx-c-api.dylib 79 otool -L ./install/lib/libsherpa-onnx-c-api.dylib
80 fi 80 fi
81 81
  82 + - name: Test streaming zipformer with homophone replacer
  83 + shell: bash
  84 + run: |
  85 + name=streaming-zipformer-with-hr-c-api
  86 + gcc -o $name ./c-api-examples/$name.c \
  87 + -I ./build/install/include \
  88 + -L ./build/install/lib/ \
  89 + -l sherpa-onnx-c-api \
  90 + -l onnxruntime
  91 +
  92 + ls -lh $name
  93 +
  94 + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
  95 + ldd ./$name
  96 + echo "----"
  97 + readelf -d ./$name
  98 + fi
  99 +
  100 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  101 + tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  102 + rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  103 +
  104 + ls -lh sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
  105 + echo "---"
  106 +
  107 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
  108 + tar xf dict.tar.bz2
  109 + rm dict.tar.bz2
  110 +
  111 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
  112 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
  113 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
  114 +
  115 + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
  116 + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
  117 +
  118 + ./$name
  119 +
  120 + rm -rf sherpa-onnx-streaming-zipformer-*
  121 + rm -rf dict lexicon.txt test-hr.wav replace.fst
  122 + rm -v $name
  123 +
82 - name: Test Dolphin CTC 124 - name: Test Dolphin CTC
83 shell: bash 125 shell: bash
84 run: | 126 run: |
@@ -81,6 +81,49 @@ jobs: @@ -81,6 +81,49 @@ jobs:
81 otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib 81 otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib
82 fi 82 fi
83 83
  84 + - name: Test streaming zipformer with Homophone replacer
  85 + shell: bash
  86 + run: |
  87 + name=streaming-zipformer-with-hr-cxx-api
  88 + g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
  89 + -I ./build/install/include \
  90 + -L ./build/install/lib/ \
  91 + -l sherpa-onnx-cxx-api \
  92 + -l sherpa-onnx-c-api \
  93 + -l onnxruntime
  94 +
  95 + ls -lh $name
  96 +
  97 + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
  98 + ldd ./$name
  99 + echo "----"
  100 + readelf -d ./$name
  101 + fi
  102 +
  103 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  104 + tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  105 + rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  106 +
  107 + ls -lh sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
  108 + echo "---"
  109 +
  110 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
  111 + tar xf dict.tar.bz2
  112 + rm dict.tar.bz2
  113 +
  114 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
  115 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
  116 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
  117 +
  118 + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
  119 + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
  120 +
  121 + ./$name
  122 +
  123 + rm -rf sherpa-onnx-streaming-zipformer-*
  124 + rm -rf dict lexicon.txt test-hr.wav replace.fst
  125 + rm -v ./$name
  126 +
84 - name: Test Dolphin CTC 127 - name: Test Dolphin CTC
85 shell: bash 128 shell: bash
86 run: | 129 run: |
@@ -56,6 +56,9 @@ target_link_libraries(fire-red-asr-c-api sherpa-onnx-c-api) @@ -56,6 +56,9 @@ target_link_libraries(fire-red-asr-c-api sherpa-onnx-c-api)
56 add_executable(sense-voice-c-api sense-voice-c-api.c) 56 add_executable(sense-voice-c-api sense-voice-c-api.c)
57 target_link_libraries(sense-voice-c-api sherpa-onnx-c-api) 57 target_link_libraries(sense-voice-c-api sherpa-onnx-c-api)
58 58
  59 +add_executable(sense-voice-with-hr-c-api sense-voice-with-hr-c-api.c)
  60 +target_link_libraries(sense-voice-with-hr-c-api sherpa-onnx-c-api)
  61 +
59 add_executable(dolphin-ctc-c-api dolphin-ctc-c-api.c) 62 add_executable(dolphin-ctc-c-api dolphin-ctc-c-api.c)
60 target_link_libraries(dolphin-ctc-c-api sherpa-onnx-c-api) 63 target_link_libraries(dolphin-ctc-c-api sherpa-onnx-c-api)
61 64
@@ -68,6 +71,9 @@ target_link_libraries(zipformer-c-api sherpa-onnx-c-api) @@ -68,6 +71,9 @@ target_link_libraries(zipformer-c-api sherpa-onnx-c-api)
68 add_executable(streaming-zipformer-c-api streaming-zipformer-c-api.c) 71 add_executable(streaming-zipformer-c-api streaming-zipformer-c-api.c)
69 target_link_libraries(streaming-zipformer-c-api sherpa-onnx-c-api) 72 target_link_libraries(streaming-zipformer-c-api sherpa-onnx-c-api)
70 73
  74 +add_executable(streaming-zipformer-with-hr-c-api streaming-zipformer-with-hr-c-api.c)
  75 +target_link_libraries(streaming-zipformer-with-hr-c-api sherpa-onnx-c-api)
  76 +
71 add_executable(paraformer-c-api paraformer-c-api.c) 77 add_executable(paraformer-c-api paraformer-c-api.c)
72 target_link_libraries(paraformer-c-api sherpa-onnx-c-api) 78 target_link_libraries(paraformer-c-api sherpa-onnx-c-api)
73 79
  1 +// c-api-examples/sense-voice-with-hr-c-api.c
  2 +//
  3 +// Copyright (c) 2024-2025 Xiaomi Corporation
  4 +
  5 +//
  6 +// This file demonstrates how to use SenseVoice with sherpa-onnx's C API
  7 +// with homophone replacer.
  8 +// clang-format off
  9 +//
  10 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  11 +// tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  12 +// rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  13 +//
  14 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
  15 +// tar xf dict.tar.bz2
  16 +//
  17 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
  18 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
  19 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
  20 +//
  21 +// clang-format on
  22 +
  23 +#include <stdio.h>
  24 +#include <stdlib.h>
  25 +#include <string.h>
  26 +
  27 +#include "sherpa-onnx/c-api/c-api.h"
  28 +
  29 +int32_t main() {
  30 + const char *wav_filename = "./test-hr.wav";
  31 + const char *model_filename =
  32 + "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx";
  33 + const char *tokens_filename =
  34 + "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt";
  35 + const char *language = "auto";
  36 + const char *provider = "cpu";
  37 + int32_t use_inverse_text_normalization = 1;
  38 +
  39 + const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  40 + if (wave == NULL) {
  41 + fprintf(stderr, "Failed to read %s\n", wav_filename);
  42 + return -1;
  43 + }
  44 +
  45 + SherpaOnnxOfflineSenseVoiceModelConfig sense_voice_config;
  46 + memset(&sense_voice_config, 0, sizeof(sense_voice_config));
  47 + sense_voice_config.model = model_filename;
  48 + sense_voice_config.language = language;
  49 + sense_voice_config.use_itn = use_inverse_text_normalization;
  50 +
  51 + // Offline model config
  52 + SherpaOnnxOfflineModelConfig offline_model_config;
  53 + memset(&offline_model_config, 0, sizeof(offline_model_config));
  54 + offline_model_config.debug = 1;
  55 + offline_model_config.num_threads = 1;
  56 + offline_model_config.provider = provider;
  57 + offline_model_config.tokens = tokens_filename;
  58 + offline_model_config.sense_voice = sense_voice_config;
  59 +
  60 + // Recognizer config
  61 + SherpaOnnxOfflineRecognizerConfig recognizer_config;
  62 + memset(&recognizer_config, 0, sizeof(recognizer_config));
  63 + recognizer_config.decoding_method = "greedy_search";
  64 + recognizer_config.model_config = offline_model_config;
  65 + recognizer_config.hr.dict_dir = "./dict";
  66 + recognizer_config.hr.lexicon = "./lexicon.txt";
  67 +
  68 + // Please see
  69 + // https://colab.research.google.com/drive/1jEaS3s8FbRJIcVQJv2EQx19EM_mnuARi?usp=sharing
  70 + // for how to generate your own replace.fst
  71 + recognizer_config.hr.rule_fsts = "./replace.fst";
  72 +
  73 + const SherpaOnnxOfflineRecognizer *recognizer =
  74 + SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
  75 +
  76 + if (recognizer == NULL) {
  77 + fprintf(stderr, "Please check your config!\n");
  78 + SherpaOnnxFreeWave(wave);
  79 + return -1;
  80 + }
  81 +
  82 + const SherpaOnnxOfflineStream *stream =
  83 + SherpaOnnxCreateOfflineStream(recognizer);
  84 +
  85 + SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
  86 + wave->num_samples);
  87 + SherpaOnnxDecodeOfflineStream(recognizer, stream);
  88 + const SherpaOnnxOfflineRecognizerResult *result =
  89 + SherpaOnnxGetOfflineStreamResult(stream);
  90 +
  91 + fprintf(stderr, "Decoded text: %s\n", result->text);
  92 +
  93 + SherpaOnnxDestroyOfflineRecognizerResult(result);
  94 + SherpaOnnxDestroyOfflineStream(stream);
  95 + SherpaOnnxDestroyOfflineRecognizer(recognizer);
  96 + SherpaOnnxFreeWave(wave);
  97 +
  98 + return 0;
  99 +}
  1 +// c-api-examples/streaming-zipformer-with-hr-c-api.c
  2 +//
  3 +// Copyright (c) 2025 Xiaomi Corporation
  4 +
  5 +//
  6 +// This file demonstrates how to use streaming Zipformer with sherpa-onnx's C
  7 +// API together with the homophone replacer.
  8 +// clang-format off
  9 +//
  10 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  11 +// tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  12 +// rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  13 +//
  14 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
  15 +// tar xf dict.tar.bz2
  16 +//
  17 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
  18 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
  19 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
  20 +//
  21 +// clang-format on
  22 +
  23 +#include <stdio.h>
  24 +#include <stdlib.h>
  25 +#include <string.h>
  26 +
  27 +#include "sherpa-onnx/c-api/c-api.h"
  28 +
  29 +int32_t main() {
  30 + const char *wav_filename = "test-hr.wav";
  31 +
  32 + const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  33 + if (wave == NULL) {
  34 + fprintf(stderr, "Failed to read %s\n", wav_filename);
  35 + return -1;
  36 + }
  37 +
  38 + // Online model config
  39 + SherpaOnnxOnlineModelConfig online_model_config;
  40 + memset(&online_model_config, 0, sizeof(online_model_config));
  41 + online_model_config.debug = 0;
  42 + online_model_config.num_threads = 1;
  43 + online_model_config.provider = "cpu";
  44 + online_model_config.tokens =
  45 + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt";
  46 +
  47 + online_model_config.transducer.encoder =
  48 + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
  49 + "encoder-epoch-99-avg-1.int8.onnx";
  50 +
  51 + // Note: We recommend not using int8.onnx for the decoder.
  52 + online_model_config.transducer.decoder =
  53 + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
  54 + "decoder-epoch-99-avg-1.onnx";
  55 +
  56 + online_model_config.transducer.joiner =
  57 + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
  58 + "joiner-epoch-99-avg-1.int8.onnx";
  59 +
  60 + online_model_config.tokens =
  61 + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt";
  62 +
  63 + online_model_config.num_threads = 1;
  64 +
  65 + // Recognizer config
  66 + SherpaOnnxOnlineRecognizerConfig recognizer_config;
  67 + memset(&recognizer_config, 0, sizeof(recognizer_config));
  68 + recognizer_config.decoding_method = "greedy_search";
  69 + recognizer_config.model_config = online_model_config;
  70 +
  71 + recognizer_config.hr.dict_dir = "./dict";
  72 + recognizer_config.hr.lexicon = "./lexicon.txt";
  73 +
  74 + // Please see
  75 + // https://colab.research.google.com/drive/1jEaS3s8FbRJIcVQJv2EQx19EM_mnuARi?usp=sharing
  76 + // for how to generate your own replace.fst
  77 + recognizer_config.hr.rule_fsts = "./replace.fst";
  78 +
  79 + const SherpaOnnxOnlineRecognizer *recognizer =
  80 + SherpaOnnxCreateOnlineRecognizer(&recognizer_config);
  81 +
  82 + if (recognizer == NULL) {
  83 + fprintf(stderr, "Please check your config!\n");
  84 + SherpaOnnxFreeWave(wave);
  85 + return -1;
  86 + }
  87 +
  88 + const SherpaOnnxOnlineStream *stream =
  89 + SherpaOnnxCreateOnlineStream(recognizer);
  90 +
  91 + const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
  92 + int32_t segment_id = 0;
  93 +
  94 +// simulate streaming. You can choose an arbitrary N
  95 +#define N 3200
  96 +
  97 + fprintf(stderr, "sample rate: %d, num samples: %d, duration: %.2f s\n",
  98 + wave->sample_rate, wave->num_samples,
  99 + (float)wave->num_samples / wave->sample_rate);
  100 +
  101 + int32_t k = 0;
  102 + while (k < wave->num_samples) {
  103 + int32_t start = k;
  104 + int32_t end =
  105 + (start + N > wave->num_samples) ? wave->num_samples : (start + N);
  106 + k += N;
  107 +
  108 + SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate,
  109 + wave->samples + start, end - start);
  110 + while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
  111 + SherpaOnnxDecodeOnlineStream(recognizer, stream);
  112 + }
  113 +
  114 + const SherpaOnnxOnlineRecognizerResult *r =
  115 + SherpaOnnxGetOnlineStreamResult(recognizer, stream);
  116 +
  117 + if (strlen(r->text)) {
  118 + SherpaOnnxPrint(display, segment_id, r->text);
  119 + }
  120 +
  121 + if (SherpaOnnxOnlineStreamIsEndpoint(recognizer, stream)) {
  122 + if (strlen(r->text)) {
  123 + ++segment_id;
  124 + }
  125 + SherpaOnnxOnlineStreamReset(recognizer, stream);
  126 + }
  127 +
  128 + SherpaOnnxDestroyOnlineRecognizerResult(r);
  129 + }
  130 +
  131 + // add some tail padding
  132 + float tail_paddings[4800] = {0}; // 0.3 seconds at 16 kHz sample rate
  133 + SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, tail_paddings,
  134 + 4800);
  135 +
  136 + SherpaOnnxFreeWave(wave);
  137 +
  138 + SherpaOnnxOnlineStreamInputFinished(stream);
  139 + while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
  140 + SherpaOnnxDecodeOnlineStream(recognizer, stream);
  141 + }
  142 +
  143 + const SherpaOnnxOnlineRecognizerResult *r =
  144 + SherpaOnnxGetOnlineStreamResult(recognizer, stream);
  145 +
  146 + if (strlen(r->text)) {
  147 + SherpaOnnxPrint(display, segment_id, r->text);
  148 + }
  149 +
  150 + SherpaOnnxDestroyOnlineRecognizerResult(r);
  151 +
  152 + SherpaOnnxDestroyDisplay(display);
  153 + SherpaOnnxDestroyOnlineStream(stream);
  154 + SherpaOnnxDestroyOnlineRecognizer(recognizer);
  155 + fprintf(stderr, "\n");
  156 +
  157 + return 0;
  158 +}
@@ -3,6 +3,9 @@ include_directories(${PROJECT_SOURCE_DIR}) @@ -3,6 +3,9 @@ include_directories(${PROJECT_SOURCE_DIR})
3 add_executable(streaming-zipformer-cxx-api ./streaming-zipformer-cxx-api.cc) 3 add_executable(streaming-zipformer-cxx-api ./streaming-zipformer-cxx-api.cc)
4 target_link_libraries(streaming-zipformer-cxx-api sherpa-onnx-cxx-api) 4 target_link_libraries(streaming-zipformer-cxx-api sherpa-onnx-cxx-api)
5 5
  6 +add_executable(streaming-zipformer-with-hr-cxx-api ./streaming-zipformer-with-hr-cxx-api.cc)
  7 +target_link_libraries(streaming-zipformer-with-hr-cxx-api sherpa-onnx-cxx-api)
  8 +
6 add_executable(speech-enhancement-gtcrn-cxx-api ./speech-enhancement-gtcrn-cxx-api.cc) 9 add_executable(speech-enhancement-gtcrn-cxx-api ./speech-enhancement-gtcrn-cxx-api.cc)
7 target_link_libraries(speech-enhancement-gtcrn-cxx-api sherpa-onnx-cxx-api) 10 target_link_libraries(speech-enhancement-gtcrn-cxx-api sherpa-onnx-cxx-api)
8 11
@@ -24,6 +27,9 @@ target_link_libraries(moonshine-cxx-api sherpa-onnx-cxx-api) @@ -24,6 +27,9 @@ target_link_libraries(moonshine-cxx-api sherpa-onnx-cxx-api)
24 add_executable(sense-voice-cxx-api ./sense-voice-cxx-api.cc) 27 add_executable(sense-voice-cxx-api ./sense-voice-cxx-api.cc)
25 target_link_libraries(sense-voice-cxx-api sherpa-onnx-cxx-api) 28 target_link_libraries(sense-voice-cxx-api sherpa-onnx-cxx-api)
26 29
  30 +add_executable(sense-voice-with-hr-cxx-api ./sense-voice-with-hr-cxx-api.cc)
  31 +target_link_libraries(sense-voice-with-hr-cxx-api sherpa-onnx-cxx-api)
  32 +
27 add_executable(dolphin-ctc-cxx-api ./dolphin-ctc-cxx-api.cc) 33 add_executable(dolphin-ctc-cxx-api ./dolphin-ctc-cxx-api.cc)
28 target_link_libraries(dolphin-ctc-cxx-api sherpa-onnx-cxx-api) 34 target_link_libraries(dolphin-ctc-cxx-api sherpa-onnx-cxx-api)
29 35
  1 +// cxx-api-examples/sense-voice-with-hr-cxx-api.cc
  2 +//
  3 +// Copyright (c) 2024-2025 Xiaomi Corporation
  4 +
  5 +//
  6 +// This file demonstrates how to use SenseVoice with sherpa-onnx's C++ API
  7 +// together with the homophone replacer.
  8 +// clang-format off
  9 +//
  10 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  11 +// tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  12 +// rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  13 +//
  14 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
  15 +// tar xf dict.tar.bz2
  16 +//
  17 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
  18 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
  19 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
  20 +//
  21 +// clang-format on
  22 +
  23 +#include <chrono> // NOLINT
  24 +#include <iostream>
  25 +#include <string>
  26 +
  27 +#include "sherpa-onnx/c-api/cxx-api.h"
  28 +
  29 +int32_t main() {
  30 + using namespace sherpa_onnx::cxx; // NOLINT
  31 + OfflineRecognizerConfig config;
  32 +
  33 + config.model_config.sense_voice.model =
  34 + "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx";
  35 + config.model_config.sense_voice.use_itn = true;
  36 + config.model_config.sense_voice.language = "auto";
  37 + config.model_config.tokens =
  38 + "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt";
  39 + config.hr.dict_dir = "./dict";
  40 + config.hr.lexicon = "./lexicon.txt";
  41 +
  42 + // Please see
  43 + // https://colab.research.google.com/drive/1jEaS3s8FbRJIcVQJv2EQx19EM_mnuARi?usp=sharing
  44 + // for how to generate your own replace.fst
  45 + config.hr.rule_fsts = "./replace.fst";
  46 +
  47 + config.model_config.num_threads = 1;
  48 +
  49 + std::cout << "Loading model\n";
  50 + OfflineRecognizer recongizer = OfflineRecognizer::Create(config);
  51 + if (!recongizer.Get()) {
  52 + std::cerr << "Please check your config\n";
  53 + return -1;
  54 + }
  55 + std::cout << "Loading model done\n";
  56 +
  57 + std::string wave_filename = "./test-hr.wav";
  58 +
  59 + Wave wave = ReadWave(wave_filename);
  60 + if (wave.samples.empty()) {
  61 + std::cerr << "Failed to read: '" << wave_filename << "'\n";
  62 + return -1;
  63 + }
  64 +
  65 + std::cout << "Start recognition\n";
  66 + const auto begin = std::chrono::steady_clock::now();
  67 +
  68 + OfflineStream stream = recongizer.CreateStream();
  69 + stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
  70 + wave.samples.size());
  71 +
  72 + recongizer.Decode(&stream);
  73 +
  74 + OfflineRecognizerResult result = recongizer.GetResult(&stream);
  75 +
  76 + const auto end = std::chrono::steady_clock::now();
  77 + const float elapsed_seconds =
  78 + std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
  79 + .count() /
  80 + 1000.;
  81 + float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  82 + float rtf = elapsed_seconds / duration;
  83 +
  84 + std::cout << "text: " << result.text << "\n";
  85 + printf("Number of threads: %d\n", config.model_config.num_threads);
  86 + printf("Duration: %.3fs\n", duration);
  87 + printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  88 + printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
  89 + duration, rtf);
  90 +
  91 + return 0;
  92 +}
  1 +// cxx-api-examples/streaming-zipformer-with-hr-cxx-api.cc
  2 +// Copyright (c) 2024-2025 Xiaomi Corporation
  3 +
  4 +//
  5 +// This file demonstrates how to use streaming Zipformer together with the
  6 +// homophone replacer using sherpa-onnx's C++ API.
  7 +//
  8 +// clang-format off
  9 +//
  10 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  11 +// tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  12 +// rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  13 +//
  14 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
  15 +// tar xf dict.tar.bz2
  16 +//
  17 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
  18 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
  19 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
  20 +//
  21 +// clang-format on
  22 +
  23 +#include <chrono> // NOLINT
  24 +#include <iostream>
  25 +#include <string>
  26 +
  27 +#include "sherpa-onnx/c-api/cxx-api.h"
  28 +
  29 +int32_t main() {
  30 + using namespace sherpa_onnx::cxx; // NOLINT
  31 + OnlineRecognizerConfig config;
  32 +
  33 + // please see
  34 + // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
  35 + config.model_config.transducer.encoder =
  36 + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
  37 + "encoder-epoch-99-avg-1.int8.onnx";
  38 +
  39 + // Note: We recommend not using int8.onnx for the decoder.
  40 + config.model_config.transducer.decoder =
  41 + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
  42 + "decoder-epoch-99-avg-1.onnx";
  43 +
  44 + config.model_config.transducer.joiner =
  45 + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
  46 + "joiner-epoch-99-avg-1.int8.onnx";
  47 +
  48 + config.model_config.tokens =
  49 + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt";
  50 +
  51 + config.model_config.num_threads = 1;
  52 +
  53 + config.hr.dict_dir = "./dict";
  54 + config.hr.lexicon = "./lexicon.txt";
  55 +
  56 + // Please see
  57 + // https://colab.research.google.com/drive/1jEaS3s8FbRJIcVQJv2EQx19EM_mnuARi?usp=sharing
  58 + // for how to generate your own replace.fst
  59 + config.hr.rule_fsts = "./replace.fst";
  60 +
  61 + std::cout << "Loading model\n";
  62 + OnlineRecognizer recongizer = OnlineRecognizer::Create(config);
  63 + if (!recongizer.Get()) {
  64 + std::cerr << "Please check your config\n";
  65 + return -1;
  66 + }
  67 + std::cout << "Loading model done\n";
  68 +
  69 + std::string wave_filename = "./test-hr.wav";
  70 + Wave wave = ReadWave(wave_filename);
  71 + if (wave.samples.empty()) {
  72 + std::cerr << "Failed to read: '" << wave_filename << "'\n";
  73 + return -1;
  74 + }
  75 +
  76 + std::cout << "Start recognition\n";
  77 + const auto begin = std::chrono::steady_clock::now();
  78 +
  79 + OnlineStream stream = recongizer.CreateStream();
  80 + stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
  81 + wave.samples.size());
  82 + stream.InputFinished();
  83 +
  84 + while (recongizer.IsReady(&stream)) {
  85 + recongizer.Decode(&stream);
  86 + }
  87 +
  88 + OnlineRecognizerResult result = recongizer.GetResult(&stream);
  89 +
  90 + const auto end = std::chrono::steady_clock::now();
  91 + const float elapsed_seconds =
  92 + std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
  93 + .count() /
  94 + 1000.;
  95 + float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  96 + float rtf = elapsed_seconds / duration;
  97 +
  98 + std::cout << "text: " << result.text << "\n";
  99 + printf("Number of threads: %d\n", config.model_config.num_threads);
  100 + printf("Duration: %.3fs\n", duration);
  101 + printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  102 + printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
  103 + duration, rtf);
  104 +
  105 + return 0;
  106 +}
@@ -153,6 +153,10 @@ static sherpa_onnx::OnlineRecognizerConfig GetOnlineRecognizerConfig( @@ -153,6 +153,10 @@ static sherpa_onnx::OnlineRecognizerConfig GetOnlineRecognizerConfig(
153 recognizer_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, ""); 153 recognizer_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, "");
154 recognizer_config.rule_fars = SHERPA_ONNX_OR(config->rule_fars, ""); 154 recognizer_config.rule_fars = SHERPA_ONNX_OR(config->rule_fars, "");
155 155
  156 + recognizer_config.hr.dict_dir = SHERPA_ONNX_OR(config->hr.dict_dir, "");
  157 + recognizer_config.hr.lexicon = SHERPA_ONNX_OR(config->hr.lexicon, "");
  158 + recognizer_config.hr.rule_fsts = SHERPA_ONNX_OR(config->hr.rule_fsts, "");
  159 +
156 if (config->model_config.debug) { 160 if (config->model_config.debug) {
157 #if __OHOS__ 161 #if __OHOS__
158 SHERPA_ONNX_LOGE("%{public}s\n", recognizer_config.ToString().c_str()); 162 SHERPA_ONNX_LOGE("%{public}s\n", recognizer_config.ToString().c_str());
@@ -494,6 +498,10 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig( @@ -494,6 +498,10 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig(
494 recognizer_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, ""); 498 recognizer_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, "");
495 recognizer_config.rule_fars = SHERPA_ONNX_OR(config->rule_fars, ""); 499 recognizer_config.rule_fars = SHERPA_ONNX_OR(config->rule_fars, "");
496 500
  501 + recognizer_config.hr.dict_dir = SHERPA_ONNX_OR(config->hr.dict_dir, "");
  502 + recognizer_config.hr.lexicon = SHERPA_ONNX_OR(config->hr.lexicon, "");
  503 + recognizer_config.hr.rule_fsts = SHERPA_ONNX_OR(config->hr.rule_fsts, "");
  504 +
497 if (config->model_config.debug) { 505 if (config->model_config.debug) {
498 #if __OHOS__ 506 #if __OHOS__
499 SHERPA_ONNX_LOGE("%{public}s\n", recognizer_config.ToString().c_str()); 507 SHERPA_ONNX_LOGE("%{public}s\n", recognizer_config.ToString().c_str());
@@ -112,6 +112,12 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineCtcFstDecoderConfig { @@ -112,6 +112,12 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineCtcFstDecoderConfig {
112 int32_t max_active; 112 int32_t max_active;
113 } SherpaOnnxOnlineCtcFstDecoderConfig; 113 } SherpaOnnxOnlineCtcFstDecoderConfig;
114 114
  // Paths for the homophone replacer. This struct is embedded as the `hr`
  // member of both SherpaOnnxOnlineRecognizerConfig and
  // SherpaOnnxOfflineRecognizerConfig.
  //
  // NOTE(review): the C implementation reads each member through
  // SHERPA_ONNX_OR(..., ""), so a NULL member appears to be treated as an
  // empty string - confirm against the macro definition.
  115 +SHERPA_ONNX_API typedef struct SherpaOnnxHomophoneReplacerConfig {
  // Dictionary directory; the bundled examples pass "./dict" (presumably the
  // directory extracted from dict.tar.bz2 - TODO confirm).
  116 + const char *dict_dir;
  // Lexicon file; the bundled examples pass "./lexicon.txt".
  117 + const char *lexicon;
  // Replacement rule FST(s); the bundled examples pass "./replace.fst".
  // NOTE(review): the plural name suggests several paths may be accepted -
  // confirm the expected separator with the core library.
  118 + const char *rule_fsts;
  119 +} SherpaOnnxHomophoneReplacerConfig;
  120 +
115 SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerConfig { 121 SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerConfig {
116 SherpaOnnxFeatureConfig feat_config; 122 SherpaOnnxFeatureConfig feat_config;
117 SherpaOnnxOnlineModelConfig model_config; 123 SherpaOnnxOnlineModelConfig model_config;
@@ -157,6 +163,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerConfig { @@ -157,6 +163,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerConfig {
157 const char *hotwords_buf; 163 const char *hotwords_buf;
158 /// byte size excluding the tailing '\0' 164 /// byte size excluding the tailing '\0'
159 int32_t hotwords_buf_size; 165 int32_t hotwords_buf_size;
  166 + SherpaOnnxHomophoneReplacerConfig hr;
160 } SherpaOnnxOnlineRecognizerConfig; 167 } SherpaOnnxOnlineRecognizerConfig;
161 168
162 SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerResult { 169 SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerResult {
@@ -461,6 +468,8 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig { @@ -461,6 +468,8 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig {
461 const char *rule_fsts; 468 const char *rule_fsts;
462 const char *rule_fars; 469 const char *rule_fars;
463 float blank_penalty; 470 float blank_penalty;
  471 +
  472 + SherpaOnnxHomophoneReplacerConfig hr;
464 } SherpaOnnxOfflineRecognizerConfig; 473 } SherpaOnnxOfflineRecognizerConfig;
465 474
466 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizer 475 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizer
@@ -99,6 +99,10 @@ OnlineRecognizer OnlineRecognizer::Create( @@ -99,6 +99,10 @@ OnlineRecognizer OnlineRecognizer::Create(
99 c.hotwords_buf = config.hotwords_buf.c_str(); 99 c.hotwords_buf = config.hotwords_buf.c_str();
100 c.hotwords_buf_size = config.hotwords_buf.size(); 100 c.hotwords_buf_size = config.hotwords_buf.size();
101 101
  102 + c.hr.dict_dir = config.hr.dict_dir.c_str();
  103 + c.hr.lexicon = config.hr.lexicon.c_str();
  104 + c.hr.rule_fsts = config.hr.rule_fsts.c_str();
  105 +
102 auto p = SherpaOnnxCreateOnlineRecognizer(&c); 106 auto p = SherpaOnnxCreateOnlineRecognizer(&c);
103 return OnlineRecognizer(p); 107 return OnlineRecognizer(p);
104 } 108 }
@@ -261,6 +265,10 @@ OfflineRecognizer OfflineRecognizer::Create( @@ -261,6 +265,10 @@ OfflineRecognizer OfflineRecognizer::Create(
261 265
262 c.blank_penalty = config.blank_penalty; 266 c.blank_penalty = config.blank_penalty;
263 267
  268 + c.hr.dict_dir = config.hr.dict_dir.c_str();
  269 + c.hr.lexicon = config.hr.lexicon.c_str();
  270 + c.hr.rule_fsts = config.hr.rule_fsts.c_str();
  271 +
264 auto p = SherpaOnnxCreateOfflineRecognizer(&c); 272 auto p = SherpaOnnxCreateOfflineRecognizer(&c);
265 return OfflineRecognizer(p); 273 return OfflineRecognizer(p);
266 } 274 }
@@ -55,6 +55,12 @@ struct OnlineCtcFstDecoderConfig { @@ -55,6 +55,12 @@ struct OnlineCtcFstDecoderConfig {
55 int32_t max_active = 3000; 55 int32_t max_active = 3000;
56 }; 56 };
57 57
  // Paths for the homophone replacer, used as the `hr` member of
  // OnlineRecognizerConfig and OfflineRecognizerConfig. The recognizer
  // factories forward each member to the C API via c_str(). All members are
  // empty strings when default-constructed.
  58 +struct HomophoneReplacerConfig {
  // Dictionary directory; the bundled examples pass "./dict".
  59 + std::string dict_dir;
  // Lexicon file; the bundled examples pass "./lexicon.txt".
  60 + std::string lexicon;
  // Replacement rule FST(s); the bundled examples pass "./replace.fst".
  61 + std::string rule_fsts;
  62 +};
  63 +
58 struct OnlineRecognizerConfig { 64 struct OnlineRecognizerConfig {
59 FeatureConfig feat_config; 65 FeatureConfig feat_config;
60 OnlineModelConfig model_config; 66 OnlineModelConfig model_config;
@@ -81,6 +87,7 @@ struct OnlineRecognizerConfig { @@ -81,6 +87,7 @@ struct OnlineRecognizerConfig {
81 float blank_penalty = 0; 87 float blank_penalty = 0;
82 88
83 std::string hotwords_buf; 89 std::string hotwords_buf;
  90 + HomophoneReplacerConfig hr;
84 }; 91 };
85 92
86 struct OnlineRecognizerResult { 93 struct OnlineRecognizerResult {
@@ -280,6 +287,7 @@ struct SHERPA_ONNX_API OfflineRecognizerConfig { @@ -280,6 +287,7 @@ struct SHERPA_ONNX_API OfflineRecognizerConfig {
280 std::string rule_fsts; 287 std::string rule_fsts;
281 std::string rule_fars; 288 std::string rule_fars;
282 float blank_penalty = 0; 289 float blank_penalty = 0;
  290 + HomophoneReplacerConfig hr;
283 }; 291 };
284 292
285 struct SHERPA_ONNX_API OfflineRecognizerResult { 293 struct SHERPA_ONNX_API OfflineRecognizerResult {