Fangjun Kuang
Committed by GitHub

Add C++ API for non-streaming ASR (#1456)

@@ -9,6 +9,8 @@ log() { @@ -9,6 +9,8 @@ log() {
9 } 9 }
10 10
11 echo "CXX_STREAMING_ZIPFORMER_EXE is $CXX_STREAMING_ZIPFORMER_EXE" 11 echo "CXX_STREAMING_ZIPFORMER_EXE is $CXX_STREAMING_ZIPFORMER_EXE"
  12 +echo "CXX_WHISPER_EXE is $CXX_WHISPER_EXE"
  13 +echo "CXX_SENSE_VOICE_EXE is $CXX_SENSE_VOICE_EXE"
12 echo "PATH: $PATH" 14 echo "PATH: $PATH"
13 15
14 log "------------------------------------------------------------" 16 log "------------------------------------------------------------"
@@ -19,3 +21,22 @@ tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 @@ -19,3 +21,22 @@ tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
19 rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 21 rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
20 $CXX_STREAMING_ZIPFORMER_EXE 22 $CXX_STREAMING_ZIPFORMER_EXE
21 rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 23 rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
  24 +
  25 +log "------------------------------------------------------------"
  26 +log "Test Whisper CXX API"
  27 +log "------------------------------------------------------------"
  28 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
  29 +tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
  30 +rm sherpa-onnx-whisper-tiny.en.tar.bz2
  31 +$CXX_WHISPER_EXE
  32 +rm -rf sherpa-onnx-whisper-tiny.en
  33 +
  34 +log "------------------------------------------------------------"
  35 +log "Test SenseVoice CXX API"
  36 +log "------------------------------------------------------------"
  37 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  38 +tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  39 +rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  40 +
  41 +$CXX_SENSE_VOICE_EXE
  42 +rm -rf sherpa-onnx-sense-voice-*
@@ -4,6 +4,7 @@ on: @@ -4,6 +4,7 @@ on:
4 push: 4 push:
5 branches: 5 branches:
6 - master 6 - master
  7 + - cxx-api-asr-non-streaming
7 paths: 8 paths:
8 - '.github/workflows/cxx-api.yaml' 9 - '.github/workflows/cxx-api.yaml'
9 - 'CMakeLists.txt' 10 - 'CMakeLists.txt'
@@ -82,6 +83,74 @@ jobs: @@ -82,6 +83,74 @@ jobs:
82 otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib 83 otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib
83 fi 84 fi
84 85
  86 + - name: Test whisper
  87 + shell: bash
  88 + run: |
  89 + g++ -std=c++17 -o whisper-cxx-api ./cxx-api-examples/whisper-cxx-api.cc \
  90 + -I ./build/install/include \
  91 + -L ./build/install/lib/ \
  92 + -l sherpa-onnx-cxx-api \
  93 + -l sherpa-onnx-c-api \
  94 + -l onnxruntime
  95 +
  96 + ls -lh whisper-cxx-api
  97 +
  98 + if [[ ${{ matrix.os }} == ubuntu-latest ]]; then
  99 + ldd ./whisper-cxx-api
  100 + echo "----"
  101 + readelf -d ./whisper-cxx-api
  102 + fi
  103 +
  104 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
  105 + tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
  106 + rm sherpa-onnx-whisper-tiny.en.tar.bz2
  107 +
  108 + ls -lh sherpa-onnx-whisper-tiny.en
  109 + echo "---"
  110 + ls -lh sherpa-onnx-whisper-tiny.en/test_wavs
  111 +
  112 + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
  113 + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
  114 +
  115 + ./whisper-cxx-api
  116 +
  117 + rm -rf sherpa-onnx-whisper-*
  118 + rm ./whisper-cxx-api
  119 +
  120 + - name: Test SenseVoice
  121 + shell: bash
  122 + run: |
  123 + g++ -std=c++17 -o sense-voice-cxx-api ./cxx-api-examples/sense-voice-cxx-api.cc \
  124 + -I ./build/install/include \
  125 + -L ./build/install/lib/ \
  126 + -l sherpa-onnx-cxx-api \
  127 + -l sherpa-onnx-c-api \
  128 + -l onnxruntime
  129 +
  130 + ls -lh sense-voice-cxx-api
  131 +
  132 + if [[ ${{ matrix.os }} == ubuntu-latest ]]; then
  133 + ldd ./sense-voice-cxx-api
  134 + echo "----"
  135 + readelf -d ./sense-voice-cxx-api
  136 + fi
  137 +
  138 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  139 + tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  140 + rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  141 +
  142 + ls -lh sherpa-onnx-sense-voice-*
  143 + echo "---"
  144 + ls -lh sherpa-onnx-sense-voice-*/test_wavs
  145 +
  146 + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
  147 + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
  148 +
  149 + ./sense-voice-cxx-api
  150 +
  151 + rm -rf sherpa-onnx-sense-voice-*
  152 + rm ./sense-voice-cxx-api
  153 +
85 - name: Test streaming zipformer 154 - name: Test streaming zipformer
86 shell: bash 155 shell: bash
87 run: | 156 run: |
@@ -155,6 +155,8 @@ jobs: @@ -155,6 +155,8 @@ jobs:
155 du -h -d1 . 155 du -h -d1 .
156 export PATH=$PWD/build/bin:$PATH 156 export PATH=$PWD/build/bin:$PATH
157 export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api 157 export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api
  158 + export CXX_WHISPER_EXE=whisper-cxx-api
  159 + export CXX_SENSE_VOICE_EXE=sense-voice-cxx-api
158 160
159 .github/scripts/test-cxx-api.sh 161 .github/scripts/test-cxx-api.sh
160 du -h -d1 . 162 du -h -d1 .
@@ -127,6 +127,8 @@ jobs: @@ -127,6 +127,8 @@ jobs:
127 du -h -d1 . 127 du -h -d1 .
128 export PATH=$PWD/build/bin:$PATH 128 export PATH=$PWD/build/bin:$PATH
129 export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api 129 export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api
  130 + export CXX_WHISPER_EXE=whisper-cxx-api
  131 + export CXX_SENSE_VOICE_EXE=sense-voice-cxx-api
130 132
131 .github/scripts/test-cxx-api.sh 133 .github/scripts/test-cxx-api.sh
132 du -h -d1 . 134 du -h -d1 .
@@ -81,6 +81,7 @@ jobs: @@ -81,6 +81,7 @@ jobs:
81 run: | 81 run: |
82 export PATH=$PWD/build/bin:$PATH 82 export PATH=$PWD/build/bin:$PATH
83 export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api 83 export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api
  84 + export CXX_WHISPER_EXE=whisper-cxx-api
84 85
85 .github/scripts/test-cxx-api.sh 86 .github/scripts/test-cxx-api.sh
86 87
@@ -98,6 +98,8 @@ jobs: @@ -98,6 +98,8 @@ jobs:
98 run: | 98 run: |
99 export PATH=$PWD/build/bin/Release:$PATH 99 export PATH=$PWD/build/bin/Release:$PATH
100 export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api.exe 100 export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api.exe
  101 + export CXX_WHISPER_EXE=whisper-cxx-api.exe
  102 + export CXX_SENSE_VOICE_EXE=sense-voice-cxx-api.exe
101 103
102 .github/scripts/test-cxx-api.sh 104 .github/scripts/test-cxx-api.sh
103 105
@@ -98,6 +98,8 @@ jobs: @@ -98,6 +98,8 @@ jobs:
98 run: | 98 run: |
99 export PATH=$PWD/build/bin/Release:$PATH 99 export PATH=$PWD/build/bin/Release:$PATH
100 export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api.exe 100 export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api.exe
  101 + export CXX_WHISPER_EXE=whisper-cxx-api.exe
  102 + export CXX_SENSE_VOICE_EXE=sense-voice-cxx-api.exe
101 103
102 .github/scripts/test-cxx-api.sh 104 .github/scripts/test-cxx-api.sh
103 105
@@ -54,7 +54,7 @@ int32_t main() { @@ -54,7 +54,7 @@ int32_t main() {
54 recognizer_config.decoding_method = "greedy_search"; 54 recognizer_config.decoding_method = "greedy_search";
55 recognizer_config.model_config = offline_model_config; 55 recognizer_config.model_config = offline_model_config;
56 56
57 - SherpaOnnxOfflineRecognizer *recognizer = 57 + const SherpaOnnxOfflineRecognizer *recognizer =
58 SherpaOnnxCreateOfflineRecognizer(&recognizer_config); 58 SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
59 59
60 if (recognizer == NULL) { 60 if (recognizer == NULL) {
@@ -63,7 +63,8 @@ int32_t main() { @@ -63,7 +63,8 @@ int32_t main() {
63 return -1; 63 return -1;
64 } 64 }
65 65
66 - SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer); 66 + const SherpaOnnxOfflineStream *stream =
  67 + SherpaOnnxCreateOfflineStream(recognizer);
67 68
68 SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples, 69 SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
69 wave->num_samples); 70 wave->num_samples);
@@ -56,7 +56,7 @@ int32_t main() { @@ -56,7 +56,7 @@ int32_t main() {
56 recognizer_config.decoding_method = "greedy_search"; 56 recognizer_config.decoding_method = "greedy_search";
57 recognizer_config.model_config = offline_model_config; 57 recognizer_config.model_config = offline_model_config;
58 58
59 - SherpaOnnxOfflineRecognizer *recognizer = 59 + const SherpaOnnxOfflineRecognizer *recognizer =
60 SherpaOnnxCreateOfflineRecognizer(&recognizer_config); 60 SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
61 61
62 if (recognizer == NULL) { 62 if (recognizer == NULL) {
@@ -65,7 +65,8 @@ int32_t main() { @@ -65,7 +65,8 @@ int32_t main() {
65 return -1; 65 return -1;
66 } 66 }
67 67
68 - SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer); 68 + const SherpaOnnxOfflineStream *stream =
  69 + SherpaOnnxCreateOfflineStream(recognizer);
69 70
70 SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples, 71 SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
71 wave->num_samples); 72 wave->num_samples);
@@ -107,7 +107,8 @@ int32_t main() { @@ -107,7 +107,8 @@ int32_t main() {
107 return -1; 107 return -1;
108 } 108 }
109 109
110 - SherpaOnnxOnlineStream *stream = SherpaOnnxCreateOnlineStream(recognizer); 110 + const SherpaOnnxOnlineStream *stream =
  111 + SherpaOnnxCreateOnlineStream(recognizer);
111 112
112 const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50); 113 const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
113 int32_t segment_id = 0; 114 int32_t segment_id = 0;
@@ -108,7 +108,8 @@ int32_t main() { @@ -108,7 +108,8 @@ int32_t main() {
108 return -1; 108 return -1;
109 } 109 }
110 110
111 - SherpaOnnxOnlineStream *stream = SherpaOnnxCreateOnlineStream(recognizer); 111 + const SherpaOnnxOnlineStream *stream =
  112 + SherpaOnnxCreateOnlineStream(recognizer);
112 113
113 const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50); 114 const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
114 int32_t segment_id = 0; 115 int32_t segment_id = 0;
@@ -66,7 +66,8 @@ int32_t main() { @@ -66,7 +66,8 @@ int32_t main() {
66 return -1; 66 return -1;
67 } 67 }
68 68
69 - SherpaOnnxOnlineStream *stream = SherpaOnnxCreateOnlineStream(recognizer); 69 + const SherpaOnnxOnlineStream *stream =
  70 + SherpaOnnxCreateOnlineStream(recognizer);
70 71
71 const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50); 72 const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
72 int32_t segment_id = 0; 73 int32_t segment_id = 0;
@@ -130,7 +130,8 @@ int32_t main() { @@ -130,7 +130,8 @@ int32_t main() {
130 return -1; 130 return -1;
131 } 131 }
132 132
133 - SherpaOnnxOnlineStream *stream = SherpaOnnxCreateOnlineStream(recognizer); 133 + const SherpaOnnxOnlineStream *stream =
  134 + SherpaOnnxCreateOnlineStream(recognizer);
134 135
135 const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50); 136 const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
136 int32_t segment_id = 0; 137 int32_t segment_id = 0;
@@ -72,7 +72,8 @@ int32_t main() { @@ -72,7 +72,8 @@ int32_t main() {
72 return -1; 72 return -1;
73 } 73 }
74 74
75 - SherpaOnnxOnlineStream *stream = SherpaOnnxCreateOnlineStream(recognizer); 75 + const SherpaOnnxOnlineStream *stream =
  76 + SherpaOnnxCreateOnlineStream(recognizer);
76 77
77 const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50); 78 const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
78 int32_t segment_id = 0; 79 int32_t segment_id = 0;
@@ -49,7 +49,7 @@ int32_t main() { @@ -49,7 +49,7 @@ int32_t main() {
49 recognizer_config.decoding_method = "greedy_search"; 49 recognizer_config.decoding_method = "greedy_search";
50 recognizer_config.model_config = offline_model_config; 50 recognizer_config.model_config = offline_model_config;
51 51
52 - SherpaOnnxOfflineRecognizer *recognizer = 52 + const SherpaOnnxOfflineRecognizer *recognizer =
53 SherpaOnnxCreateOfflineRecognizer(&recognizer_config); 53 SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
54 54
55 if (recognizer == NULL) { 55 if (recognizer == NULL) {
@@ -58,7 +58,8 @@ int32_t main() { @@ -58,7 +58,8 @@ int32_t main() {
58 return -1; 58 return -1;
59 } 59 }
60 60
61 - SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer); 61 + const SherpaOnnxOfflineStream *stream =
  62 + SherpaOnnxCreateOfflineStream(recognizer);
62 63
63 SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples, 64 SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
64 wave->num_samples); 65 wave->num_samples);
@@ -66,7 +66,7 @@ int32_t main() { @@ -66,7 +66,7 @@ int32_t main() {
66 recognizer_config.decoding_method = "greedy_search"; 66 recognizer_config.decoding_method = "greedy_search";
67 recognizer_config.model_config = offline_model_config; 67 recognizer_config.model_config = offline_model_config;
68 68
69 - SherpaOnnxOfflineRecognizer *recognizer = 69 + const SherpaOnnxOfflineRecognizer *recognizer =
70 SherpaOnnxCreateOfflineRecognizer(&recognizer_config); 70 SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
71 71
72 if (recognizer == NULL) { 72 if (recognizer == NULL) {
@@ -108,8 +108,9 @@ int32_t main() { @@ -108,8 +108,9 @@ int32_t main() {
108 const SherpaOnnxSpeechSegment *segment = 108 const SherpaOnnxSpeechSegment *segment =
109 SherpaOnnxVoiceActivityDetectorFront(vad); 109 SherpaOnnxVoiceActivityDetectorFront(vad);
110 110
111 - SherpaOnnxOfflineStream *stream = 111 + const SherpaOnnxOfflineStream *stream =
112 SherpaOnnxCreateOfflineStream(recognizer); 112 SherpaOnnxCreateOfflineStream(recognizer);
  113 +
113 SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, 114 SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate,
114 segment->samples, segment->n); 115 segment->samples, segment->n);
115 116
@@ -138,7 +139,9 @@ int32_t main() { @@ -138,7 +139,9 @@ int32_t main() {
138 const SherpaOnnxSpeechSegment *segment = 139 const SherpaOnnxSpeechSegment *segment =
139 SherpaOnnxVoiceActivityDetectorFront(vad); 140 SherpaOnnxVoiceActivityDetectorFront(vad);
140 141
141 - SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer); 142 + const SherpaOnnxOfflineStream *stream =
  143 + SherpaOnnxCreateOfflineStream(recognizer);
  144 +
142 SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, segment->samples, 145 SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, segment->samples,
143 segment->n); 146 segment->n);
144 147
@@ -58,7 +58,7 @@ int32_t main() { @@ -58,7 +58,7 @@ int32_t main() {
58 recognizer_config.decoding_method = "greedy_search"; 58 recognizer_config.decoding_method = "greedy_search";
59 recognizer_config.model_config = offline_model_config; 59 recognizer_config.model_config = offline_model_config;
60 60
61 - SherpaOnnxOfflineRecognizer *recognizer = 61 + const SherpaOnnxOfflineRecognizer *recognizer =
62 SherpaOnnxCreateOfflineRecognizer(&recognizer_config); 62 SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
63 63
64 if (recognizer == NULL) { 64 if (recognizer == NULL) {
@@ -69,7 +69,8 @@ int32_t main() { @@ -69,7 +69,8 @@ int32_t main() {
69 return -1; 69 return -1;
70 } 70 }
71 71
72 - SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer); 72 + const SherpaOnnxOfflineStream *stream =
  73 + SherpaOnnxCreateOfflineStream(recognizer);
73 74
74 SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples, 75 SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
75 wave->num_samples); 76 wave->num_samples);
@@ -60,7 +60,7 @@ int32_t main() { @@ -60,7 +60,7 @@ int32_t main() {
60 recognizer_config.decoding_method = "greedy_search"; 60 recognizer_config.decoding_method = "greedy_search";
61 recognizer_config.model_config = offline_model_config; 61 recognizer_config.model_config = offline_model_config;
62 62
63 - SherpaOnnxOfflineRecognizer *recognizer = 63 + const SherpaOnnxOfflineRecognizer *recognizer =
64 SherpaOnnxCreateOfflineRecognizer(&recognizer_config); 64 SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
65 65
66 if (recognizer == NULL) { 66 if (recognizer == NULL) {
@@ -69,7 +69,8 @@ int32_t main() { @@ -69,7 +69,8 @@ int32_t main() {
69 return -1; 69 return -1;
70 } 70 }
71 71
72 - SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer); 72 + const SherpaOnnxOfflineStream *stream =
  73 + SherpaOnnxCreateOfflineStream(recognizer);
73 74
74 SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples, 75 SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
75 wave->num_samples); 76 wave->num_samples);
@@ -2,3 +2,9 @@ include_directories(${CMAKE_SOURCE_DIR}) @@ -2,3 +2,9 @@ include_directories(${CMAKE_SOURCE_DIR})
2 2
3 add_executable(streaming-zipformer-cxx-api ./streaming-zipformer-cxx-api.cc) 3 add_executable(streaming-zipformer-cxx-api ./streaming-zipformer-cxx-api.cc)
4 target_link_libraries(streaming-zipformer-cxx-api sherpa-onnx-cxx-api) 4 target_link_libraries(streaming-zipformer-cxx-api sherpa-onnx-cxx-api)
  5 +
  6 +add_executable(whisper-cxx-api ./whisper-cxx-api.cc)
  7 +target_link_libraries(whisper-cxx-api sherpa-onnx-cxx-api)
  8 +
  9 +add_executable(sense-voice-cxx-api ./sense-voice-cxx-api.cc)
  10 +target_link_libraries(sense-voice-cxx-api sherpa-onnx-cxx-api)
  1 +// cxx-api-examples/sense-voice-cxx-api.cc
  2 +// Copyright (c) 2024 Xiaomi Corporation
  3 +
  4 +//
  5 +// This file demonstrates how to use sense voice with sherpa-onnx's C++ API.
  6 +//
  7 +// clang-format off
  8 +//
  9 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  10 +// tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  11 +// rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  12 +//
  13 +// clang-format on
  14 +
  15 +#include <chrono> // NOLINT
  16 +#include <iostream>
  17 +#include <string>
  18 +
  19 +#include "sherpa-onnx/c-api/cxx-api.h"
  20 +
  21 +int32_t main() {  // Demo: load SenseVoice, decode one wave file, print text and timing. Returns 0 on success, -1 on failure.
  22 + using namespace sherpa_onnx::cxx;
  23 + OfflineRecognizerConfig config;
  24 +
  25 + config.model_config.sense_voice.model =  // "./" paths below are relative to the current working directory
  26 + "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx";
  27 + config.model_config.sense_voice.use_itn = true;
  28 + config.model_config.sense_voice.language = "auto";
  29 + config.model_config.tokens =
  30 + "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt";
  31 +
  32 + config.model_config.num_threads = 1;
  33 +
  34 + std::cout << "Loading model\n";
  35 + OfflineRecognizer recongizer = OfflineRecognizer::Create(config);
  36 + if (!recongizer.Get()) {  // a null underlying handle is treated as a failed Create()
  37 + std::cerr << "Please check your config\n";
  38 + return -1;
  39 + }
  40 + std::cout << "Loading model done\n";
  41 +
  42 + std::string wave_filename =
  43 + "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/en.wav";
  44 +
  45 + Wave wave = ReadWave(wave_filename);
  46 + if (wave.samples.empty()) {  // an empty sample buffer is treated as a read failure
  47 + std::cerr << "Failed to read: '" << wave_filename << "'\n";
  48 + return -1;
  49 + }
  50 +
  51 + std::cout << "Start recognition\n";
  52 + const auto begin = std::chrono::steady_clock::now();  // timer starts after model load and wave read
  53 +
  54 + OfflineStream stream = recongizer.CreateStream();
  55 + stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
  56 + wave.samples.size());
  57 +
  58 + recongizer.Decode(&stream);
  59 +
  60 + OfflineRecognizerResult result = recongizer.GetResult(&stream);
  61 +
  62 + const auto end = std::chrono::steady_clock::now();  // timed span: CreateStream through GetResult
  63 + const float elapsed_seconds =  // millisecond count converted to seconds
  64 + std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
  65 + .count() /
  66 + 1000.;
  67 + float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);  // audio length in seconds
  68 + float rtf = elapsed_seconds / duration;  // real-time factor = processing time / audio length
  69 +
  70 + std::cout << "text: " << result.text << "\n";
  71 + printf("Number of threads: %d\n", config.model_config.num_threads);
  72 + printf("Duration: %.3fs\n", duration);
  73 + printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  74 + printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
  75 + duration, rtf);
  76 +
  77 + return 0;
  78 +}
@@ -66,6 +66,8 @@ int32_t main() { @@ -66,6 +66,8 @@ int32_t main() {
66 OnlineStream stream = recongizer.CreateStream(); 66 OnlineStream stream = recongizer.CreateStream();
67 stream.AcceptWaveform(wave.sample_rate, wave.samples.data(), 67 stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
68 wave.samples.size()); 68 wave.samples.size());
  69 + stream.InputFinished();
  70 +
69 while (recongizer.IsReady(&stream)) { 71 while (recongizer.IsReady(&stream)) {
70 recongizer.Decode(&stream); 72 recongizer.Decode(&stream);
71 } 73 }
  1 +// cxx-api-examples/whisper-cxx-api.cc
  2 +// Copyright (c) 2024 Xiaomi Corporation
  3 +
  4 +//
  5 +// This file demonstrates how to use whisper with sherpa-onnx's C++ API.
  6 +//
  7 +// clang-format off
  8 +//
  9 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
  10 +// tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
  11 +// rm sherpa-onnx-whisper-tiny.en.tar.bz2
  12 +//
  13 +// clang-format on
  14 +
  15 +#include <chrono> // NOLINT
  16 +#include <iostream>
  17 +#include <string>
  18 +
  19 +#include "sherpa-onnx/c-api/cxx-api.h"
  20 +
  21 +int32_t main() {  // Demo: load Whisper tiny.en, decode one wave file, print text and timing. Returns 0 on success, -1 on failure.
  22 + using namespace sherpa_onnx::cxx;
  23 + OfflineRecognizerConfig config;
  24 +
  25 + config.model_config.whisper.encoder =  // "./" paths below are relative to the current working directory
  26 + "./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx";
  27 + config.model_config.whisper.decoder =
  28 + "./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx";
  29 + config.model_config.tokens =
  30 + "./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt";
  31 +
  32 + config.model_config.num_threads = 1;
  33 +
  34 + std::cout << "Loading model\n";
  35 + OfflineRecognizer recongizer = OfflineRecognizer::Create(config);
  36 + if (!recongizer.Get()) {  // a null underlying handle is treated as a failed Create()
  37 + std::cerr << "Please check your config\n";
  38 + return -1;
  39 + }
  40 + std::cout << "Loading model done\n";
  41 +
  42 + std::string wave_filename = "./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav";
  43 + Wave wave = ReadWave(wave_filename);
  44 + if (wave.samples.empty()) {  // an empty sample buffer is treated as a read failure
  45 + std::cerr << "Failed to read: '" << wave_filename << "'\n";
  46 + return -1;
  47 + }
  48 +
  49 + std::cout << "Start recognition\n";
  50 + const auto begin = std::chrono::steady_clock::now();  // timer starts after model load and wave read
  51 +
  52 + OfflineStream stream = recongizer.CreateStream();
  53 + stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
  54 + wave.samples.size());
  55 +
  56 + recongizer.Decode(&stream);
  57 +
  58 + OfflineRecognizerResult result = recongizer.GetResult(&stream);
  59 +
  60 + const auto end = std::chrono::steady_clock::now();  // timed span: CreateStream through GetResult
  61 + const float elapsed_seconds =  // millisecond count converted to seconds
  62 + std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
  63 + .count() /
  64 + 1000.;
  65 + float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);  // audio length in seconds
  66 + float rtf = elapsed_seconds / duration;  // real-time factor = processing time / audio length
  67 +
  68 + std::cout << "text: " << result.text << "\n";
  69 + printf("Number of threads: %d\n", config.model_config.num_threads);
  70 + printf("Duration: %.3fs\n", duration);
  71 + printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  72 + printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
  73 + duration, rtf);
  74 +
  75 + return 0;
  76 +}
@@ -320,7 +320,8 @@ int main(int argc, char **argv) { @@ -320,7 +320,8 @@ int main(int argc, char **argv) {
320 320
321 const SherpaOnnxOnlineRecognizer *recognizer = 321 const SherpaOnnxOnlineRecognizer *recognizer =
322 SherpaOnnxCreateOnlineRecognizer(&config); 322 SherpaOnnxCreateOnlineRecognizer(&config);
323 - SherpaOnnxOnlineStream *stream = SherpaOnnxCreateOnlineStream(recognizer); 323 + const SherpaOnnxOnlineStream *stream =
  324 + SherpaOnnxCreateOnlineStream(recognizer);
324 const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50); 325 const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
325 int32_t segment_id = 0; 326 int32_t segment_id = 0;
326 327
@@ -256,7 +256,7 @@ void CNonStreamingSpeechRecognitionDlg::OnBnClickedOk() { @@ -256,7 +256,7 @@ void CNonStreamingSpeechRecognitionDlg::OnBnClickedOk() {
256 } 256 }
257 pa_stream_ = nullptr; 257 pa_stream_ = nullptr;
258 258
259 - SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer_); 259 + const SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer_);
260 260
261 SherpaOnnxAcceptWaveformOffline(stream, config_.feat_config.sample_rate, 261 SherpaOnnxAcceptWaveformOffline(stream, config_.feat_config.sample_rate,
262 samples_.data(), static_cast<int32_t>(samples_.size())); 262 samples_.data(), static_cast<int32_t>(samples_.size()));
@@ -48,7 +48,7 @@ class CNonStreamingSpeechRecognitionDlg : public CDialogEx { @@ -48,7 +48,7 @@ class CNonStreamingSpeechRecognitionDlg : public CDialogEx {
48 private: 48 private:
49 Microphone mic_; 49 Microphone mic_;
50 50
51 - SherpaOnnxOfflineRecognizer *recognizer_ = nullptr; 51 + const SherpaOnnxOfflineRecognizer *recognizer_ = nullptr;
52 SherpaOnnxOfflineRecognizerConfig config_; 52 SherpaOnnxOfflineRecognizerConfig config_;
53 53
54 PaStream *pa_stream_ = nullptr; 54 PaStream *pa_stream_ = nullptr;
@@ -203,7 +203,7 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) { @@ -203,7 +203,7 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) {
203 SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fars, ruleFars); 203 SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fars, ruleFars);
204 SHERPA_ONNX_ASSIGN_ATTR_FLOAT(blank_penalty, blankPenalty); 204 SHERPA_ONNX_ASSIGN_ATTR_FLOAT(blank_penalty, blankPenalty);
205 205
206 - SherpaOnnxOfflineRecognizer *recognizer = 206 + const SherpaOnnxOfflineRecognizer *recognizer =
207 SherpaOnnxCreateOfflineRecognizer(&c); 207 SherpaOnnxCreateOfflineRecognizer(&c);
208 208
209 if (c.model_config.transducer.encoder) { 209 if (c.model_config.transducer.encoder) {
@@ -306,7 +306,7 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) { @@ -306,7 +306,7 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) {
306 } 306 }
307 307
308 return Napi::External<SherpaOnnxOfflineRecognizer>::New( 308 return Napi::External<SherpaOnnxOfflineRecognizer>::New(
309 - env, recognizer, 309 + env, const_cast<SherpaOnnxOfflineRecognizer *>(recognizer),
310 [](Napi::Env env, SherpaOnnxOfflineRecognizer *recognizer) { 310 [](Napi::Env env, SherpaOnnxOfflineRecognizer *recognizer) {
311 SherpaOnnxDestroyOfflineRecognizer(recognizer); 311 SherpaOnnxDestroyOfflineRecognizer(recognizer);
312 }); 312 });
@@ -336,10 +336,12 @@ static Napi::External<SherpaOnnxOfflineStream> CreateOfflineStreamWrapper( @@ -336,10 +336,12 @@ static Napi::External<SherpaOnnxOfflineStream> CreateOfflineStreamWrapper(
336 SherpaOnnxOfflineRecognizer *recognizer = 336 SherpaOnnxOfflineRecognizer *recognizer =
337 info[0].As<Napi::External<SherpaOnnxOfflineRecognizer>>().Data(); 337 info[0].As<Napi::External<SherpaOnnxOfflineRecognizer>>().Data();
338 338
339 - SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer); 339 + const SherpaOnnxOfflineStream *stream =
  340 + SherpaOnnxCreateOfflineStream(recognizer);
340 341
341 return Napi::External<SherpaOnnxOfflineStream>::New( 342 return Napi::External<SherpaOnnxOfflineStream>::New(
342 - env, stream, [](Napi::Env env, SherpaOnnxOfflineStream *stream) { 343 + env, const_cast<SherpaOnnxOfflineStream *>(stream),
  344 + [](Napi::Env env, SherpaOnnxOfflineStream *stream) {
343 SherpaOnnxDestroyOfflineStream(stream); 345 SherpaOnnxDestroyOfflineStream(stream);
344 }); 346 });
345 } 347 }
@@ -168,14 +168,14 @@ void SherpaOnnxDestroyOnlineRecognizer( @@ -168,14 +168,14 @@ void SherpaOnnxDestroyOnlineRecognizer(
168 delete recognizer; 168 delete recognizer;
169 } 169 }
170 170
171 -SherpaOnnxOnlineStream *SherpaOnnxCreateOnlineStream( 171 +const SherpaOnnxOnlineStream *SherpaOnnxCreateOnlineStream(
172 const SherpaOnnxOnlineRecognizer *recognizer) { 172 const SherpaOnnxOnlineRecognizer *recognizer) {
173 SherpaOnnxOnlineStream *stream = 173 SherpaOnnxOnlineStream *stream =
174 new SherpaOnnxOnlineStream(recognizer->impl->CreateStream()); 174 new SherpaOnnxOnlineStream(recognizer->impl->CreateStream());
175 return stream; 175 return stream;
176 } 176 }
177 177
178 -SherpaOnnxOnlineStream *SherpaOnnxCreateOnlineStreamWithHotwords( 178 +const SherpaOnnxOnlineStream *SherpaOnnxCreateOnlineStreamWithHotwords(
179 const SherpaOnnxOnlineRecognizer *recognizer, const char *hotwords) { 179 const SherpaOnnxOnlineRecognizer *recognizer, const char *hotwords) {
180 SherpaOnnxOnlineStream *stream = 180 SherpaOnnxOnlineStream *stream =
181 new SherpaOnnxOnlineStream(recognizer->impl->CreateStream(hotwords)); 181 new SherpaOnnxOnlineStream(recognizer->impl->CreateStream(hotwords));
@@ -351,7 +351,7 @@ struct SherpaOnnxOfflineStream { @@ -351,7 +351,7 @@ struct SherpaOnnxOfflineStream {
351 static sherpa_onnx::OfflineRecognizerConfig convertConfig( 351 static sherpa_onnx::OfflineRecognizerConfig convertConfig(
352 const SherpaOnnxOfflineRecognizerConfig *config); 352 const SherpaOnnxOfflineRecognizerConfig *config);
353 353
354 -SherpaOnnxOfflineRecognizer *SherpaOnnxCreateOfflineRecognizer( 354 +const SherpaOnnxOfflineRecognizer *SherpaOnnxCreateOfflineRecognizer(
355 const SherpaOnnxOfflineRecognizerConfig *config) { 355 const SherpaOnnxOfflineRecognizerConfig *config) {
356 sherpa_onnx::OfflineRecognizerConfig recognizer_config = 356 sherpa_onnx::OfflineRecognizerConfig recognizer_config =
357 convertConfig(config); 357 convertConfig(config);
@@ -490,11 +490,11 @@ void SherpaOnnxOfflineRecognizerSetConfig( @@ -490,11 +490,11 @@ void SherpaOnnxOfflineRecognizerSetConfig(
490 } 490 }
491 491
492 void SherpaOnnxDestroyOfflineRecognizer( 492 void SherpaOnnxDestroyOfflineRecognizer(
493 - SherpaOnnxOfflineRecognizer *recognizer) { 493 + const SherpaOnnxOfflineRecognizer *recognizer) {
494 delete recognizer; 494 delete recognizer;
495 } 495 }
496 496
497 -SherpaOnnxOfflineStream *SherpaOnnxCreateOfflineStream( 497 +const SherpaOnnxOfflineStream *SherpaOnnxCreateOfflineStream(
498 const SherpaOnnxOfflineRecognizer *recognizer) { 498 const SherpaOnnxOfflineRecognizer *recognizer) {
499 SherpaOnnxOfflineStream *stream = 499 SherpaOnnxOfflineStream *stream =
500 new SherpaOnnxOfflineStream(recognizer->impl->CreateStream()); 500 new SherpaOnnxOfflineStream(recognizer->impl->CreateStream());
@@ -518,8 +518,8 @@ void SherpaOnnxDecodeOfflineStream( @@ -518,8 +518,8 @@ void SherpaOnnxDecodeOfflineStream(
518 } 518 }
519 519
520 void SherpaOnnxDecodeMultipleOfflineStreams( 520 void SherpaOnnxDecodeMultipleOfflineStreams(
521 - SherpaOnnxOfflineRecognizer *recognizer, SherpaOnnxOfflineStream **streams,  
522 - int32_t n) { 521 + const SherpaOnnxOfflineRecognizer *recognizer,
  522 + const SherpaOnnxOfflineStream **streams, int32_t n) {
523 std::vector<sherpa_onnx::OfflineStream *> ss(n); 523 std::vector<sherpa_onnx::OfflineStream *> ss(n);
524 for (int32_t i = 0; i != n; ++i) { 524 for (int32_t i = 0; i != n; ++i) {
525 ss[i] = streams[i]->impl.get(); 525 ss[i] = streams[i]->impl.get();
@@ -220,7 +220,7 @@ SHERPA_ONNX_API void SherpaOnnxDestroyOnlineRecognizer( @@ -220,7 +220,7 @@ SHERPA_ONNX_API void SherpaOnnxDestroyOnlineRecognizer(
220 /// @param recognizer A pointer returned by SherpaOnnxCreateOnlineRecognizer() 220 /// @param recognizer A pointer returned by SherpaOnnxCreateOnlineRecognizer()
221 /// @return Return a pointer to an OnlineStream. The user has to invoke 221 /// @return Return a pointer to an OnlineStream. The user has to invoke
222 /// SherpaOnnxDestroyOnlineStream() to free it to avoid memory leak. 222 /// SherpaOnnxDestroyOnlineStream() to free it to avoid memory leak.
223 -SHERPA_ONNX_API SherpaOnnxOnlineStream *SherpaOnnxCreateOnlineStream( 223 +SHERPA_ONNX_API const SherpaOnnxOnlineStream *SherpaOnnxCreateOnlineStream(
224 const SherpaOnnxOnlineRecognizer *recognizer); 224 const SherpaOnnxOnlineRecognizer *recognizer);
225 225
226 /// Create an online stream for accepting wave samples with the specified hot 226 /// Create an online stream for accepting wave samples with the specified hot
@@ -229,7 +229,7 @@ SHERPA_ONNX_API SherpaOnnxOnlineStream *SherpaOnnxCreateOnlineStream( @@ -229,7 +229,7 @@ SHERPA_ONNX_API SherpaOnnxOnlineStream *SherpaOnnxCreateOnlineStream(
229 /// @param recognizer A pointer returned by SherpaOnnxCreateOnlineRecognizer() 229 /// @param recognizer A pointer returned by SherpaOnnxCreateOnlineRecognizer()
230 /// @return Return a pointer to an OnlineStream. The user has to invoke 230 /// @return Return a pointer to an OnlineStream. The user has to invoke
231 /// SherpaOnnxDestroyOnlineStream() to free it to avoid memory leak. 231 /// SherpaOnnxDestroyOnlineStream() to free it to avoid memory leak.
232 -SHERPA_ONNX_API SherpaOnnxOnlineStream * 232 +SHERPA_ONNX_API const SherpaOnnxOnlineStream *
233 SherpaOnnxCreateOnlineStreamWithHotwords( 233 SherpaOnnxCreateOnlineStreamWithHotwords(
234 const SherpaOnnxOnlineRecognizer *recognizer, const char *hotwords); 234 const SherpaOnnxOnlineRecognizer *recognizer, const char *hotwords);
235 235
@@ -453,7 +453,8 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineStream SherpaOnnxOfflineStream; @@ -453,7 +453,8 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineStream SherpaOnnxOfflineStream;
453 /// @return Return a pointer to the recognizer. The user has to invoke 453 /// @return Return a pointer to the recognizer. The user has to invoke
454 // SherpaOnnxDestroyOfflineRecognizer() to free it to avoid memory 454 // SherpaOnnxDestroyOfflineRecognizer() to free it to avoid memory
455 // leak. 455 // leak.
456 -SHERPA_ONNX_API SherpaOnnxOfflineRecognizer *SherpaOnnxCreateOfflineRecognizer( 456 +SHERPA_ONNX_API const SherpaOnnxOfflineRecognizer *
  457 +SherpaOnnxCreateOfflineRecognizer(
457 const SherpaOnnxOfflineRecognizerConfig *config); 458 const SherpaOnnxOfflineRecognizerConfig *config);
458 459
459 /// @param config Config for the recognizer. 460 /// @param config Config for the recognizer.
@@ -465,14 +466,14 @@ SHERPA_ONNX_API void SherpaOnnxOfflineRecognizerSetConfig( @@ -465,14 +466,14 @@ SHERPA_ONNX_API void SherpaOnnxOfflineRecognizerSetConfig(
465 /// 466 ///
466 /// @param p A pointer returned by SherpaOnnxCreateOfflineRecognizer() 467 /// @param p A pointer returned by SherpaOnnxCreateOfflineRecognizer()
467 SHERPA_ONNX_API void SherpaOnnxDestroyOfflineRecognizer( 468 SHERPA_ONNX_API void SherpaOnnxDestroyOfflineRecognizer(
468 - SherpaOnnxOfflineRecognizer *recognizer); 469 + const SherpaOnnxOfflineRecognizer *recognizer);
469 470
470 /// Create an offline stream for accepting wave samples. 471 /// Create an offline stream for accepting wave samples.
471 /// 472 ///
472 /// @param recognizer A pointer returned by SherpaOnnxCreateOfflineRecognizer() 473 /// @param recognizer A pointer returned by SherpaOnnxCreateOfflineRecognizer()
473 /// @return Return a pointer to an OfflineStream. The user has to invoke 474 /// @return Return a pointer to an OfflineStream. The user has to invoke
474 /// SherpaOnnxDestroyOfflineStream() to free it to avoid memory leak. 475 /// SherpaOnnxDestroyOfflineStream() to free it to avoid memory leak.
475 -SHERPA_ONNX_API SherpaOnnxOfflineStream *SherpaOnnxCreateOfflineStream( 476 +SHERPA_ONNX_API const SherpaOnnxOfflineStream *SherpaOnnxCreateOfflineStream(
476 const SherpaOnnxOfflineRecognizer *recognizer); 477 const SherpaOnnxOfflineRecognizer *recognizer);
477 478
478 /// Destroy an offline stream. 479 /// Destroy an offline stream.
@@ -518,8 +519,8 @@ SHERPA_ONNX_API void SherpaOnnxDecodeOfflineStream( @@ -518,8 +519,8 @@ SHERPA_ONNX_API void SherpaOnnxDecodeOfflineStream(
518 /// by SherpaOnnxCreateOfflineStream(). 519 /// by SherpaOnnxCreateOfflineStream().
519 /// @param n Number of entries in the given streams. 520 /// @param n Number of entries in the given streams.
520 SHERPA_ONNX_API void SherpaOnnxDecodeMultipleOfflineStreams( 521 SHERPA_ONNX_API void SherpaOnnxDecodeMultipleOfflineStreams(
521 - SherpaOnnxOfflineRecognizer *recognizer, SherpaOnnxOfflineStream **streams,  
522 - int32_t n); 522 + const SherpaOnnxOfflineRecognizer *recognizer,
  523 + const SherpaOnnxOfflineStream **streams, int32_t n);
523 524
524 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerResult { 525 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerResult {
525 const char *text; 526 const char *text;
@@ -36,6 +36,10 @@ void OnlineStream::AcceptWaveform(int32_t sample_rate, const float *samples, @@ -36,6 +36,10 @@ void OnlineStream::AcceptWaveform(int32_t sample_rate, const float *samples,
36 SherpaOnnxOnlineStreamAcceptWaveform(p_, sample_rate, samples, n); 36 SherpaOnnxOnlineStreamAcceptWaveform(p_, sample_rate, samples, n);
37 } 37 }
38 38
  39 +void OnlineStream::InputFinished() const {
  40 + SherpaOnnxOnlineStreamInputFinished(p_);
  41 +}
  42 +
39 OnlineRecognizer OnlineRecognizer::Create( 43 OnlineRecognizer OnlineRecognizer::Create(
40 const OnlineRecognizerConfig &config) { 44 const OnlineRecognizerConfig &config) {
41 struct SherpaOnnxOnlineRecognizerConfig c; 45 struct SherpaOnnxOnlineRecognizerConfig c;
@@ -119,6 +123,14 @@ void OnlineRecognizer::Decode(const OnlineStream *s) const { @@ -119,6 +123,14 @@ void OnlineRecognizer::Decode(const OnlineStream *s) const {
119 SherpaOnnxDecodeOnlineStream(p_, s->Get()); 123 SherpaOnnxDecodeOnlineStream(p_, s->Get());
120 } 124 }
121 125
  126 +void OnlineRecognizer::Reset(const OnlineStream *s) const {
  127 + SherpaOnnxOnlineStreamReset(p_, s->Get());
  128 +}
  129 +
  130 +bool OnlineRecognizer::IsEndpoint(const OnlineStream *s) const {
  131 + return SherpaOnnxOnlineStreamIsEndpoint(p_, s->Get());
  132 +}
  133 +
122 void OnlineRecognizer::Decode(const OnlineStream *ss, int32_t n) const { 134 void OnlineRecognizer::Decode(const OnlineStream *ss, int32_t n) const {
123 if (n <= 0) { 135 if (n <= 0) {
124 return; 136 return;
@@ -156,4 +168,138 @@ OnlineRecognizerResult OnlineRecognizer::GetResult( @@ -156,4 +168,138 @@ OnlineRecognizerResult OnlineRecognizer::GetResult(
156 return ans; 168 return ans;
157 } 169 }
158 170
  171 +// ============================================================================
  172 +// Non-streaming ASR
  173 +// ============================================================================
  174 +OfflineStream::OfflineStream(const SherpaOnnxOfflineStream *p)
  175 + : MoveOnly<OfflineStream, SherpaOnnxOfflineStream>(p) {}
  176 +
  177 +void OfflineStream::Destroy(const SherpaOnnxOfflineStream *p) const {
  178 + SherpaOnnxDestroyOfflineStream(p);
  179 +}
  180 +
  181 +void OfflineStream::AcceptWaveform(int32_t sample_rate, const float *samples,
  182 + int32_t n) const {
  183 + SherpaOnnxAcceptWaveformOffline(p_, sample_rate, samples, n);
  184 +}
  185 +
  186 +OfflineRecognizer OfflineRecognizer::Create(
  187 + const OfflineRecognizerConfig &config) {
  188 + struct SherpaOnnxOfflineRecognizerConfig c;
  189 + memset(&c, 0, sizeof(c));
  190 +
  191 + c.feat_config.sample_rate = config.feat_config.sample_rate;
  192 + c.feat_config.feature_dim = config.feat_config.feature_dim;
  193 + c.model_config.transducer.encoder =
  194 + config.model_config.transducer.encoder.c_str();
  195 + c.model_config.transducer.decoder =
  196 + config.model_config.transducer.decoder.c_str();
  197 + c.model_config.transducer.joiner =
  198 + config.model_config.transducer.joiner.c_str();
  199 +
  200 + c.model_config.paraformer.model =
  201 + config.model_config.paraformer.model.c_str();
  202 +
  203 + c.model_config.nemo_ctc.model = config.model_config.nemo_ctc.model.c_str();
  204 +
  205 + c.model_config.whisper.encoder = config.model_config.whisper.encoder.c_str();
  206 + c.model_config.whisper.decoder = config.model_config.whisper.decoder.c_str();
  207 + c.model_config.whisper.language =
  208 + config.model_config.whisper.language.c_str();
  209 + c.model_config.whisper.task = config.model_config.whisper.task.c_str();
  210 + c.model_config.whisper.tail_paddings =
  211 + config.model_config.whisper.tail_paddings;
  212 +
  213 + c.model_config.tdnn.model = config.model_config.tdnn.model.c_str();
  214 +
  215 + c.model_config.tokens = config.model_config.tokens.c_str();
  216 + c.model_config.num_threads = config.model_config.num_threads;
  217 + c.model_config.debug = config.model_config.debug;
  218 + c.model_config.provider = config.model_config.provider.c_str();
  219 + c.model_config.model_type = config.model_config.model_type.c_str();
  220 + c.model_config.modeling_unit = config.model_config.modeling_unit.c_str();
  221 + c.model_config.bpe_vocab = config.model_config.bpe_vocab.c_str();
  222 + c.model_config.telespeech_ctc = config.model_config.telespeech_ctc.c_str();
  223 +
  224 + c.model_config.sense_voice.model =
  225 + config.model_config.sense_voice.model.c_str();
  226 + c.model_config.sense_voice.language =
  227 + config.model_config.sense_voice.language.c_str();
  228 + c.model_config.sense_voice.use_itn = config.model_config.sense_voice.use_itn;
  229 +
  230 + c.lm_config.model = config.lm_config.model.c_str();
  231 + c.lm_config.scale = config.lm_config.scale;
  232 +
  233 + c.decoding_method = config.decoding_method.c_str();
  234 + c.max_active_paths = config.max_active_paths;
  235 + c.hotwords_file = config.hotwords_file.c_str();
  236 + c.hotwords_score = config.hotwords_score;
  237 +
  238 + c.rule_fsts = config.rule_fsts.c_str();
  239 + c.rule_fars = config.rule_fars.c_str();
  240 +
  241 + c.blank_penalty = config.blank_penalty;
  242 +
  243 + auto p = SherpaOnnxCreateOfflineRecognizer(&c);
  244 + return OfflineRecognizer(p);
  245 +}
  246 +
  247 +OfflineRecognizer::OfflineRecognizer(const SherpaOnnxOfflineRecognizer *p)
  248 + : MoveOnly<OfflineRecognizer, SherpaOnnxOfflineRecognizer>(p) {}
  249 +
  250 +void OfflineRecognizer::Destroy(const SherpaOnnxOfflineRecognizer *p) const {
  251 + SherpaOnnxDestroyOfflineRecognizer(p_);
  252 +}
  253 +
  254 +OfflineStream OfflineRecognizer::CreateStream() const {
  255 + auto p = SherpaOnnxCreateOfflineStream(p_);
  256 + return OfflineStream{p};
  257 +}
  258 +
  259 +void OfflineRecognizer::Decode(const OfflineStream *s) const {
  260 + SherpaOnnxDecodeOfflineStream(p_, s->Get());
  261 +}
  262 +
  263 +void OfflineRecognizer::Decode(const OfflineStream *ss, int32_t n) const {
  264 + if (n <= 0) {
  265 + return;
  266 + }
  267 +
  268 + std::vector<const SherpaOnnxOfflineStream *> streams(n);
  269 + for (int32_t i = 0; i != n; ++i) {
  270 + streams[i] = ss[i].Get();
  271 + }
  272 +
  273 + SherpaOnnxDecodeMultipleOfflineStreams(p_, streams.data(), n);
  274 +}
  275 +
  276 +OfflineRecognizerResult OfflineRecognizer::GetResult(
  277 + const OfflineStream *s) const {
  278 + auto r = SherpaOnnxGetOfflineStreamResult(s->Get());
  279 +
  280 + OfflineRecognizerResult ans;
  281 + if (r) {
  282 + ans.text = r->text;
  283 +
  284 + if (r->timestamps) {
  285 + ans.timestamps.resize(r->count);
  286 + std::copy(r->timestamps, r->timestamps + r->count, ans.timestamps.data());
  287 + }
  288 +
  289 + ans.tokens.resize(r->count);
  290 + for (int32_t i = 0; i != r->count; ++i) {
  291 + ans.tokens[i] = r->tokens_arr[i];
  292 + }
  293 +
  294 + ans.json = r->json;
  295 + ans.lang = r->lang ? r->lang : "";
  296 + ans.emotion = r->emotion ? r->emotion : "";
  297 + ans.event = r->event ? r->event : "";
  298 + }
  299 +
  300 + SherpaOnnxDestroyOfflineRecognizerResult(r);
  301 +
  302 + return ans;
  303 +}
  304 +
159 } // namespace sherpa_onnx::cxx 305 } // namespace sherpa_onnx::cxx
@@ -13,6 +13,9 @@ @@ -13,6 +13,9 @@
13 13
14 namespace sherpa_onnx::cxx { 14 namespace sherpa_onnx::cxx {
15 15
  16 +// ============================================================================
  17 +// Streaming ASR
  18 +// ============================================================================
16 struct SHERPA_ONNX_API OnlineTransducerModelConfig { 19 struct SHERPA_ONNX_API OnlineTransducerModelConfig {
17 std::string encoder; 20 std::string encoder;
18 std::string decoder; 21 std::string decoder;
@@ -148,6 +151,8 @@ class SHERPA_ONNX_API OnlineStream @@ -148,6 +151,8 @@ class SHERPA_ONNX_API OnlineStream
148 void AcceptWaveform(int32_t sample_rate, const float *samples, 151 void AcceptWaveform(int32_t sample_rate, const float *samples,
149 int32_t n) const; 152 int32_t n) const;
150 153
  154 + void InputFinished() const;
  155 +
151 void Destroy(const SherpaOnnxOnlineStream *p) const; 156 void Destroy(const SherpaOnnxOnlineStream *p) const;
152 }; 157 };
153 158
@@ -170,10 +175,134 @@ class SHERPA_ONNX_API OnlineRecognizer @@ -170,10 +175,134 @@ class SHERPA_ONNX_API OnlineRecognizer
170 175
171 OnlineRecognizerResult GetResult(const OnlineStream *s) const; 176 OnlineRecognizerResult GetResult(const OnlineStream *s) const;
172 177
  178 + void Reset(const OnlineStream *s) const;
  179 +
  180 + bool IsEndpoint(const OnlineStream *s) const;
  181 +
173 private: 182 private:
174 explicit OnlineRecognizer(const SherpaOnnxOnlineRecognizer *p); 183 explicit OnlineRecognizer(const SherpaOnnxOnlineRecognizer *p);
175 }; 184 };
176 185
  186 +// ============================================================================
  187 +// Non-streaming ASR
  188 +// ============================================================================
  189 +struct SHERPA_ONNX_API OfflineTransducerModelConfig {
  190 + std::string encoder;
  191 + std::string decoder;
  192 + std::string joiner;
  193 +};
  194 +
  195 +struct SHERPA_ONNX_API OfflineParaformerModelConfig {
  196 + std::string model;
  197 +};
  198 +
  199 +struct SHERPA_ONNX_API OfflineNemoEncDecCtcModelConfig {
  200 + std::string model;
  201 +};
  202 +
  203 +struct SHERPA_ONNX_API OfflineWhisperModelConfig {
  204 + std::string encoder;
  205 + std::string decoder;
  206 + std::string language;
  207 + std::string task = "transcribe";
  208 + int32_t tail_paddings = -1;
  209 +};
  210 +
  211 +struct SHERPA_ONNX_API OfflineTdnnModelConfig {
  212 + std::string model;
  213 +};
  214 +
  215 +struct SHERPA_ONNX_API SherpaOnnxOfflineLMConfig {
  216 + std::string model;
  217 + float scale = 1.0;
  218 +};
  219 +
  220 +struct SHERPA_ONNX_API OfflineSenseVoiceModelConfig {
  221 + std::string model;
  222 + std::string language;
  223 + bool use_itn = false;
  224 +};
  225 +
  226 +struct SHERPA_ONNX_API OfflineModelConfig {
  227 + OfflineTransducerModelConfig transducer;
  228 + OfflineParaformerModelConfig paraformer;
  229 + OfflineNemoEncDecCtcModelConfig nemo_ctc;
  230 + OfflineWhisperModelConfig whisper;
  231 + OfflineTdnnModelConfig tdnn;
  232 +
  233 + std::string tokens;
  234 + int32_t num_threads = 1;
  235 + bool debug = false;
  236 + std::string provider = "cpu";
  237 + std::string model_type;
  238 + std::string modeling_unit = "cjkchar";
  239 + std::string bpe_vocab;
  240 + std::string telespeech_ctc;
  241 + OfflineSenseVoiceModelConfig sense_voice;
  242 +};
  243 +
  244 +struct SHERPA_ONNX_API OfflineLMConfig {
  245 + std::string model;
  246 + float scale = 1.0;
  247 +};
  248 +
  249 +struct SHERPA_ONNX_API OfflineRecognizerConfig {
  250 + FeatureConfig feat_config;
  251 + OfflineModelConfig model_config;
  252 + OfflineLMConfig lm_config;
  253 +
  254 + std::string decoding_method = "greedy_search";
  255 + int32_t max_active_paths = 4;
  256 +
  257 + std::string hotwords_file;
  258 +
  259 + float hotwords_score = 1.5;
  260 + std::string rule_fsts;
  261 + std::string rule_fars;
  262 + float blank_penalty = 0;
  263 +};
  264 +
  265 +struct SHERPA_ONNX_API OfflineRecognizerResult {
  266 + std::string text;
  267 + std::vector<float> timestamps;
  268 + std::vector<std::string> tokens;
  269 + std::string json;
  270 + std::string lang;
  271 + std::string emotion;
  272 + std::string event;
  273 +};
  274 +
  275 +class SHERPA_ONNX_API OfflineStream
  276 + : public MoveOnly<OfflineStream, SherpaOnnxOfflineStream> {
  277 + public:
  278 + explicit OfflineStream(const SherpaOnnxOfflineStream *p);
  279 +
  280 + void AcceptWaveform(int32_t sample_rate, const float *samples,
  281 + int32_t n) const;
  282 +
  283 + void Destroy(const SherpaOnnxOfflineStream *p) const;
  284 +};
  285 +
  286 +class SHERPA_ONNX_API OfflineRecognizer
  287 + : public MoveOnly<OfflineRecognizer, SherpaOnnxOfflineRecognizer> {
  288 + public:
  289 + static OfflineRecognizer Create(const OfflineRecognizerConfig &config);
  290 +
  291 + void Destroy(const SherpaOnnxOfflineRecognizer *p) const;
  292 +
  293 + OfflineStream CreateStream() const;
  294 +
  295 + void Decode(const OfflineStream *s) const;
  296 +
  297 + void Decode(const OfflineStream *ss, int32_t n) const;
  298 +
  299 + OfflineRecognizerResult GetResult(const OfflineStream *s) const;
  300 +
  301 + private:
  302 + explicit OfflineRecognizer(const SherpaOnnxOfflineRecognizer *p);
  303 +};
  304 +
177 } // namespace sherpa_onnx::cxx 305 } // namespace sherpa_onnx::cxx
178 306
179 #endif // SHERPA_ONNX_C_API_CXX_API_H_ 307 #endif // SHERPA_ONNX_C_API_CXX_API_H_
  308 + //
@@ -30,9 +30,13 @@ std::unique_ptr<OnlineRecognizerImpl> OnlineRecognizerImpl::Create( @@ -30,9 +30,13 @@ std::unique_ptr<OnlineRecognizerImpl> OnlineRecognizerImpl::Create(
30 if (!config.model_config.transducer.encoder.empty()) { 30 if (!config.model_config.transducer.encoder.empty()) {
31 Ort::Env env(ORT_LOGGING_LEVEL_ERROR); 31 Ort::Env env(ORT_LOGGING_LEVEL_ERROR);
32 32
  33 + Ort::SessionOptions sess_opts;
  34 + sess_opts.SetIntraOpNumThreads(1);
  35 + sess_opts.SetInterOpNumThreads(1);
  36 +
33 auto decoder_model = ReadFile(config.model_config.transducer.decoder); 37 auto decoder_model = ReadFile(config.model_config.transducer.decoder);
34 - auto sess = std::make_unique<Ort::Session>(  
35 - env, decoder_model.data(), decoder_model.size(), Ort::SessionOptions{}); 38 + auto sess = std::make_unique<Ort::Session>(env, decoder_model.data(),
  39 + decoder_model.size(), sess_opts);
36 40
37 size_t node_count = sess->GetOutputCount(); 41 size_t node_count = sess->GetOutputCount();
38 42
@@ -63,9 +67,13 @@ std::unique_ptr<OnlineRecognizerImpl> OnlineRecognizerImpl::Create( @@ -63,9 +67,13 @@ std::unique_ptr<OnlineRecognizerImpl> OnlineRecognizerImpl::Create(
63 if (!config.model_config.transducer.encoder.empty()) { 67 if (!config.model_config.transducer.encoder.empty()) {
64 Ort::Env env(ORT_LOGGING_LEVEL_ERROR); 68 Ort::Env env(ORT_LOGGING_LEVEL_ERROR);
65 69
  70 + Ort::SessionOptions sess_opts;
  71 + sess_opts.SetIntraOpNumThreads(1);
  72 + sess_opts.SetInterOpNumThreads(1);
  73 +
66 auto decoder_model = ReadFile(mgr, config.model_config.transducer.decoder); 74 auto decoder_model = ReadFile(mgr, config.model_config.transducer.decoder);
67 - auto sess = std::make_unique<Ort::Session>(  
68 - env, decoder_model.data(), decoder_model.size(), Ort::SessionOptions{}); 75 + auto sess = std::make_unique<Ort::Session>(env, decoder_model.data(),
  76 + decoder_model.size(), sess_opts);
69 77
70 size_t node_count = sess->GetOutputCount(); 78 size_t node_count = sess->GetOutputCount();
71 79