Committed by
GitHub
Add C++ API for non-streaming ASR (#1456)
Showing 31 changed files with 604 additions and 43 deletions
| @@ -9,6 +9,8 @@ log() { | @@ -9,6 +9,8 @@ log() { | ||
| 9 | } | 9 | } |
| 10 | 10 | ||
| 11 | echo "CXX_STREAMING_ZIPFORMER_EXE is $CXX_STREAMING_ZIPFORMER_EXE" | 11 | echo "CXX_STREAMING_ZIPFORMER_EXE is $CXX_STREAMING_ZIPFORMER_EXE" |
| 12 | +echo "CXX_WHISPER_EXE is $CXX_WHISPER_EXE" | ||
| 13 | +echo "CXX_SENSE_VOICE_EXE is $CXX_SENSE_VOICE_EXE" | ||
| 12 | echo "PATH: $PATH" | 14 | echo "PATH: $PATH" |
| 13 | 15 | ||
| 14 | log "------------------------------------------------------------" | 16 | log "------------------------------------------------------------" |
| @@ -19,3 +21,22 @@ tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | @@ -19,3 +21,22 @@ tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 19 | rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | 21 | rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 |
| 20 | $CXX_STREAMING_ZIPFORMER_EXE | 22 | $CXX_STREAMING_ZIPFORMER_EXE |
| 21 | rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 | 23 | rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 |
| 24 | + | ||
| 25 | +log "------------------------------------------------------------" | ||
| 26 | +log "Test Whisper CXX API" | ||
| 27 | +log "------------------------------------------------------------" | ||
| 28 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2 | ||
| 29 | +tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2 | ||
| 30 | +rm sherpa-onnx-whisper-tiny.en.tar.bz2 | ||
| 31 | +$CXX_WHISPER_EXE | ||
| 32 | +rm -rf sherpa-onnx-whisper-tiny.en | ||
| 33 | + | ||
| 34 | +log "------------------------------------------------------------" | ||
| 35 | +log "Test SenseVoice CXX API" | ||
| 36 | +log "------------------------------------------------------------" | ||
| 37 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 38 | +tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 39 | +rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 40 | + | ||
| 41 | +$CXX_SENSE_VOICE_EXE | ||
| 42 | +rm -rf sherpa-onnx-sense-voice-* |
| @@ -4,6 +4,7 @@ on: | @@ -4,6 +4,7 @@ on: | ||
| 4 | push: | 4 | push: |
| 5 | branches: | 5 | branches: |
| 6 | - master | 6 | - master |
| 7 | + - cxx-api-asr-non-streaming | ||
| 7 | paths: | 8 | paths: |
| 8 | - '.github/workflows/cxx-api.yaml' | 9 | - '.github/workflows/cxx-api.yaml' |
| 9 | - 'CMakeLists.txt' | 10 | - 'CMakeLists.txt' |
| @@ -82,6 +83,74 @@ jobs: | @@ -82,6 +83,74 @@ jobs: | ||
| 82 | otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib | 83 | otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib |
| 83 | fi | 84 | fi |
| 84 | 85 | ||
| 86 | + - name: Test whisper | ||
| 87 | + shell: bash | ||
| 88 | + run: | | ||
| 89 | + g++ -std=c++17 -o whisper-cxx-api ./cxx-api-examples/whisper-cxx-api.cc \ | ||
| 90 | + -I ./build/install/include \ | ||
| 91 | + -L ./build/install/lib/ \ | ||
| 92 | + -l sherpa-onnx-cxx-api \ | ||
| 93 | + -l sherpa-onnx-c-api \ | ||
| 94 | + -l onnxruntime | ||
| 95 | + | ||
| 96 | + ls -lh whisper-cxx-api | ||
| 97 | + | ||
| 98 | + if [[ ${{ matrix.os }} == ubuntu-latest ]]; then | ||
| 99 | + ldd ./whisper-cxx-api | ||
| 100 | + echo "----" | ||
| 101 | + readelf -d ./whisper-cxx-api | ||
| 102 | + fi | ||
| 103 | + | ||
| 104 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2 | ||
| 105 | + tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2 | ||
| 106 | + rm sherpa-onnx-whisper-tiny.en.tar.bz2 | ||
| 107 | + | ||
| 108 | + ls -lh sherpa-onnx-whisper-tiny.en | ||
| 109 | + echo "---" | ||
| 110 | + ls -lh sherpa-onnx-whisper-tiny.en/test_wavs | ||
| 111 | + | ||
| 112 | + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH | ||
| 113 | + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 114 | + | ||
| 115 | + ./whisper-cxx-api | ||
| 116 | + | ||
| 117 | + rm -rf sherpa-onnx-whisper-* | ||
| 118 | + rm ./whisper-cxx-api | ||
| 119 | + | ||
| 120 | + - name: Test SenseVoice | ||
| 121 | + shell: bash | ||
| 122 | + run: | | ||
| 123 | + g++ -std=c++17 -o sense-voice-cxx-api ./cxx-api-examples/sense-voice-cxx-api.cc \ | ||
| 124 | + -I ./build/install/include \ | ||
| 125 | + -L ./build/install/lib/ \ | ||
| 126 | + -l sherpa-onnx-cxx-api \ | ||
| 127 | + -l sherpa-onnx-c-api \ | ||
| 128 | + -l onnxruntime | ||
| 129 | + | ||
| 130 | + ls -lh sense-voice-cxx-api | ||
| 131 | + | ||
| 132 | + if [[ ${{ matrix.os }} == ubuntu-latest ]]; then | ||
| 133 | + ldd ./sense-voice-cxx-api | ||
| 134 | + echo "----" | ||
| 135 | + readelf -d ./sense-voice-cxx-api | ||
| 136 | + fi | ||
| 137 | + | ||
| 138 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 139 | + tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 140 | + rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 141 | + | ||
| 142 | + ls -lh sherpa-onnx-sense-voice-* | ||
| 143 | + echo "---" | ||
| 144 | + ls -lh sherpa-onnx-sense-voice-*/test_wavs | ||
| 145 | + | ||
| 146 | + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH | ||
| 147 | + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 148 | + | ||
| 149 | + ./sense-voice-cxx-api | ||
| 150 | + | ||
| 151 | + rm -rf sherpa-onnx-sense-voice-* | ||
| 152 | + rm ./sense-voice-cxx-api | ||
| 153 | + | ||
| 85 | - name: Test streaming zipformer | 154 | - name: Test streaming zipformer |
| 86 | shell: bash | 155 | shell: bash |
| 87 | run: | | 156 | run: | |
| @@ -155,6 +155,8 @@ jobs: | @@ -155,6 +155,8 @@ jobs: | ||
| 155 | du -h -d1 . | 155 | du -h -d1 . |
| 156 | export PATH=$PWD/build/bin:$PATH | 156 | export PATH=$PWD/build/bin:$PATH |
| 157 | export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api | 157 | export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api |
| 158 | + export CXX_WHISPER_EXE=whisper-cxx-api | ||
| 159 | + export CXX_SENSE_VOICE_EXE=sense-voice-cxx-api | ||
| 158 | 160 | ||
| 159 | .github/scripts/test-cxx-api.sh | 161 | .github/scripts/test-cxx-api.sh |
| 160 | du -h -d1 . | 162 | du -h -d1 . |
| @@ -127,6 +127,8 @@ jobs: | @@ -127,6 +127,8 @@ jobs: | ||
| 127 | du -h -d1 . | 127 | du -h -d1 . |
| 128 | export PATH=$PWD/build/bin:$PATH | 128 | export PATH=$PWD/build/bin:$PATH |
| 129 | export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api | 129 | export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api |
| 130 | + export CXX_WHISPER_EXE=whisper-cxx-api | ||
| 131 | + export CXX_SENSE_VOICE_EXE=sense-voice-cxx-api | ||
| 130 | 132 | ||
| 131 | .github/scripts/test-cxx-api.sh | 133 | .github/scripts/test-cxx-api.sh |
| 132 | du -h -d1 . | 134 | du -h -d1 . |
| @@ -81,6 +81,7 @@ jobs: | @@ -81,6 +81,7 @@ jobs: | ||
| 81 | run: | | 81 | run: | |
| 82 | export PATH=$PWD/build/bin:$PATH | 82 | export PATH=$PWD/build/bin:$PATH |
| 83 | export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api | 83 | export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api |
| 84 | + export CXX_WHISPER_EXE=whisper-cxx-api | ||
| 84 | 85 | ||
| 85 | .github/scripts/test-cxx-api.sh | 86 | .github/scripts/test-cxx-api.sh |
| 86 | 87 |
| @@ -98,6 +98,8 @@ jobs: | @@ -98,6 +98,8 @@ jobs: | ||
| 98 | run: | | 98 | run: | |
| 99 | export PATH=$PWD/build/bin/Release:$PATH | 99 | export PATH=$PWD/build/bin/Release:$PATH |
| 100 | export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api.exe | 100 | export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api.exe |
| 101 | + export CXX_WHISPER_EXE=whisper-cxx-api.exe | ||
| 102 | + export CXX_SENSE_VOICE_EXE=sense-voice-cxx-api.exe | ||
| 101 | 103 | ||
| 102 | .github/scripts/test-cxx-api.sh | 104 | .github/scripts/test-cxx-api.sh |
| 103 | 105 |
| @@ -98,6 +98,8 @@ jobs: | @@ -98,6 +98,8 @@ jobs: | ||
| 98 | run: | | 98 | run: | |
| 99 | export PATH=$PWD/build/bin/Release:$PATH | 99 | export PATH=$PWD/build/bin/Release:$PATH |
| 100 | export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api.exe | 100 | export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api.exe |
| 101 | + export CXX_WHISPER_EXE=whisper-cxx-api.exe | ||
| 102 | + export CXX_SENSE_VOICE_EXE=sense-voice-cxx-api.exe | ||
| 101 | 103 | ||
| 102 | .github/scripts/test-cxx-api.sh | 104 | .github/scripts/test-cxx-api.sh |
| 103 | 105 |
| @@ -54,7 +54,7 @@ int32_t main() { | @@ -54,7 +54,7 @@ int32_t main() { | ||
| 54 | recognizer_config.decoding_method = "greedy_search"; | 54 | recognizer_config.decoding_method = "greedy_search"; |
| 55 | recognizer_config.model_config = offline_model_config; | 55 | recognizer_config.model_config = offline_model_config; |
| 56 | 56 | ||
| 57 | - SherpaOnnxOfflineRecognizer *recognizer = | 57 | + const SherpaOnnxOfflineRecognizer *recognizer = |
| 58 | SherpaOnnxCreateOfflineRecognizer(&recognizer_config); | 58 | SherpaOnnxCreateOfflineRecognizer(&recognizer_config); |
| 59 | 59 | ||
| 60 | if (recognizer == NULL) { | 60 | if (recognizer == NULL) { |
| @@ -63,7 +63,8 @@ int32_t main() { | @@ -63,7 +63,8 @@ int32_t main() { | ||
| 63 | return -1; | 63 | return -1; |
| 64 | } | 64 | } |
| 65 | 65 | ||
| 66 | - SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer); | 66 | + const SherpaOnnxOfflineStream *stream = |
| 67 | + SherpaOnnxCreateOfflineStream(recognizer); | ||
| 67 | 68 | ||
| 68 | SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples, | 69 | SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples, |
| 69 | wave->num_samples); | 70 | wave->num_samples); |
| @@ -56,7 +56,7 @@ int32_t main() { | @@ -56,7 +56,7 @@ int32_t main() { | ||
| 56 | recognizer_config.decoding_method = "greedy_search"; | 56 | recognizer_config.decoding_method = "greedy_search"; |
| 57 | recognizer_config.model_config = offline_model_config; | 57 | recognizer_config.model_config = offline_model_config; |
| 58 | 58 | ||
| 59 | - SherpaOnnxOfflineRecognizer *recognizer = | 59 | + const SherpaOnnxOfflineRecognizer *recognizer = |
| 60 | SherpaOnnxCreateOfflineRecognizer(&recognizer_config); | 60 | SherpaOnnxCreateOfflineRecognizer(&recognizer_config); |
| 61 | 61 | ||
| 62 | if (recognizer == NULL) { | 62 | if (recognizer == NULL) { |
| @@ -65,7 +65,8 @@ int32_t main() { | @@ -65,7 +65,8 @@ int32_t main() { | ||
| 65 | return -1; | 65 | return -1; |
| 66 | } | 66 | } |
| 67 | 67 | ||
| 68 | - SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer); | 68 | + const SherpaOnnxOfflineStream *stream = |
| 69 | + SherpaOnnxCreateOfflineStream(recognizer); | ||
| 69 | 70 | ||
| 70 | SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples, | 71 | SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples, |
| 71 | wave->num_samples); | 72 | wave->num_samples); |
| @@ -107,7 +107,8 @@ int32_t main() { | @@ -107,7 +107,8 @@ int32_t main() { | ||
| 107 | return -1; | 107 | return -1; |
| 108 | } | 108 | } |
| 109 | 109 | ||
| 110 | - SherpaOnnxOnlineStream *stream = SherpaOnnxCreateOnlineStream(recognizer); | 110 | + const SherpaOnnxOnlineStream *stream = |
| 111 | + SherpaOnnxCreateOnlineStream(recognizer); | ||
| 111 | 112 | ||
| 112 | const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50); | 113 | const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50); |
| 113 | int32_t segment_id = 0; | 114 | int32_t segment_id = 0; |
| @@ -108,7 +108,8 @@ int32_t main() { | @@ -108,7 +108,8 @@ int32_t main() { | ||
| 108 | return -1; | 108 | return -1; |
| 109 | } | 109 | } |
| 110 | 110 | ||
| 111 | - SherpaOnnxOnlineStream *stream = SherpaOnnxCreateOnlineStream(recognizer); | 111 | + const SherpaOnnxOnlineStream *stream = |
| 112 | + SherpaOnnxCreateOnlineStream(recognizer); | ||
| 112 | 113 | ||
| 113 | const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50); | 114 | const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50); |
| 114 | int32_t segment_id = 0; | 115 | int32_t segment_id = 0; |
| @@ -66,7 +66,8 @@ int32_t main() { | @@ -66,7 +66,8 @@ int32_t main() { | ||
| 66 | return -1; | 66 | return -1; |
| 67 | } | 67 | } |
| 68 | 68 | ||
| 69 | - SherpaOnnxOnlineStream *stream = SherpaOnnxCreateOnlineStream(recognizer); | 69 | + const SherpaOnnxOnlineStream *stream = |
| 70 | + SherpaOnnxCreateOnlineStream(recognizer); | ||
| 70 | 71 | ||
| 71 | const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50); | 72 | const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50); |
| 72 | int32_t segment_id = 0; | 73 | int32_t segment_id = 0; |
| @@ -130,7 +130,8 @@ int32_t main() { | @@ -130,7 +130,8 @@ int32_t main() { | ||
| 130 | return -1; | 130 | return -1; |
| 131 | } | 131 | } |
| 132 | 132 | ||
| 133 | - SherpaOnnxOnlineStream *stream = SherpaOnnxCreateOnlineStream(recognizer); | 133 | + const SherpaOnnxOnlineStream *stream = |
| 134 | + SherpaOnnxCreateOnlineStream(recognizer); | ||
| 134 | 135 | ||
| 135 | const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50); | 136 | const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50); |
| 136 | int32_t segment_id = 0; | 137 | int32_t segment_id = 0; |
| @@ -72,7 +72,8 @@ int32_t main() { | @@ -72,7 +72,8 @@ int32_t main() { | ||
| 72 | return -1; | 72 | return -1; |
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | - SherpaOnnxOnlineStream *stream = SherpaOnnxCreateOnlineStream(recognizer); | 75 | + const SherpaOnnxOnlineStream *stream = |
| 76 | + SherpaOnnxCreateOnlineStream(recognizer); | ||
| 76 | 77 | ||
| 77 | const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50); | 78 | const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50); |
| 78 | int32_t segment_id = 0; | 79 | int32_t segment_id = 0; |
| @@ -49,7 +49,7 @@ int32_t main() { | @@ -49,7 +49,7 @@ int32_t main() { | ||
| 49 | recognizer_config.decoding_method = "greedy_search"; | 49 | recognizer_config.decoding_method = "greedy_search"; |
| 50 | recognizer_config.model_config = offline_model_config; | 50 | recognizer_config.model_config = offline_model_config; |
| 51 | 51 | ||
| 52 | - SherpaOnnxOfflineRecognizer *recognizer = | 52 | + const SherpaOnnxOfflineRecognizer *recognizer = |
| 53 | SherpaOnnxCreateOfflineRecognizer(&recognizer_config); | 53 | SherpaOnnxCreateOfflineRecognizer(&recognizer_config); |
| 54 | 54 | ||
| 55 | if (recognizer == NULL) { | 55 | if (recognizer == NULL) { |
| @@ -58,7 +58,8 @@ int32_t main() { | @@ -58,7 +58,8 @@ int32_t main() { | ||
| 58 | return -1; | 58 | return -1; |
| 59 | } | 59 | } |
| 60 | 60 | ||
| 61 | - SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer); | 61 | + const SherpaOnnxOfflineStream *stream = |
| 62 | + SherpaOnnxCreateOfflineStream(recognizer); | ||
| 62 | 63 | ||
| 63 | SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples, | 64 | SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples, |
| 64 | wave->num_samples); | 65 | wave->num_samples); |
| @@ -66,7 +66,7 @@ int32_t main() { | @@ -66,7 +66,7 @@ int32_t main() { | ||
| 66 | recognizer_config.decoding_method = "greedy_search"; | 66 | recognizer_config.decoding_method = "greedy_search"; |
| 67 | recognizer_config.model_config = offline_model_config; | 67 | recognizer_config.model_config = offline_model_config; |
| 68 | 68 | ||
| 69 | - SherpaOnnxOfflineRecognizer *recognizer = | 69 | + const SherpaOnnxOfflineRecognizer *recognizer = |
| 70 | SherpaOnnxCreateOfflineRecognizer(&recognizer_config); | 70 | SherpaOnnxCreateOfflineRecognizer(&recognizer_config); |
| 71 | 71 | ||
| 72 | if (recognizer == NULL) { | 72 | if (recognizer == NULL) { |
| @@ -108,8 +108,9 @@ int32_t main() { | @@ -108,8 +108,9 @@ int32_t main() { | ||
| 108 | const SherpaOnnxSpeechSegment *segment = | 108 | const SherpaOnnxSpeechSegment *segment = |
| 109 | SherpaOnnxVoiceActivityDetectorFront(vad); | 109 | SherpaOnnxVoiceActivityDetectorFront(vad); |
| 110 | 110 | ||
| 111 | - SherpaOnnxOfflineStream *stream = | 111 | + const SherpaOnnxOfflineStream *stream = |
| 112 | SherpaOnnxCreateOfflineStream(recognizer); | 112 | SherpaOnnxCreateOfflineStream(recognizer); |
| 113 | + | ||
| 113 | SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, | 114 | SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, |
| 114 | segment->samples, segment->n); | 115 | segment->samples, segment->n); |
| 115 | 116 | ||
| @@ -138,7 +139,9 @@ int32_t main() { | @@ -138,7 +139,9 @@ int32_t main() { | ||
| 138 | const SherpaOnnxSpeechSegment *segment = | 139 | const SherpaOnnxSpeechSegment *segment = |
| 139 | SherpaOnnxVoiceActivityDetectorFront(vad); | 140 | SherpaOnnxVoiceActivityDetectorFront(vad); |
| 140 | 141 | ||
| 141 | - SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer); | 142 | + const SherpaOnnxOfflineStream *stream = |
| 143 | + SherpaOnnxCreateOfflineStream(recognizer); | ||
| 144 | + | ||
| 142 | SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, segment->samples, | 145 | SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, segment->samples, |
| 143 | segment->n); | 146 | segment->n); |
| 144 | 147 |
| @@ -58,7 +58,7 @@ int32_t main() { | @@ -58,7 +58,7 @@ int32_t main() { | ||
| 58 | recognizer_config.decoding_method = "greedy_search"; | 58 | recognizer_config.decoding_method = "greedy_search"; |
| 59 | recognizer_config.model_config = offline_model_config; | 59 | recognizer_config.model_config = offline_model_config; |
| 60 | 60 | ||
| 61 | - SherpaOnnxOfflineRecognizer *recognizer = | 61 | + const SherpaOnnxOfflineRecognizer *recognizer = |
| 62 | SherpaOnnxCreateOfflineRecognizer(&recognizer_config); | 62 | SherpaOnnxCreateOfflineRecognizer(&recognizer_config); |
| 63 | 63 | ||
| 64 | if (recognizer == NULL) { | 64 | if (recognizer == NULL) { |
| @@ -69,7 +69,8 @@ int32_t main() { | @@ -69,7 +69,8 @@ int32_t main() { | ||
| 69 | return -1; | 69 | return -1; |
| 70 | } | 70 | } |
| 71 | 71 | ||
| 72 | - SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer); | 72 | + const SherpaOnnxOfflineStream *stream = |
| 73 | + SherpaOnnxCreateOfflineStream(recognizer); | ||
| 73 | 74 | ||
| 74 | SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples, | 75 | SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples, |
| 75 | wave->num_samples); | 76 | wave->num_samples); |
| @@ -60,7 +60,7 @@ int32_t main() { | @@ -60,7 +60,7 @@ int32_t main() { | ||
| 60 | recognizer_config.decoding_method = "greedy_search"; | 60 | recognizer_config.decoding_method = "greedy_search"; |
| 61 | recognizer_config.model_config = offline_model_config; | 61 | recognizer_config.model_config = offline_model_config; |
| 62 | 62 | ||
| 63 | - SherpaOnnxOfflineRecognizer *recognizer = | 63 | + const SherpaOnnxOfflineRecognizer *recognizer = |
| 64 | SherpaOnnxCreateOfflineRecognizer(&recognizer_config); | 64 | SherpaOnnxCreateOfflineRecognizer(&recognizer_config); |
| 65 | 65 | ||
| 66 | if (recognizer == NULL) { | 66 | if (recognizer == NULL) { |
| @@ -69,7 +69,8 @@ int32_t main() { | @@ -69,7 +69,8 @@ int32_t main() { | ||
| 69 | return -1; | 69 | return -1; |
| 70 | } | 70 | } |
| 71 | 71 | ||
| 72 | - SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer); | 72 | + const SherpaOnnxOfflineStream *stream = |
| 73 | + SherpaOnnxCreateOfflineStream(recognizer); | ||
| 73 | 74 | ||
| 74 | SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples, | 75 | SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples, |
| 75 | wave->num_samples); | 76 | wave->num_samples); |
| @@ -2,3 +2,9 @@ include_directories(${CMAKE_SOURCE_DIR}) | @@ -2,3 +2,9 @@ include_directories(${CMAKE_SOURCE_DIR}) | ||
| 2 | 2 | ||
| 3 | add_executable(streaming-zipformer-cxx-api ./streaming-zipformer-cxx-api.cc) | 3 | add_executable(streaming-zipformer-cxx-api ./streaming-zipformer-cxx-api.cc) |
| 4 | target_link_libraries(streaming-zipformer-cxx-api sherpa-onnx-cxx-api) | 4 | target_link_libraries(streaming-zipformer-cxx-api sherpa-onnx-cxx-api) |
| 5 | + | ||
| 6 | +add_executable(whisper-cxx-api ./whisper-cxx-api.cc) | ||
| 7 | +target_link_libraries(whisper-cxx-api sherpa-onnx-cxx-api) | ||
| 8 | + | ||
| 9 | +add_executable(sense-voice-cxx-api ./sense-voice-cxx-api.cc) | ||
| 10 | +target_link_libraries(sense-voice-cxx-api sherpa-onnx-cxx-api) |
cxx-api-examples/sense-voice-cxx-api.cc
0 → 100644
| 1 | +// cxx-api-examples/sense-voice-cxx-api.cc | ||
| 2 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 3 | + | ||
| 4 | +// | ||
| 5 | +// This file demonstrates how to use SenseVoice with sherpa-onnx's C++ API. | |
| 6 | +// | ||
| 7 | +// clang-format off | ||
| 8 | +// | ||
| 9 | +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 10 | +// tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 11 | +// rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 12 | +// | ||
| 13 | +// clang-format on | ||
| 14 | + | ||
| 15 | +#include <chrono> // NOLINT | ||
| 16 | +#include <iostream> | ||
| 17 | +#include <string> | ||
| 18 | + | ||
| 19 | +#include "sherpa-onnx/c-api/cxx-api.h" | ||
| 20 | + | ||
| 21 | +int32_t main() { | ||
| 22 | + using namespace sherpa_onnx::cxx; | ||
| 23 | + OfflineRecognizerConfig config; | ||
| 24 | + | ||
| 25 | + config.model_config.sense_voice.model = | ||
| 26 | + "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx"; | ||
| 27 | + config.model_config.sense_voice.use_itn = true; | ||
| 28 | + config.model_config.sense_voice.language = "auto"; | ||
| 29 | + config.model_config.tokens = | ||
| 30 | + "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt"; | ||
| 31 | + | ||
| 32 | + config.model_config.num_threads = 1; | ||
| 33 | + | ||
| 34 | + std::cout << "Loading model\n"; | ||
| 35 | + OfflineRecognizer recongizer = OfflineRecognizer::Create(config); | ||
| 36 | + if (!recongizer.Get()) { | ||
| 37 | + std::cerr << "Please check your config\n"; | ||
| 38 | + return -1; | ||
| 39 | + } | ||
| 40 | + std::cout << "Loading model done\n"; | ||
| 41 | + | ||
| 42 | + std::string wave_filename = | ||
| 43 | + "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/en.wav"; | ||
| 44 | + | ||
| 45 | + Wave wave = ReadWave(wave_filename); | ||
| 46 | + if (wave.samples.empty()) { | ||
| 47 | + std::cerr << "Failed to read: '" << wave_filename << "'\n"; | ||
| 48 | + return -1; | ||
| 49 | + } | ||
| 50 | + | ||
| 51 | + std::cout << "Start recognition\n"; | ||
| 52 | + const auto begin = std::chrono::steady_clock::now(); | ||
| 53 | + | ||
| 54 | + OfflineStream stream = recongizer.CreateStream(); | ||
| 55 | + stream.AcceptWaveform(wave.sample_rate, wave.samples.data(), | ||
| 56 | + wave.samples.size()); | ||
| 57 | + | ||
| 58 | + recongizer.Decode(&stream); | ||
| 59 | + | ||
| 60 | + OfflineRecognizerResult result = recongizer.GetResult(&stream); | ||
| 61 | + | ||
| 62 | + const auto end = std::chrono::steady_clock::now(); | ||
| 63 | + const float elapsed_seconds = | ||
| 64 | + std::chrono::duration_cast<std::chrono::milliseconds>(end - begin) | ||
| 65 | + .count() / | ||
| 66 | + 1000.; | ||
| 67 | + float duration = wave.samples.size() / static_cast<float>(wave.sample_rate); | ||
| 68 | + float rtf = elapsed_seconds / duration; | ||
| 69 | + | ||
| 70 | + std::cout << "text: " << result.text << "\n"; | ||
| 71 | + printf("Number of threads: %d\n", config.model_config.num_threads); | ||
| 72 | + printf("Duration: %.3fs\n", duration); | ||
| 73 | + printf("Elapsed seconds: %.3fs\n", elapsed_seconds); | ||
| 74 | + printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds, | ||
| 75 | + duration, rtf); | ||
| 76 | + | ||
| 77 | + return 0; | ||
| 78 | +} |
| @@ -66,6 +66,8 @@ int32_t main() { | @@ -66,6 +66,8 @@ int32_t main() { | ||
| 66 | OnlineStream stream = recongizer.CreateStream(); | 66 | OnlineStream stream = recongizer.CreateStream(); |
| 67 | stream.AcceptWaveform(wave.sample_rate, wave.samples.data(), | 67 | stream.AcceptWaveform(wave.sample_rate, wave.samples.data(), |
| 68 | wave.samples.size()); | 68 | wave.samples.size()); |
| 69 | + stream.InputFinished(); | ||
| 70 | + | ||
| 69 | while (recongizer.IsReady(&stream)) { | 71 | while (recongizer.IsReady(&stream)) { |
| 70 | recongizer.Decode(&stream); | 72 | recongizer.Decode(&stream); |
| 71 | } | 73 | } |
cxx-api-examples/whisper-cxx-api.cc
0 → 100644
| 1 | +// cxx-api-examples/whisper-cxx-api.cc | ||
| 2 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 3 | + | ||
| 4 | +// | ||
| 5 | +// This file demonstrates how to use whisper with sherpa-onnx's C++ API. | ||
| 6 | +// | ||
| 7 | +// clang-format off | ||
| 8 | +// | ||
| 9 | +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2 | ||
| 10 | +// tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2 | ||
| 11 | +// rm sherpa-onnx-whisper-tiny.en.tar.bz2 | ||
| 12 | +// | ||
| 13 | +// clang-format on | ||
| 14 | + | ||
| 15 | +#include <chrono> // NOLINT | ||
| 16 | +#include <iostream> | ||
| 17 | +#include <string> | ||
| 18 | + | ||
| 19 | +#include "sherpa-onnx/c-api/cxx-api.h" | ||
| 20 | + | ||
| 21 | +int32_t main() { | ||
| 22 | + using namespace sherpa_onnx::cxx; | ||
| 23 | + OfflineRecognizerConfig config; | ||
| 24 | + | ||
| 25 | + config.model_config.whisper.encoder = | ||
| 26 | + "./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx"; | ||
| 27 | + config.model_config.whisper.decoder = | ||
| 28 | + "./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx"; | ||
| 29 | + config.model_config.tokens = | ||
| 30 | + "./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt"; | ||
| 31 | + | ||
| 32 | + config.model_config.num_threads = 1; | ||
| 33 | + | ||
| 34 | + std::cout << "Loading model\n"; | ||
| 35 | + OfflineRecognizer recongizer = OfflineRecognizer::Create(config); | ||
| 36 | + if (!recongizer.Get()) { | ||
| 37 | + std::cerr << "Please check your config\n"; | ||
| 38 | + return -1; | ||
| 39 | + } | ||
| 40 | + std::cout << "Loading model done\n"; | ||
| 41 | + | ||
| 42 | + std::string wave_filename = "./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav"; | ||
| 43 | + Wave wave = ReadWave(wave_filename); | ||
| 44 | + if (wave.samples.empty()) { | ||
| 45 | + std::cerr << "Failed to read: '" << wave_filename << "'\n"; | ||
| 46 | + return -1; | ||
| 47 | + } | ||
| 48 | + | ||
| 49 | + std::cout << "Start recognition\n"; | ||
| 50 | + const auto begin = std::chrono::steady_clock::now(); | ||
| 51 | + | ||
| 52 | + OfflineStream stream = recongizer.CreateStream(); | ||
| 53 | + stream.AcceptWaveform(wave.sample_rate, wave.samples.data(), | ||
| 54 | + wave.samples.size()); | ||
| 55 | + | ||
| 56 | + recongizer.Decode(&stream); | ||
| 57 | + | ||
| 58 | + OfflineRecognizerResult result = recongizer.GetResult(&stream); | ||
| 59 | + | ||
| 60 | + const auto end = std::chrono::steady_clock::now(); | ||
| 61 | + const float elapsed_seconds = | ||
| 62 | + std::chrono::duration_cast<std::chrono::milliseconds>(end - begin) | ||
| 63 | + .count() / | ||
| 64 | + 1000.; | ||
| 65 | + float duration = wave.samples.size() / static_cast<float>(wave.sample_rate); | ||
| 66 | + float rtf = elapsed_seconds / duration; | ||
| 67 | + | ||
| 68 | + std::cout << "text: " << result.text << "\n"; | ||
| 69 | + printf("Number of threads: %d\n", config.model_config.num_threads); | ||
| 70 | + printf("Duration: %.3fs\n", duration); | ||
| 71 | + printf("Elapsed seconds: %.3fs\n", elapsed_seconds); | ||
| 72 | + printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds, | ||
| 73 | + duration, rtf); | ||
| 74 | + | ||
| 75 | + return 0; | ||
| 76 | +} |
| @@ -320,7 +320,8 @@ int main(int argc, char **argv) { | @@ -320,7 +320,8 @@ int main(int argc, char **argv) { | ||
| 320 | 320 | ||
| 321 | const SherpaOnnxOnlineRecognizer *recognizer = | 321 | const SherpaOnnxOnlineRecognizer *recognizer = |
| 322 | SherpaOnnxCreateOnlineRecognizer(&config); | 322 | SherpaOnnxCreateOnlineRecognizer(&config); |
| 323 | - SherpaOnnxOnlineStream *stream = SherpaOnnxCreateOnlineStream(recognizer); | 323 | + const SherpaOnnxOnlineStream *stream = |
| 324 | + SherpaOnnxCreateOnlineStream(recognizer); | ||
| 324 | const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50); | 325 | const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50); |
| 325 | int32_t segment_id = 0; | 326 | int32_t segment_id = 0; |
| 326 | 327 |
| @@ -256,7 +256,7 @@ void CNonStreamingSpeechRecognitionDlg::OnBnClickedOk() { | @@ -256,7 +256,7 @@ void CNonStreamingSpeechRecognitionDlg::OnBnClickedOk() { | ||
| 256 | } | 256 | } |
| 257 | pa_stream_ = nullptr; | 257 | pa_stream_ = nullptr; |
| 258 | 258 | ||
| 259 | - SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer_); | 259 | + const SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer_); |
| 260 | 260 | ||
| 261 | SherpaOnnxAcceptWaveformOffline(stream, config_.feat_config.sample_rate, | 261 | SherpaOnnxAcceptWaveformOffline(stream, config_.feat_config.sample_rate, |
| 262 | samples_.data(), static_cast<int32_t>(samples_.size())); | 262 | samples_.data(), static_cast<int32_t>(samples_.size())); |
| @@ -48,7 +48,7 @@ class CNonStreamingSpeechRecognitionDlg : public CDialogEx { | @@ -48,7 +48,7 @@ class CNonStreamingSpeechRecognitionDlg : public CDialogEx { | ||
| 48 | private: | 48 | private: |
| 49 | Microphone mic_; | 49 | Microphone mic_; |
| 50 | 50 | ||
| 51 | - SherpaOnnxOfflineRecognizer *recognizer_ = nullptr; | 51 | + const SherpaOnnxOfflineRecognizer *recognizer_ = nullptr; |
| 52 | SherpaOnnxOfflineRecognizerConfig config_; | 52 | SherpaOnnxOfflineRecognizerConfig config_; |
| 53 | 53 | ||
| 54 | PaStream *pa_stream_ = nullptr; | 54 | PaStream *pa_stream_ = nullptr; |
| @@ -203,7 +203,7 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) { | @@ -203,7 +203,7 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) { | ||
| 203 | SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fars, ruleFars); | 203 | SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fars, ruleFars); |
| 204 | SHERPA_ONNX_ASSIGN_ATTR_FLOAT(blank_penalty, blankPenalty); | 204 | SHERPA_ONNX_ASSIGN_ATTR_FLOAT(blank_penalty, blankPenalty); |
| 205 | 205 | ||
| 206 | - SherpaOnnxOfflineRecognizer *recognizer = | 206 | + const SherpaOnnxOfflineRecognizer *recognizer = |
| 207 | SherpaOnnxCreateOfflineRecognizer(&c); | 207 | SherpaOnnxCreateOfflineRecognizer(&c); |
| 208 | 208 | ||
| 209 | if (c.model_config.transducer.encoder) { | 209 | if (c.model_config.transducer.encoder) { |
| @@ -306,7 +306,7 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) { | @@ -306,7 +306,7 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) { | ||
| 306 | } | 306 | } |
| 307 | 307 | ||
| 308 | return Napi::External<SherpaOnnxOfflineRecognizer>::New( | 308 | return Napi::External<SherpaOnnxOfflineRecognizer>::New( |
| 309 | - env, recognizer, | 309 | + env, const_cast<SherpaOnnxOfflineRecognizer *>(recognizer), |
| 310 | [](Napi::Env env, SherpaOnnxOfflineRecognizer *recognizer) { | 310 | [](Napi::Env env, SherpaOnnxOfflineRecognizer *recognizer) { |
| 311 | SherpaOnnxDestroyOfflineRecognizer(recognizer); | 311 | SherpaOnnxDestroyOfflineRecognizer(recognizer); |
| 312 | }); | 312 | }); |
| @@ -336,10 +336,12 @@ static Napi::External<SherpaOnnxOfflineStream> CreateOfflineStreamWrapper( | @@ -336,10 +336,12 @@ static Napi::External<SherpaOnnxOfflineStream> CreateOfflineStreamWrapper( | ||
| 336 | SherpaOnnxOfflineRecognizer *recognizer = | 336 | SherpaOnnxOfflineRecognizer *recognizer = |
| 337 | info[0].As<Napi::External<SherpaOnnxOfflineRecognizer>>().Data(); | 337 | info[0].As<Napi::External<SherpaOnnxOfflineRecognizer>>().Data(); |
| 338 | 338 | ||
| 339 | - SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer); | 339 | + const SherpaOnnxOfflineStream *stream = |
| 340 | + SherpaOnnxCreateOfflineStream(recognizer); | ||
| 340 | 341 | ||
| 341 | return Napi::External<SherpaOnnxOfflineStream>::New( | 342 | return Napi::External<SherpaOnnxOfflineStream>::New( |
| 342 | - env, stream, [](Napi::Env env, SherpaOnnxOfflineStream *stream) { | 343 | + env, const_cast<SherpaOnnxOfflineStream *>(stream), |
| 344 | + [](Napi::Env env, SherpaOnnxOfflineStream *stream) { | ||
| 343 | SherpaOnnxDestroyOfflineStream(stream); | 345 | SherpaOnnxDestroyOfflineStream(stream); |
| 344 | }); | 346 | }); |
| 345 | } | 347 | } |
| @@ -168,14 +168,14 @@ void SherpaOnnxDestroyOnlineRecognizer( | @@ -168,14 +168,14 @@ void SherpaOnnxDestroyOnlineRecognizer( | ||
| 168 | delete recognizer; | 168 | delete recognizer; |
| 169 | } | 169 | } |
| 170 | 170 | ||
| 171 | -SherpaOnnxOnlineStream *SherpaOnnxCreateOnlineStream( | 171 | +const SherpaOnnxOnlineStream *SherpaOnnxCreateOnlineStream( |
| 172 | const SherpaOnnxOnlineRecognizer *recognizer) { | 172 | const SherpaOnnxOnlineRecognizer *recognizer) { |
| 173 | SherpaOnnxOnlineStream *stream = | 173 | SherpaOnnxOnlineStream *stream = |
| 174 | new SherpaOnnxOnlineStream(recognizer->impl->CreateStream()); | 174 | new SherpaOnnxOnlineStream(recognizer->impl->CreateStream()); |
| 175 | return stream; | 175 | return stream; |
| 176 | } | 176 | } |
| 177 | 177 | ||
| 178 | -SherpaOnnxOnlineStream *SherpaOnnxCreateOnlineStreamWithHotwords( | 178 | +const SherpaOnnxOnlineStream *SherpaOnnxCreateOnlineStreamWithHotwords( |
| 179 | const SherpaOnnxOnlineRecognizer *recognizer, const char *hotwords) { | 179 | const SherpaOnnxOnlineRecognizer *recognizer, const char *hotwords) { |
| 180 | SherpaOnnxOnlineStream *stream = | 180 | SherpaOnnxOnlineStream *stream = |
| 181 | new SherpaOnnxOnlineStream(recognizer->impl->CreateStream(hotwords)); | 181 | new SherpaOnnxOnlineStream(recognizer->impl->CreateStream(hotwords)); |
| @@ -351,7 +351,7 @@ struct SherpaOnnxOfflineStream { | @@ -351,7 +351,7 @@ struct SherpaOnnxOfflineStream { | ||
| 351 | static sherpa_onnx::OfflineRecognizerConfig convertConfig( | 351 | static sherpa_onnx::OfflineRecognizerConfig convertConfig( |
| 352 | const SherpaOnnxOfflineRecognizerConfig *config); | 352 | const SherpaOnnxOfflineRecognizerConfig *config); |
| 353 | 353 | ||
| 354 | -SherpaOnnxOfflineRecognizer *SherpaOnnxCreateOfflineRecognizer( | 354 | +const SherpaOnnxOfflineRecognizer *SherpaOnnxCreateOfflineRecognizer( |
| 355 | const SherpaOnnxOfflineRecognizerConfig *config) { | 355 | const SherpaOnnxOfflineRecognizerConfig *config) { |
| 356 | sherpa_onnx::OfflineRecognizerConfig recognizer_config = | 356 | sherpa_onnx::OfflineRecognizerConfig recognizer_config = |
| 357 | convertConfig(config); | 357 | convertConfig(config); |
| @@ -490,11 +490,11 @@ void SherpaOnnxOfflineRecognizerSetConfig( | @@ -490,11 +490,11 @@ void SherpaOnnxOfflineRecognizerSetConfig( | ||
| 490 | } | 490 | } |
| 491 | 491 | ||
| 492 | void SherpaOnnxDestroyOfflineRecognizer( | 492 | void SherpaOnnxDestroyOfflineRecognizer( |
| 493 | - SherpaOnnxOfflineRecognizer *recognizer) { | 493 | + const SherpaOnnxOfflineRecognizer *recognizer) { |
| 494 | delete recognizer; | 494 | delete recognizer; |
| 495 | } | 495 | } |
| 496 | 496 | ||
| 497 | -SherpaOnnxOfflineStream *SherpaOnnxCreateOfflineStream( | 497 | +const SherpaOnnxOfflineStream *SherpaOnnxCreateOfflineStream( |
| 498 | const SherpaOnnxOfflineRecognizer *recognizer) { | 498 | const SherpaOnnxOfflineRecognizer *recognizer) { |
| 499 | SherpaOnnxOfflineStream *stream = | 499 | SherpaOnnxOfflineStream *stream = |
| 500 | new SherpaOnnxOfflineStream(recognizer->impl->CreateStream()); | 500 | new SherpaOnnxOfflineStream(recognizer->impl->CreateStream()); |
| @@ -518,8 +518,8 @@ void SherpaOnnxDecodeOfflineStream( | @@ -518,8 +518,8 @@ void SherpaOnnxDecodeOfflineStream( | ||
| 518 | } | 518 | } |
| 519 | 519 | ||
| 520 | void SherpaOnnxDecodeMultipleOfflineStreams( | 520 | void SherpaOnnxDecodeMultipleOfflineStreams( |
| 521 | - SherpaOnnxOfflineRecognizer *recognizer, SherpaOnnxOfflineStream **streams, | ||
| 522 | - int32_t n) { | 521 | + const SherpaOnnxOfflineRecognizer *recognizer, |
| 522 | + const SherpaOnnxOfflineStream **streams, int32_t n) { | ||
| 523 | std::vector<sherpa_onnx::OfflineStream *> ss(n); | 523 | std::vector<sherpa_onnx::OfflineStream *> ss(n); |
| 524 | for (int32_t i = 0; i != n; ++i) { | 524 | for (int32_t i = 0; i != n; ++i) { |
| 525 | ss[i] = streams[i]->impl.get(); | 525 | ss[i] = streams[i]->impl.get(); |
| @@ -220,7 +220,7 @@ SHERPA_ONNX_API void SherpaOnnxDestroyOnlineRecognizer( | @@ -220,7 +220,7 @@ SHERPA_ONNX_API void SherpaOnnxDestroyOnlineRecognizer( | ||
| 220 | /// @param recognizer A pointer returned by SherpaOnnxCreateOnlineRecognizer() | 220 | /// @param recognizer A pointer returned by SherpaOnnxCreateOnlineRecognizer() |
| 221 | /// @return Return a pointer to an OnlineStream. The user has to invoke | 221 | /// @return Return a pointer to an OnlineStream. The user has to invoke |
| 222 | /// SherpaOnnxDestroyOnlineStream() to free it to avoid memory leak. | 222 | /// SherpaOnnxDestroyOnlineStream() to free it to avoid memory leak. |
| 223 | -SHERPA_ONNX_API SherpaOnnxOnlineStream *SherpaOnnxCreateOnlineStream( | 223 | +SHERPA_ONNX_API const SherpaOnnxOnlineStream *SherpaOnnxCreateOnlineStream( |
| 224 | const SherpaOnnxOnlineRecognizer *recognizer); | 224 | const SherpaOnnxOnlineRecognizer *recognizer); |
| 225 | 225 | ||
| 226 | /// Create an online stream for accepting wave samples with the specified hot | 226 | /// Create an online stream for accepting wave samples with the specified hot |
| @@ -229,7 +229,7 @@ SHERPA_ONNX_API SherpaOnnxOnlineStream *SherpaOnnxCreateOnlineStream( | @@ -229,7 +229,7 @@ SHERPA_ONNX_API SherpaOnnxOnlineStream *SherpaOnnxCreateOnlineStream( | ||
| 229 | /// @param recognizer A pointer returned by SherpaOnnxCreateOnlineRecognizer() | 229 | /// @param recognizer A pointer returned by SherpaOnnxCreateOnlineRecognizer() |
| 230 | /// @return Return a pointer to an OnlineStream. The user has to invoke | 230 | /// @return Return a pointer to an OnlineStream. The user has to invoke |
| 231 | /// SherpaOnnxDestroyOnlineStream() to free it to avoid memory leak. | 231 | /// SherpaOnnxDestroyOnlineStream() to free it to avoid memory leak. |
| 232 | -SHERPA_ONNX_API SherpaOnnxOnlineStream * | 232 | +SHERPA_ONNX_API const SherpaOnnxOnlineStream * |
| 233 | SherpaOnnxCreateOnlineStreamWithHotwords( | 233 | SherpaOnnxCreateOnlineStreamWithHotwords( |
| 234 | const SherpaOnnxOnlineRecognizer *recognizer, const char *hotwords); | 234 | const SherpaOnnxOnlineRecognizer *recognizer, const char *hotwords); |
| 235 | 235 | ||
| @@ -453,7 +453,8 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineStream SherpaOnnxOfflineStream; | @@ -453,7 +453,8 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineStream SherpaOnnxOfflineStream; | ||
| 453 | /// @return Return a pointer to the recognizer. The user has to invoke | 453 | /// @return Return a pointer to the recognizer. The user has to invoke |
| 454 | // SherpaOnnxDestroyOfflineRecognizer() to free it to avoid memory | 454 | // SherpaOnnxDestroyOfflineRecognizer() to free it to avoid memory |
| 455 | // leak. | 455 | // leak. |
| 456 | -SHERPA_ONNX_API SherpaOnnxOfflineRecognizer *SherpaOnnxCreateOfflineRecognizer( | 456 | +SHERPA_ONNX_API const SherpaOnnxOfflineRecognizer * |
| 457 | +SherpaOnnxCreateOfflineRecognizer( | ||
| 457 | const SherpaOnnxOfflineRecognizerConfig *config); | 458 | const SherpaOnnxOfflineRecognizerConfig *config); |
| 458 | 459 | ||
| 459 | /// @param config Config for the recognizer. | 460 | /// @param config Config for the recognizer. |
| @@ -465,14 +466,14 @@ SHERPA_ONNX_API void SherpaOnnxOfflineRecognizerSetConfig( | @@ -465,14 +466,14 @@ SHERPA_ONNX_API void SherpaOnnxOfflineRecognizerSetConfig( | ||
| 465 | /// | 466 | /// |
| 466 | /// @param p A pointer returned by SherpaOnnxCreateOfflineRecognizer() | 467 | /// @param p A pointer returned by SherpaOnnxCreateOfflineRecognizer() |
| 467 | SHERPA_ONNX_API void SherpaOnnxDestroyOfflineRecognizer( | 468 | SHERPA_ONNX_API void SherpaOnnxDestroyOfflineRecognizer( |
| 468 | - SherpaOnnxOfflineRecognizer *recognizer); | 469 | + const SherpaOnnxOfflineRecognizer *recognizer); |
| 469 | 470 | ||
| 470 | /// Create an offline stream for accepting wave samples. | 471 | /// Create an offline stream for accepting wave samples. |
| 471 | /// | 472 | /// |
| 472 | /// @param recognizer A pointer returned by SherpaOnnxCreateOfflineRecognizer() | 473 | /// @param recognizer A pointer returned by SherpaOnnxCreateOfflineRecognizer() |
| 473 | /// @return Return a pointer to an OfflineStream. The user has to invoke | 474 | /// @return Return a pointer to an OfflineStream. The user has to invoke |
| 474 | /// SherpaOnnxDestroyOfflineStream() to free it to avoid memory leak. | 475 | /// SherpaOnnxDestroyOfflineStream() to free it to avoid memory leak. |
| 475 | -SHERPA_ONNX_API SherpaOnnxOfflineStream *SherpaOnnxCreateOfflineStream( | 476 | +SHERPA_ONNX_API const SherpaOnnxOfflineStream *SherpaOnnxCreateOfflineStream( |
| 476 | const SherpaOnnxOfflineRecognizer *recognizer); | 477 | const SherpaOnnxOfflineRecognizer *recognizer); |
| 477 | 478 | ||
| 478 | /// Destroy an offline stream. | 479 | /// Destroy an offline stream. |
| @@ -518,8 +519,8 @@ SHERPA_ONNX_API void SherpaOnnxDecodeOfflineStream( | @@ -518,8 +519,8 @@ SHERPA_ONNX_API void SherpaOnnxDecodeOfflineStream( | ||
| 518 | /// by SherpaOnnxCreateOfflineStream(). | 519 | /// by SherpaOnnxCreateOfflineStream(). |
| 519 | /// @param n Number of entries in the given streams. | 520 | /// @param n Number of entries in the given streams. |
| 520 | SHERPA_ONNX_API void SherpaOnnxDecodeMultipleOfflineStreams( | 521 | SHERPA_ONNX_API void SherpaOnnxDecodeMultipleOfflineStreams( |
| 521 | - SherpaOnnxOfflineRecognizer *recognizer, SherpaOnnxOfflineStream **streams, | ||
| 522 | - int32_t n); | 522 | + const SherpaOnnxOfflineRecognizer *recognizer, |
| 523 | + const SherpaOnnxOfflineStream **streams, int32_t n); | ||
| 523 | 524 | ||
| 524 | SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerResult { | 525 | SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerResult { |
| 525 | const char *text; | 526 | const char *text; |
| @@ -36,6 +36,10 @@ void OnlineStream::AcceptWaveform(int32_t sample_rate, const float *samples, | @@ -36,6 +36,10 @@ void OnlineStream::AcceptWaveform(int32_t sample_rate, const float *samples, | ||
| 36 | SherpaOnnxOnlineStreamAcceptWaveform(p_, sample_rate, samples, n); | 36 | SherpaOnnxOnlineStreamAcceptWaveform(p_, sample_rate, samples, n); |
| 37 | } | 37 | } |
| 38 | 38 | ||
| 39 | +void OnlineStream::InputFinished() const { | ||
| 40 | + SherpaOnnxOnlineStreamInputFinished(p_); | ||
| 41 | +} | ||
| 42 | + | ||
| 39 | OnlineRecognizer OnlineRecognizer::Create( | 43 | OnlineRecognizer OnlineRecognizer::Create( |
| 40 | const OnlineRecognizerConfig &config) { | 44 | const OnlineRecognizerConfig &config) { |
| 41 | struct SherpaOnnxOnlineRecognizerConfig c; | 45 | struct SherpaOnnxOnlineRecognizerConfig c; |
| @@ -119,6 +123,14 @@ void OnlineRecognizer::Decode(const OnlineStream *s) const { | @@ -119,6 +123,14 @@ void OnlineRecognizer::Decode(const OnlineStream *s) const { | ||
| 119 | SherpaOnnxDecodeOnlineStream(p_, s->Get()); | 123 | SherpaOnnxDecodeOnlineStream(p_, s->Get()); |
| 120 | } | 124 | } |
| 121 | 125 | ||
| 126 | +void OnlineRecognizer::Reset(const OnlineStream *s) const { | ||
| 127 | + SherpaOnnxOnlineStreamReset(p_, s->Get()); | ||
| 128 | +} | ||
| 129 | + | ||
| 130 | +bool OnlineRecognizer::IsEndpoint(const OnlineStream *s) const { | ||
| 131 | + return SherpaOnnxOnlineStreamIsEndpoint(p_, s->Get()); | ||
| 132 | +} | ||
| 133 | + | ||
| 122 | void OnlineRecognizer::Decode(const OnlineStream *ss, int32_t n) const { | 134 | void OnlineRecognizer::Decode(const OnlineStream *ss, int32_t n) const { |
| 123 | if (n <= 0) { | 135 | if (n <= 0) { |
| 124 | return; | 136 | return; |
| @@ -156,4 +168,138 @@ OnlineRecognizerResult OnlineRecognizer::GetResult( | @@ -156,4 +168,138 @@ OnlineRecognizerResult OnlineRecognizer::GetResult( | ||
| 156 | return ans; | 168 | return ans; |
| 157 | } | 169 | } |
| 158 | 170 | ||
| 171 | +// ============================================================================ | ||
| 172 | +// Non-streaming ASR | ||
| 173 | +// ============================================================================ | ||
| 174 | +OfflineStream::OfflineStream(const SherpaOnnxOfflineStream *p) | ||
| 175 | + : MoveOnly<OfflineStream, SherpaOnnxOfflineStream>(p) {} | ||
| 176 | + | ||
| 177 | +void OfflineStream::Destroy(const SherpaOnnxOfflineStream *p) const { | ||
| 178 | + SherpaOnnxDestroyOfflineStream(p); | ||
| 179 | +} | ||
| 180 | + | ||
| 181 | +void OfflineStream::AcceptWaveform(int32_t sample_rate, const float *samples, | ||
| 182 | + int32_t n) const { | ||
| 183 | + SherpaOnnxAcceptWaveformOffline(p_, sample_rate, samples, n); | ||
| 184 | +} | ||
| 185 | + | ||
| 186 | +OfflineRecognizer OfflineRecognizer::Create( | ||
| 187 | + const OfflineRecognizerConfig &config) { | ||
| 188 | + struct SherpaOnnxOfflineRecognizerConfig c; | ||
| 189 | + memset(&c, 0, sizeof(c)); | ||
| 190 | + | ||
| 191 | + c.feat_config.sample_rate = config.feat_config.sample_rate; | ||
| 192 | + c.feat_config.feature_dim = config.feat_config.feature_dim; | ||
| 193 | + c.model_config.transducer.encoder = | ||
| 194 | + config.model_config.transducer.encoder.c_str(); | ||
| 195 | + c.model_config.transducer.decoder = | ||
| 196 | + config.model_config.transducer.decoder.c_str(); | ||
| 197 | + c.model_config.transducer.joiner = | ||
| 198 | + config.model_config.transducer.joiner.c_str(); | ||
| 199 | + | ||
| 200 | + c.model_config.paraformer.model = | ||
| 201 | + config.model_config.paraformer.model.c_str(); | ||
| 202 | + | ||
| 203 | + c.model_config.nemo_ctc.model = config.model_config.nemo_ctc.model.c_str(); | ||
| 204 | + | ||
| 205 | + c.model_config.whisper.encoder = config.model_config.whisper.encoder.c_str(); | ||
| 206 | + c.model_config.whisper.decoder = config.model_config.whisper.decoder.c_str(); | ||
| 207 | + c.model_config.whisper.language = | ||
| 208 | + config.model_config.whisper.language.c_str(); | ||
| 209 | + c.model_config.whisper.task = config.model_config.whisper.task.c_str(); | ||
| 210 | + c.model_config.whisper.tail_paddings = | ||
| 211 | + config.model_config.whisper.tail_paddings; | ||
| 212 | + | ||
| 213 | + c.model_config.tdnn.model = config.model_config.tdnn.model.c_str(); | ||
| 214 | + | ||
| 215 | + c.model_config.tokens = config.model_config.tokens.c_str(); | ||
| 216 | + c.model_config.num_threads = config.model_config.num_threads; | ||
| 217 | + c.model_config.debug = config.model_config.debug; | ||
| 218 | + c.model_config.provider = config.model_config.provider.c_str(); | ||
| 219 | + c.model_config.model_type = config.model_config.model_type.c_str(); | ||
| 220 | + c.model_config.modeling_unit = config.model_config.modeling_unit.c_str(); | ||
| 221 | + c.model_config.bpe_vocab = config.model_config.bpe_vocab.c_str(); | ||
| 222 | + c.model_config.telespeech_ctc = config.model_config.telespeech_ctc.c_str(); | ||
| 223 | + | ||
| 224 | + c.model_config.sense_voice.model = | ||
| 225 | + config.model_config.sense_voice.model.c_str(); | ||
| 226 | + c.model_config.sense_voice.language = | ||
| 227 | + config.model_config.sense_voice.language.c_str(); | ||
| 228 | + c.model_config.sense_voice.use_itn = config.model_config.sense_voice.use_itn; | ||
| 229 | + | ||
| 230 | + c.lm_config.model = config.lm_config.model.c_str(); | ||
| 231 | + c.lm_config.scale = config.lm_config.scale; | ||
| 232 | + | ||
| 233 | + c.decoding_method = config.decoding_method.c_str(); | ||
| 234 | + c.max_active_paths = config.max_active_paths; | ||
| 235 | + c.hotwords_file = config.hotwords_file.c_str(); | ||
| 236 | + c.hotwords_score = config.hotwords_score; | ||
| 237 | + | ||
| 238 | + c.rule_fsts = config.rule_fsts.c_str(); | ||
| 239 | + c.rule_fars = config.rule_fars.c_str(); | ||
| 240 | + | ||
| 241 | + c.blank_penalty = config.blank_penalty; | ||
| 242 | + | ||
| 243 | + auto p = SherpaOnnxCreateOfflineRecognizer(&c); | ||
| 244 | + return OfflineRecognizer(p); | ||
| 245 | +} | ||
| 246 | + | ||
| 247 | +OfflineRecognizer::OfflineRecognizer(const SherpaOnnxOfflineRecognizer *p) | ||
| 248 | + : MoveOnly<OfflineRecognizer, SherpaOnnxOfflineRecognizer>(p) {} | ||
| 249 | + | ||
| 250 | +void OfflineRecognizer::Destroy(const SherpaOnnxOfflineRecognizer *p) const { | ||
| 251 | + SherpaOnnxDestroyOfflineRecognizer(p); | ||
| 252 | +} | ||
| 253 | + | ||
| 254 | +OfflineStream OfflineRecognizer::CreateStream() const { | ||
| 255 | + auto p = SherpaOnnxCreateOfflineStream(p_); | ||
| 256 | + return OfflineStream{p}; | ||
| 257 | +} | ||
| 258 | + | ||
| 259 | +void OfflineRecognizer::Decode(const OfflineStream *s) const { | ||
| 260 | + SherpaOnnxDecodeOfflineStream(p_, s->Get()); | ||
| 261 | +} | ||
| 262 | + | ||
| 263 | +void OfflineRecognizer::Decode(const OfflineStream *ss, int32_t n) const { | ||
| 264 | + if (n <= 0) { | ||
| 265 | + return; | ||
| 266 | + } | ||
| 267 | + | ||
| 268 | + std::vector<const SherpaOnnxOfflineStream *> streams(n); | ||
| 269 | + for (int32_t i = 0; i != n; ++i) { | ||
| 270 | + streams[i] = ss[i].Get(); | ||
| 271 | + } | ||
| 272 | + | ||
| 273 | + SherpaOnnxDecodeMultipleOfflineStreams(p_, streams.data(), n); | ||
| 274 | +} | ||
| 275 | + | ||
| 276 | +OfflineRecognizerResult OfflineRecognizer::GetResult( | ||
| 277 | + const OfflineStream *s) const { | ||
| 278 | + auto r = SherpaOnnxGetOfflineStreamResult(s->Get()); | ||
| 279 | + | ||
| 280 | + OfflineRecognizerResult ans; | ||
| 281 | + if (r) { | ||
| 282 | + ans.text = r->text; | ||
| 283 | + | ||
| 284 | + if (r->timestamps) { | ||
| 285 | + ans.timestamps.resize(r->count); | ||
| 286 | + std::copy(r->timestamps, r->timestamps + r->count, ans.timestamps.data()); | ||
| 287 | + } | ||
| 288 | + | ||
| 289 | + ans.tokens.resize(r->count); | ||
| 290 | + for (int32_t i = 0; i != r->count; ++i) { | ||
| 291 | + ans.tokens[i] = r->tokens_arr[i]; | ||
| 292 | + } | ||
| 293 | + | ||
| 294 | + ans.json = r->json; | ||
| 295 | + ans.lang = r->lang ? r->lang : ""; | ||
| 296 | + ans.emotion = r->emotion ? r->emotion : ""; | ||
| 297 | + ans.event = r->event ? r->event : ""; | ||
| 298 | + } | ||
| 299 | + | ||
| 300 | + SherpaOnnxDestroyOfflineRecognizerResult(r); | ||
| 301 | + | ||
| 302 | + return ans; | ||
| 303 | +} | ||
| 304 | + | ||
| 159 | } // namespace sherpa_onnx::cxx | 305 | } // namespace sherpa_onnx::cxx |
| @@ -13,6 +13,9 @@ | @@ -13,6 +13,9 @@ | ||
| 13 | 13 | ||
| 14 | namespace sherpa_onnx::cxx { | 14 | namespace sherpa_onnx::cxx { |
| 15 | 15 | ||
| 16 | +// ============================================================================ | ||
| 17 | +// Streaming ASR | ||
| 18 | +// ============================================================================ | ||
| 16 | struct SHERPA_ONNX_API OnlineTransducerModelConfig { | 19 | struct SHERPA_ONNX_API OnlineTransducerModelConfig { |
| 17 | std::string encoder; | 20 | std::string encoder; |
| 18 | std::string decoder; | 21 | std::string decoder; |
| @@ -148,6 +151,8 @@ class SHERPA_ONNX_API OnlineStream | @@ -148,6 +151,8 @@ class SHERPA_ONNX_API OnlineStream | ||
| 148 | void AcceptWaveform(int32_t sample_rate, const float *samples, | 151 | void AcceptWaveform(int32_t sample_rate, const float *samples, |
| 149 | int32_t n) const; | 152 | int32_t n) const; |
| 150 | 153 | ||
| 154 | + void InputFinished() const; | ||
| 155 | + | ||
| 151 | void Destroy(const SherpaOnnxOnlineStream *p) const; | 156 | void Destroy(const SherpaOnnxOnlineStream *p) const; |
| 152 | }; | 157 | }; |
| 153 | 158 | ||
| @@ -170,10 +175,134 @@ class SHERPA_ONNX_API OnlineRecognizer | @@ -170,10 +175,134 @@ class SHERPA_ONNX_API OnlineRecognizer | ||
| 170 | 175 | ||
| 171 | OnlineRecognizerResult GetResult(const OnlineStream *s) const; | 176 | OnlineRecognizerResult GetResult(const OnlineStream *s) const; |
| 172 | 177 | ||
| 178 | + void Reset(const OnlineStream *s) const; | ||
| 179 | + | ||
| 180 | + bool IsEndpoint(const OnlineStream *s) const; | ||
| 181 | + | ||
| 173 | private: | 182 | private: |
| 174 | explicit OnlineRecognizer(const SherpaOnnxOnlineRecognizer *p); | 183 | explicit OnlineRecognizer(const SherpaOnnxOnlineRecognizer *p); |
| 175 | }; | 184 | }; |
| 176 | 185 | ||
| 186 | +// ============================================================================ | ||
| 187 | +// Non-streaming ASR | ||
| 188 | +// ============================================================================ | ||
| 189 | +struct SHERPA_ONNX_API OfflineTransducerModelConfig { | ||
| 190 | + std::string encoder; | ||
| 191 | + std::string decoder; | ||
| 192 | + std::string joiner; | ||
| 193 | +}; | ||
| 194 | + | ||
| 195 | +struct SHERPA_ONNX_API OfflineParaformerModelConfig { | ||
| 196 | + std::string model; | ||
| 197 | +}; | ||
| 198 | + | ||
| 199 | +struct SHERPA_ONNX_API OfflineNemoEncDecCtcModelConfig { | ||
| 200 | + std::string model; | ||
| 201 | +}; | ||
| 202 | + | ||
| 203 | +struct SHERPA_ONNX_API OfflineWhisperModelConfig { | ||
| 204 | + std::string encoder; | ||
| 205 | + std::string decoder; | ||
| 206 | + std::string language; | ||
| 207 | + std::string task = "transcribe"; | ||
| 208 | + int32_t tail_paddings = -1; | ||
| 209 | +}; | ||
| 210 | + | ||
| 211 | +struct SHERPA_ONNX_API OfflineTdnnModelConfig { | ||
| 212 | + std::string model; | ||
| 213 | +}; | ||
| 214 | + | ||
| 215 | +struct SHERPA_ONNX_API SherpaOnnxOfflineLMConfig { | ||
| 216 | + std::string model; | ||
| 217 | + float scale = 1.0; | ||
| 218 | +}; | ||
| 219 | + | ||
| 220 | +struct SHERPA_ONNX_API OfflineSenseVoiceModelConfig { | ||
| 221 | + std::string model; | ||
| 222 | + std::string language; | ||
| 223 | + bool use_itn = false; | ||
| 224 | +}; | ||
| 225 | + | ||
| 226 | +struct SHERPA_ONNX_API OfflineModelConfig { | ||
| 227 | + OfflineTransducerModelConfig transducer; | ||
| 228 | + OfflineParaformerModelConfig paraformer; | ||
| 229 | + OfflineNemoEncDecCtcModelConfig nemo_ctc; | ||
| 230 | + OfflineWhisperModelConfig whisper; | ||
| 231 | + OfflineTdnnModelConfig tdnn; | ||
| 232 | + | ||
| 233 | + std::string tokens; | ||
| 234 | + int32_t num_threads = 1; | ||
| 235 | + bool debug = false; | ||
| 236 | + std::string provider = "cpu"; | ||
| 237 | + std::string model_type; | ||
| 238 | + std::string modeling_unit = "cjkchar"; | ||
| 239 | + std::string bpe_vocab; | ||
| 240 | + std::string telespeech_ctc; | ||
| 241 | + OfflineSenseVoiceModelConfig sense_voice; | ||
| 242 | +}; | ||
| 243 | + | ||
| 244 | +struct SHERPA_ONNX_API OfflineLMConfig { | ||
| 245 | + std::string model; | ||
| 246 | + float scale = 1.0; | ||
| 247 | +}; | ||
| 248 | + | ||
| 249 | +struct SHERPA_ONNX_API OfflineRecognizerConfig { | ||
| 250 | + FeatureConfig feat_config; | ||
| 251 | + OfflineModelConfig model_config; | ||
| 252 | + OfflineLMConfig lm_config; | ||
| 253 | + | ||
| 254 | + std::string decoding_method = "greedy_search"; | ||
| 255 | + int32_t max_active_paths = 4; | ||
| 256 | + | ||
| 257 | + std::string hotwords_file; | ||
| 258 | + | ||
| 259 | + float hotwords_score = 1.5; | ||
| 260 | + std::string rule_fsts; | ||
| 261 | + std::string rule_fars; | ||
| 262 | + float blank_penalty = 0; | ||
| 263 | +}; | ||
| 264 | + | ||
| 265 | +struct SHERPA_ONNX_API OfflineRecognizerResult { | ||
| 266 | + std::string text; | ||
| 267 | + std::vector<float> timestamps; | ||
| 268 | + std::vector<std::string> tokens; | ||
| 269 | + std::string json; | ||
| 270 | + std::string lang; | ||
| 271 | + std::string emotion; | ||
| 272 | + std::string event; | ||
| 273 | +}; | ||
| 274 | + | ||
| 275 | +class SHERPA_ONNX_API OfflineStream | ||
| 276 | + : public MoveOnly<OfflineStream, SherpaOnnxOfflineStream> { | ||
| 277 | + public: | ||
| 278 | + explicit OfflineStream(const SherpaOnnxOfflineStream *p); | ||
| 279 | + | ||
| 280 | + void AcceptWaveform(int32_t sample_rate, const float *samples, | ||
| 281 | + int32_t n) const; | ||
| 282 | + | ||
| 283 | + void Destroy(const SherpaOnnxOfflineStream *p) const; | ||
| 284 | +}; | ||
| 285 | + | ||
| 286 | +class SHERPA_ONNX_API OfflineRecognizer | ||
| 287 | + : public MoveOnly<OfflineRecognizer, SherpaOnnxOfflineRecognizer> { | ||
| 288 | + public: | ||
| 289 | + static OfflineRecognizer Create(const OfflineRecognizerConfig &config); | ||
| 290 | + | ||
| 291 | + void Destroy(const SherpaOnnxOfflineRecognizer *p) const; | ||
| 292 | + | ||
| 293 | + OfflineStream CreateStream() const; | ||
| 294 | + | ||
| 295 | + void Decode(const OfflineStream *s) const; | ||
| 296 | + | ||
| 297 | + void Decode(const OfflineStream *ss, int32_t n) const; | ||
| 298 | + | ||
| 299 | + OfflineRecognizerResult GetResult(const OfflineStream *s) const; | ||
| 300 | + | ||
| 301 | + private: | ||
| 302 | + explicit OfflineRecognizer(const SherpaOnnxOfflineRecognizer *p); | ||
| 303 | +}; | ||
| 304 | + | ||
| 177 | } // namespace sherpa_onnx::cxx | 305 | } // namespace sherpa_onnx::cxx |
| 178 | 306 | ||
| 179 | #endif // SHERPA_ONNX_C_API_CXX_API_H_ | 307 | #endif // SHERPA_ONNX_C_API_CXX_API_H_ |
| 308 | + // |
| @@ -30,9 +30,13 @@ std::unique_ptr<OnlineRecognizerImpl> OnlineRecognizerImpl::Create( | @@ -30,9 +30,13 @@ std::unique_ptr<OnlineRecognizerImpl> OnlineRecognizerImpl::Create( | ||
| 30 | if (!config.model_config.transducer.encoder.empty()) { | 30 | if (!config.model_config.transducer.encoder.empty()) { |
| 31 | Ort::Env env(ORT_LOGGING_LEVEL_ERROR); | 31 | Ort::Env env(ORT_LOGGING_LEVEL_ERROR); |
| 32 | 32 | ||
| 33 | + Ort::SessionOptions sess_opts; | ||
| 34 | + sess_opts.SetIntraOpNumThreads(1); | ||
| 35 | + sess_opts.SetInterOpNumThreads(1); | ||
| 36 | + | ||
| 33 | auto decoder_model = ReadFile(config.model_config.transducer.decoder); | 37 | auto decoder_model = ReadFile(config.model_config.transducer.decoder); |
| 34 | - auto sess = std::make_unique<Ort::Session>( | ||
| 35 | - env, decoder_model.data(), decoder_model.size(), Ort::SessionOptions{}); | 38 | + auto sess = std::make_unique<Ort::Session>(env, decoder_model.data(), |
| 39 | + decoder_model.size(), sess_opts); | ||
| 36 | 40 | ||
| 37 | size_t node_count = sess->GetOutputCount(); | 41 | size_t node_count = sess->GetOutputCount(); |
| 38 | 42 | ||
| @@ -63,9 +67,13 @@ std::unique_ptr<OnlineRecognizerImpl> OnlineRecognizerImpl::Create( | @@ -63,9 +67,13 @@ std::unique_ptr<OnlineRecognizerImpl> OnlineRecognizerImpl::Create( | ||
| 63 | if (!config.model_config.transducer.encoder.empty()) { | 67 | if (!config.model_config.transducer.encoder.empty()) { |
| 64 | Ort::Env env(ORT_LOGGING_LEVEL_ERROR); | 68 | Ort::Env env(ORT_LOGGING_LEVEL_ERROR); |
| 65 | 69 | ||
| 70 | + Ort::SessionOptions sess_opts; | ||
| 71 | + sess_opts.SetIntraOpNumThreads(1); | ||
| 72 | + sess_opts.SetInterOpNumThreads(1); | ||
| 73 | + | ||
| 66 | auto decoder_model = ReadFile(mgr, config.model_config.transducer.decoder); | 74 | auto decoder_model = ReadFile(mgr, config.model_config.transducer.decoder); |
| 67 | - auto sess = std::make_unique<Ort::Session>( | ||
| 68 | - env, decoder_model.data(), decoder_model.size(), Ort::SessionOptions{}); | 75 | + auto sess = std::make_unique<Ort::Session>(env, decoder_model.data(), |
| 76 | + decoder_model.size(), sess_opts); | ||
| 69 | 77 | ||
| 70 | size_t node_count = sess->GetOutputCount(); | 78 | size_t node_count = sess->GetOutputCount(); |
| 71 | 79 |
-
请 注册 或 登录 后发表评论