Committed by
GitHub
Add C API for spoken language identification. (#695)
正在显示
18 个修改的文件
包含
363 行增加
和
67 行删除
.github/scripts/test-c-api.sh
0 → 100755
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -e | ||
| 4 | + | ||
| 5 | +log() { | ||
| 6 | + # This function is from espnet | ||
| 7 | + local fname=${BASH_SOURCE[1]##*/} | ||
| 8 | + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" | ||
| 9 | +} | ||
| 10 | + | ||
| 11 | +echo "SLID_EXE is $SLID_EXE" | ||
| 12 | +echo "PATH: $PATH" | ||
| 13 | + | ||
| 14 | + | ||
| 15 | +log "------------------------------------------------------------" | ||
| 16 | +log "Download whisper tiny for spoken language identification " | ||
| 17 | +log "------------------------------------------------------------" | ||
| 18 | + | ||
| 19 | +rm -rf sherpa-onnx-whisper-tiny* | ||
| 20 | +curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2 | ||
| 21 | +tar xvf sherpa-onnx-whisper-tiny.tar.bz2 | ||
| 22 | +rm sherpa-onnx-whisper-tiny.tar.bz2 | ||
| 23 | + | ||
| 24 | +$SLID_EXE | ||
| 25 | + | ||
| 26 | +rm -rf sherpa-onnx-whisper-tiny* |
| @@ -28,32 +28,32 @@ ar-arabic.wav | @@ -28,32 +28,32 @@ ar-arabic.wav | ||
| 28 | bg-bulgarian.wav | 28 | bg-bulgarian.wav |
| 29 | cs-czech.wav | 29 | cs-czech.wav |
| 30 | da-danish.wav | 30 | da-danish.wav |
| 31 | -de-german.wav | ||
| 32 | -el-greek.wav | ||
| 33 | -en-english.wav | ||
| 34 | -es-spanish.wav | ||
| 35 | -fa-persian.wav | ||
| 36 | -fi-finnish.wav | ||
| 37 | -fr-french.wav | ||
| 38 | -hi-hindi.wav | ||
| 39 | -hr-croatian.wav | ||
| 40 | -id-indonesian.wav | ||
| 41 | -it-italian.wav | ||
| 42 | -ja-japanese.wav | ||
| 43 | -ko-korean.wav | ||
| 44 | -nl-dutch.wav | ||
| 45 | -no-norwegian.wav | ||
| 46 | -po-polish.wav | ||
| 47 | -pt-portuguese.wav | ||
| 48 | -ro-romanian.wav | ||
| 49 | -ru-russian.wav | ||
| 50 | -sk-slovak.wav | ||
| 51 | -sv-swedish.wav | ||
| 52 | -ta-tamil.wav | ||
| 53 | -tl-tagalog.wav | ||
| 54 | -tr-turkish.wav | ||
| 55 | -uk-ukrainian.wav | ||
| 56 | -zh-chinese.wav | 31 | +# de-german.wav |
| 32 | +# el-greek.wav | ||
| 33 | +# en-english.wav | ||
| 34 | +# es-spanish.wav | ||
| 35 | +# fa-persian.wav | ||
| 36 | +# fi-finnish.wav | ||
| 37 | +# fr-french.wav | ||
| 38 | +# hi-hindi.wav | ||
| 39 | +# hr-croatian.wav | ||
| 40 | +# id-indonesian.wav | ||
| 41 | +# it-italian.wav | ||
| 42 | +# ja-japanese.wav | ||
| 43 | +# ko-korean.wav | ||
| 44 | +# nl-dutch.wav | ||
| 45 | +# no-norwegian.wav | ||
| 46 | +# po-polish.wav | ||
| 47 | +# pt-portuguese.wav | ||
| 48 | +# ro-romanian.wav | ||
| 49 | +# ru-russian.wav | ||
| 50 | +# sk-slovak.wav | ||
| 51 | +# sv-swedish.wav | ||
| 52 | +# ta-tamil.wav | ||
| 53 | +# tl-tagalog.wav | ||
| 54 | +# tr-turkish.wav | ||
| 55 | +# uk-ukrainian.wav | ||
| 56 | +# zh-chinese.wav | ||
| 57 | ) | 57 | ) |
| 58 | 58 | ||
| 59 | for wav in ${waves[@]}; do | 59 | for wav in ${waves[@]}; do |
| @@ -113,6 +113,7 @@ jobs: | @@ -113,6 +113,7 @@ jobs: | ||
| 113 | git config --global user.email "csukuangfj@gmail.com" | 113 | git config --global user.email "csukuangfj@gmail.com" |
| 114 | git config --global user.name "Fangjun Kuang" | 114 | git config --global user.name "Fangjun Kuang" |
| 115 | 115 | ||
| 116 | + rm -rf huggingface | ||
| 116 | GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface | 117 | GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface |
| 117 | 118 | ||
| 118 | cd huggingface | 119 | cd huggingface |
| @@ -90,6 +90,7 @@ jobs: | @@ -90,6 +90,7 @@ jobs: | ||
| 90 | git config --global user.email "csukuangfj@gmail.com" | 90 | git config --global user.email "csukuangfj@gmail.com" |
| 91 | git config --global user.name "Fangjun Kuang" | 91 | git config --global user.name "Fangjun Kuang" |
| 92 | 92 | ||
| 93 | + rm -rf huggingface | ||
| 93 | GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface | 94 | GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface |
| 94 | 95 | ||
| 95 | cd huggingface | 96 | cd huggingface |
| @@ -123,8 +123,15 @@ jobs: | @@ -123,8 +123,15 @@ jobs: | ||
| 123 | name: release-${{ matrix.build_type }}-${{ matrix.shared_lib }} | 123 | name: release-${{ matrix.build_type }}-${{ matrix.shared_lib }} |
| 124 | path: build/bin/* | 124 | path: build/bin/* |
| 125 | 125 | ||
| 126 | - - name: Test spoken language identification | ||
| 127 | - if: matrix.build_type != 'Debug' | 126 | + - name: Test spoken language identification (C API) |
| 127 | + shell: bash | ||
| 128 | + run: | | ||
| 129 | + export PATH=$PWD/build/bin:$PATH | ||
| 130 | + export SLID_EXE=spoken-language-identification-c-api | ||
| 131 | + | ||
| 132 | + .github/scripts/test-c-api.sh | ||
| 133 | + | ||
| 134 | + - name: Test spoken language identification (C++ API) | ||
| 128 | shell: bash | 135 | shell: bash |
| 129 | run: | | 136 | run: | |
| 130 | export PATH=$PWD/build/bin:$PATH | 137 | export PATH=$PWD/build/bin:$PATH |
| @@ -243,6 +250,7 @@ jobs: | @@ -243,6 +250,7 @@ jobs: | ||
| 243 | git config --global user.email "csukuangfj@gmail.com" | 250 | git config --global user.email "csukuangfj@gmail.com" |
| 244 | git config --global user.name "Fangjun Kuang" | 251 | git config --global user.name "Fangjun Kuang" |
| 245 | 252 | ||
| 253 | + rm -rf huggingface | ||
| 246 | GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface | 254 | GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface |
| 247 | 255 | ||
| 248 | cd huggingface | 256 | cd huggingface |
| @@ -102,8 +102,15 @@ jobs: | @@ -102,8 +102,15 @@ jobs: | ||
| 102 | otool -L build/bin/sherpa-onnx | 102 | otool -L build/bin/sherpa-onnx |
| 103 | otool -l build/bin/sherpa-onnx | 103 | otool -l build/bin/sherpa-onnx |
| 104 | 104 | ||
| 105 | - - name: Test spoken language identification | ||
| 106 | - if: matrix.build_type != 'Debug' | 105 | + - name: Test spoken language identification (C API) |
| 106 | + shell: bash | ||
| 107 | + run: | | ||
| 108 | + export PATH=$PWD/build/bin:$PATH | ||
| 109 | + export SLID_EXE=spoken-language-identification-c-api | ||
| 110 | + | ||
| 111 | + .github/scripts/test-c-api.sh | ||
| 112 | + | ||
| 113 | + - name: Test spoken language identification (C++ API) | ||
| 107 | shell: bash | 114 | shell: bash |
| 108 | run: | | 115 | run: | |
| 109 | export PATH=$PWD/build/bin:$PATH | 116 | export PATH=$PWD/build/bin:$PATH |
| @@ -68,7 +68,15 @@ jobs: | @@ -68,7 +68,15 @@ jobs: | ||
| 68 | 68 | ||
| 69 | ls -lh ./bin/Release/sherpa-onnx.exe | 69 | ls -lh ./bin/Release/sherpa-onnx.exe |
| 70 | 70 | ||
| 71 | - - name: Test spoken language identification | 71 | + - name: Test spoken language identification (C API) |
| 72 | + shell: bash | ||
| 73 | + run: | | ||
| 74 | + export PATH=$PWD/build/bin/Release:$PATH | ||
| 75 | + export SLID_EXE=spoken-language-identification-c-api.exe | ||
| 76 | + | ||
| 77 | + .github/scripts/test-c-api.sh | ||
| 78 | + | ||
| 79 | + - name: Test spoken language identification (C++ API) | ||
| 72 | shell: bash | 80 | shell: bash |
| 73 | run: | | 81 | run: | |
| 74 | export PATH=$PWD/build/bin/Release:$PATH | 82 | export PATH=$PWD/build/bin/Release:$PATH |
| @@ -69,6 +69,14 @@ jobs: | @@ -69,6 +69,14 @@ jobs: | ||
| 69 | 69 | ||
| 70 | ls -lh ./bin/Release/sherpa-onnx.exe | 70 | ls -lh ./bin/Release/sherpa-onnx.exe |
| 71 | 71 | ||
| 72 | + - name: Test spoken language identification (C API) | ||
| 73 | + shell: bash | ||
| 74 | + run: | | ||
| 75 | + export PATH=$PWD/build/bin/Release:$PATH | ||
| 76 | + export SLID_EXE=spoken-language-identification-c-api.exe | ||
| 77 | + | ||
| 78 | + .github/scripts/test-c-api.sh | ||
| 79 | + | ||
| 72 | # - name: Test spoken language identification | 80 | # - name: Test spoken language identification |
| 73 | # shell: bash | 81 | # shell: bash |
| 74 | # run: | | 82 | # run: | |
| @@ -7,8 +7,11 @@ target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs) | @@ -7,8 +7,11 @@ target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs) | ||
| 7 | add_executable(offline-tts-c-api offline-tts-c-api.c) | 7 | add_executable(offline-tts-c-api offline-tts-c-api.c) |
| 8 | target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs) | 8 | target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs) |
| 9 | 9 | ||
| 10 | +add_executable(spoken-language-identification-c-api spoken-language-identification-c-api.c) | ||
| 11 | +target_link_libraries(spoken-language-identification-c-api sherpa-onnx-c-api) | ||
| 12 | + | ||
| 10 | if(SHERPA_ONNX_HAS_ALSA) | 13 | if(SHERPA_ONNX_HAS_ALSA) |
| 11 | add_subdirectory(./asr-microphone-example) | 14 | add_subdirectory(./asr-microphone-example) |
| 12 | -else() | 15 | +elseif((UNIX AND NOT APPLE) OR LINUX) |
| 13 | message(WARNING "Not include ./asr-microphone-example since alsa is not available") | 16 | message(WARNING "Not include ./asr-microphone-example since alsa is not available") |
| 14 | endif() | 17 | endif() |
| @@ -4,7 +4,7 @@ CUR_DIR :=$(shell pwd) | @@ -4,7 +4,7 @@ CUR_DIR :=$(shell pwd) | ||
| 4 | CFLAGS := -I ../ -I ../build/_deps/cargs-src/include/ | 4 | CFLAGS := -I ../ -I ../build/_deps/cargs-src/include/ |
| 5 | LDFLAGS := -L ../build/lib | 5 | LDFLAGS := -L ../build/lib |
| 6 | LDFLAGS += -L ../build/_deps/onnxruntime-src/lib | 6 | LDFLAGS += -L ../build/_deps/onnxruntime-src/lib |
| 7 | -LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lonnxruntime -lkaldi-native-fbank-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lcargs | 7 | +LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime |
| 8 | LDFLAGS += -framework Foundation | 8 | LDFLAGS += -framework Foundation |
| 9 | LDFLAGS += -lc++ | 9 | LDFLAGS += -lc++ |
| 10 | LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/lib | 10 | LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/lib |
| @@ -169,55 +169,56 @@ int32_t main(int32_t argc, char *argv[]) { | @@ -169,55 +169,56 @@ int32_t main(int32_t argc, char *argv[]) { | ||
| 169 | int32_t segment_id = 0; | 169 | int32_t segment_id = 0; |
| 170 | 170 | ||
| 171 | const char *wav_filename = argv[context.index]; | 171 | const char *wav_filename = argv[context.index]; |
| 172 | - FILE *fp = fopen(wav_filename, "rb"); | ||
| 173 | - if (!fp) { | ||
| 174 | - fprintf(stderr, "Failed to open %s\n", wav_filename); | 172 | + const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename); |
| 173 | + if (wave == NULL) { | ||
| 174 | + fprintf(stderr, "Failed to read %s\n", wav_filename); | ||
| 175 | return -1; | 175 | return -1; |
| 176 | } | 176 | } |
| 177 | - | ||
| 178 | - // Assume the wave header occupies 44 bytes. | ||
| 179 | - fseek(fp, 44, SEEK_SET); | ||
| 180 | - | ||
| 181 | // simulate streaming | 177 | // simulate streaming |
| 182 | 178 | ||
| 183 | #define N 3200 // 0.2 s. Sample rate is fixed to 16 kHz | 179 | #define N 3200 // 0.2 s. Sample rate is fixed to 16 kHz |
| 184 | 180 | ||
| 185 | int16_t buffer[N]; | 181 | int16_t buffer[N]; |
| 186 | float samples[N]; | 182 | float samples[N]; |
| 183 | + fprintf(stderr, "sample rate: %d, num samples: %d, duration: %.2f s\n", | ||
| 184 | + wave->sample_rate, wave->num_samples, | ||
| 185 | + (float)wave->num_samples / wave->sample_rate); | ||
| 186 | + | ||
| 187 | + int32_t k = 0; | ||
| 188 | + while (k < wave->num_samples) { | ||
| 189 | + int32_t start = k; | ||
| 190 | + int32_t end = | ||
| 191 | + (start + N > wave->num_samples) ? wave->num_samples : (start + N); | ||
| 192 | + k += N; | ||
| 193 | + | ||
| 194 | + AcceptWaveform(stream, wave->sample_rate, wave->samples + start, | ||
| 195 | + end - start); | ||
| 196 | + while (IsOnlineStreamReady(recognizer, stream)) { | ||
| 197 | + DecodeOnlineStream(recognizer, stream); | ||
| 198 | + } | ||
| 187 | 199 | ||
| 188 | - while (!feof(fp)) { | ||
| 189 | - size_t n = fread((void *)buffer, sizeof(int16_t), N, fp); | ||
| 190 | - if (n > 0) { | ||
| 191 | - for (size_t i = 0; i != n; ++i) { | ||
| 192 | - samples[i] = buffer[i] / 32768.; | ||
| 193 | - } | ||
| 194 | - AcceptWaveform(stream, 16000, samples, n); | ||
| 195 | - while (IsOnlineStreamReady(recognizer, stream)) { | ||
| 196 | - DecodeOnlineStream(recognizer, stream); | ||
| 197 | - } | 200 | + const SherpaOnnxOnlineRecognizerResult *r = |
| 201 | + GetOnlineStreamResult(recognizer, stream); | ||
| 198 | 202 | ||
| 199 | - const SherpaOnnxOnlineRecognizerResult *r = | ||
| 200 | - GetOnlineStreamResult(recognizer, stream); | 203 | + if (strlen(r->text)) { |
| 204 | + SherpaOnnxPrint(display, segment_id, r->text); | ||
| 205 | + } | ||
| 201 | 206 | ||
| 207 | + if (IsEndpoint(recognizer, stream)) { | ||
| 202 | if (strlen(r->text)) { | 208 | if (strlen(r->text)) { |
| 203 | - SherpaOnnxPrint(display, segment_id, r->text); | 209 | + ++segment_id; |
| 204 | } | 210 | } |
| 205 | - | ||
| 206 | - if (IsEndpoint(recognizer, stream)) { | ||
| 207 | - if (strlen(r->text)) { | ||
| 208 | - ++segment_id; | ||
| 209 | - } | ||
| 210 | - Reset(recognizer, stream); | ||
| 211 | - } | ||
| 212 | - | ||
| 213 | - DestroyOnlineRecognizerResult(r); | 211 | + Reset(recognizer, stream); |
| 214 | } | 212 | } |
| 213 | + | ||
| 214 | + DestroyOnlineRecognizerResult(r); | ||
| 215 | } | 215 | } |
| 216 | - fclose(fp); | ||
| 217 | 216 | ||
| 218 | // add some tail padding | 217 | // add some tail padding |
| 219 | float tail_paddings[4800] = {0}; // 0.3 seconds at 16 kHz sample rate | 218 | float tail_paddings[4800] = {0}; // 0.3 seconds at 16 kHz sample rate |
| 220 | - AcceptWaveform(stream, 16000, tail_paddings, 4800); | 219 | + AcceptWaveform(stream, wave->sample_rate, tail_paddings, 4800); |
| 220 | + | ||
| 221 | + SherpaOnnxFreeWave(wave); | ||
| 221 | 222 | ||
| 222 | InputFinished(stream); | 223 | InputFinished(stream); |
| 223 | while (IsOnlineStreamReady(recognizer, stream)) { | 224 | while (IsOnlineStreamReady(recognizer, stream)) { |
| 1 | + | ||
| 2 | +// We assume you have pre-downloaded the whisper multi-lingual models | ||
| 3 | +// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 4 | +// An example command to download the "tiny" whisper model is given below: | ||
| 5 | +// | ||
| 6 | +// clang-format off | ||
| 7 | +// | ||
| 8 | +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2 | ||
| 9 | +// tar xvf sherpa-onnx-whisper-tiny.tar.bz2 | ||
| 10 | +// rm sherpa-onnx-whisper-tiny.tar.bz2 | ||
| 11 | +// | ||
| 12 | +// clang-format on | ||
| 13 | + | ||
| 14 | +#include <stdio.h> | ||
| 15 | +#include <stdlib.h> | ||
| 16 | +#include <string.h> | ||
| 17 | + | ||
| 18 | +#include "sherpa-onnx/c-api/c-api.h" | ||
| 19 | + | ||
| 20 | +int32_t main() { | ||
| 21 | + SherpaOnnxSpokenLanguageIdentificationConfig config; | ||
| 22 | + | ||
| 23 | + memset(&config, 0, sizeof(config)); | ||
| 24 | + | ||
| 25 | + config.whisper.encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx"; | ||
| 26 | + config.whisper.decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx"; | ||
| 27 | + config.num_threads = 1; | ||
| 28 | + config.debug = 1; | ||
| 29 | + config.provider = "cpu"; | ||
| 30 | + | ||
| 31 | + const SherpaOnnxSpokenLanguageIdentification *slid = | ||
| 32 | + SherpaOnnxCreateSpokenLanguageIdentification(&config); | ||
| 33 | + if (!slid) { | ||
| 34 | + fprintf(stderr, "Failed to create spoken language identifier"); | ||
| 35 | + return -1; | ||
| 36 | + } | ||
| 37 | + | ||
| 38 | + // You can find more test waves from | ||
| 39 | + // https://hf-mirror.com/spaces/k2-fsa/spoken-language-identification/tree/main/test_wavs | ||
| 40 | + const char *wav_filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav"; | ||
| 41 | + const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename); | ||
| 42 | + if (wave == NULL) { | ||
| 43 | + fprintf(stderr, "Failed to read %s\n", wav_filename); | ||
| 44 | + return -1; | ||
| 45 | + } | ||
| 46 | + | ||
| 47 | + SherpaOnnxOfflineStream *stream = | ||
| 48 | + SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(slid); | ||
| 49 | + | ||
| 50 | + AcceptWaveformOffline(stream, wave->sample_rate, wave->samples, | ||
| 51 | + wave->num_samples); | ||
| 52 | + | ||
| 53 | + const SherpaOnnxSpokenLanguageIdentificationResult *result = | ||
| 54 | + SherpaOnnxSpokenLanguageIdentificationCompute(slid, stream); | ||
| 55 | + | ||
| 56 | + fprintf(stderr, "wav_filename: %s\n", wav_filename); | ||
| 57 | + fprintf(stderr, "Detected language: %s\n", result->lang); | ||
| 58 | + | ||
| 59 | + SherpaOnnxDestroySpokenLanguageIdentificationResult(result); | ||
| 60 | + DestroyOfflineStream(stream); | ||
| 61 | + SherpaOnnxFreeWave(wave); | ||
| 62 | + SherpaOnnxDestroySpokenLanguageIdentification(slid); | ||
| 63 | + | ||
| 64 | + return 0; | ||
| 65 | +} |
| @@ -3,7 +3,7 @@ | @@ -3,7 +3,7 @@ | ||
| 3 | set -ex | 3 | set -ex |
| 4 | 4 | ||
| 5 | if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then | 5 | if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then |
| 6 | - wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2 | 6 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2 |
| 7 | tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2 | 7 | tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2 |
| 8 | rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2 | 8 | rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2 |
| 9 | fi | 9 | fi |
| @@ -3,7 +3,7 @@ | @@ -3,7 +3,7 @@ | ||
| 3 | set -ex | 3 | set -ex |
| 4 | 4 | ||
| 5 | if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then | 5 | if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then |
| 6 | - wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2 | 6 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2 |
| 7 | tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2 | 7 | tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2 |
| 8 | rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2 | 8 | rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2 |
| 9 | fi | 9 | fi |
| @@ -6,7 +6,7 @@ | @@ -6,7 +6,7 @@ | ||
| 6 | 6 | ||
| 7 | set -ex | 7 | set -ex |
| 8 | if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then | 8 | if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then |
| 9 | - wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | 9 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 |
| 10 | tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | 10 | tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 |
| 11 | fi | 11 | fi |
| 12 | 12 |
| @@ -6,6 +6,7 @@ | @@ -6,6 +6,7 @@ | ||
| 6 | 6 | ||
| 7 | #include <algorithm> | 7 | #include <algorithm> |
| 8 | #include <memory> | 8 | #include <memory> |
| 9 | +#include <string> | ||
| 9 | #include <utility> | 10 | #include <utility> |
| 10 | #include <vector> | 11 | #include <vector> |
| 11 | 12 | ||
| @@ -16,7 +17,9 @@ | @@ -16,7 +17,9 @@ | ||
| 16 | #include "sherpa-onnx/csrc/offline-recognizer.h" | 17 | #include "sherpa-onnx/csrc/offline-recognizer.h" |
| 17 | #include "sherpa-onnx/csrc/offline-tts.h" | 18 | #include "sherpa-onnx/csrc/offline-tts.h" |
| 18 | #include "sherpa-onnx/csrc/online-recognizer.h" | 19 | #include "sherpa-onnx/csrc/online-recognizer.h" |
| 20 | +#include "sherpa-onnx/csrc/spoken-language-identification.h" | ||
| 19 | #include "sherpa-onnx/csrc/voice-activity-detector.h" | 21 | #include "sherpa-onnx/csrc/voice-activity-detector.h" |
| 22 | +#include "sherpa-onnx/csrc/wave-reader.h" | ||
| 20 | #include "sherpa-onnx/csrc/wave-writer.h" | 23 | #include "sherpa-onnx/csrc/wave-writer.h" |
| 21 | 24 | ||
| 22 | struct SherpaOnnxOnlineRecognizer { | 25 | struct SherpaOnnxOnlineRecognizer { |
| @@ -859,3 +862,97 @@ int32_t SherpaOnnxWriteWave(const float *samples, int32_t n, | @@ -859,3 +862,97 @@ int32_t SherpaOnnxWriteWave(const float *samples, int32_t n, | ||
| 859 | int32_t sample_rate, const char *filename) { | 862 | int32_t sample_rate, const char *filename) { |
| 860 | return sherpa_onnx::WriteWave(filename, sample_rate, samples, n); | 863 | return sherpa_onnx::WriteWave(filename, sample_rate, samples, n); |
| 861 | } | 864 | } |
| 865 | + | ||
| 866 | +const SherpaOnnxWave *SherpaOnnxReadWave(const char *filename) { | ||
| 867 | + int32_t sample_rate = -1; | ||
| 868 | + bool is_ok = false; | ||
| 869 | + std::vector<float> samples = | ||
| 870 | + sherpa_onnx::ReadWave(filename, &sample_rate, &is_ok); | ||
| 871 | + if (!is_ok) { | ||
| 872 | + return nullptr; | ||
| 873 | + } | ||
| 874 | + | ||
| 875 | + float *c_samples = new float[samples.size()]; | ||
| 876 | + std::copy(samples.begin(), samples.end(), c_samples); | ||
| 877 | + | ||
| 878 | + SherpaOnnxWave *wave = new SherpaOnnxWave; | ||
| 879 | + wave->samples = c_samples; | ||
| 880 | + wave->sample_rate = sample_rate; | ||
| 881 | + wave->num_samples = samples.size(); | ||
| 882 | + return wave; | ||
| 883 | +} | ||
| 884 | + | ||
| 885 | +void SherpaOnnxFreeWave(const SherpaOnnxWave *wave) { | ||
| 886 | + if (wave) { | ||
| 887 | + delete[] wave->samples; | ||
| 888 | + delete wave; | ||
| 889 | + } | ||
| 890 | +} | ||
| 891 | + | ||
| 892 | +struct SherpaOnnxSpokenLanguageIdentification { | ||
| 893 | + std::unique_ptr<sherpa_onnx::SpokenLanguageIdentification> impl; | ||
| 894 | +}; | ||
| 895 | + | ||
| 896 | +const SherpaOnnxSpokenLanguageIdentification * | ||
| 897 | +SherpaOnnxCreateSpokenLanguageIdentification( | ||
| 898 | + const SherpaOnnxSpokenLanguageIdentificationConfig *config) { | ||
| 899 | + sherpa_onnx::SpokenLanguageIdentificationConfig slid_config; | ||
| 900 | + slid_config.whisper.encoder = SHERPA_ONNX_OR(config->whisper.encoder, ""); | ||
| 901 | + slid_config.whisper.decoder = SHERPA_ONNX_OR(config->whisper.decoder, ""); | ||
| 902 | + slid_config.whisper.tail_paddings = | ||
| 903 | + SHERPA_ONNX_OR(config->whisper.tail_paddings, -1); | ||
| 904 | + slid_config.num_threads = SHERPA_ONNX_OR(config->num_threads, 1); | ||
| 905 | + slid_config.debug = config->debug; | ||
| 906 | + slid_config.provider = SHERPA_ONNX_OR(config->provider, "cpu"); | ||
| 907 | + | ||
| 908 | + if (slid_config.debug) { | ||
| 909 | + SHERPA_ONNX_LOGE("%s\n", slid_config.ToString().c_str()); | ||
| 910 | + } | ||
| 911 | + | ||
| 912 | + if (!slid_config.Validate()) { | ||
| 913 | + SHERPA_ONNX_LOGE("Errors in config"); | ||
| 914 | + return nullptr; | ||
| 915 | + } | ||
| 916 | + | ||
| 917 | + SherpaOnnxSpokenLanguageIdentification *slid = | ||
| 918 | + new SherpaOnnxSpokenLanguageIdentification; | ||
| 919 | + slid->impl = | ||
| 920 | + std::make_unique<sherpa_onnx::SpokenLanguageIdentification>(slid_config); | ||
| 921 | + | ||
| 922 | + return slid; | ||
| 923 | +} | ||
| 924 | + | ||
| 925 | +void SherpaOnnxDestroySpokenLanguageIdentification( | ||
| 926 | + const SherpaOnnxSpokenLanguageIdentification *slid) { | ||
| 927 | + delete slid; | ||
| 928 | +} | ||
| 929 | + | ||
| 930 | +SherpaOnnxOfflineStream * | ||
| 931 | +SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream( | ||
| 932 | + const SherpaOnnxSpokenLanguageIdentification *slid) { | ||
| 933 | + SherpaOnnxOfflineStream *stream = | ||
| 934 | + new SherpaOnnxOfflineStream(slid->impl->CreateStream()); | ||
| 935 | + return stream; | ||
| 936 | +} | ||
| 937 | + | ||
| 938 | +const SherpaOnnxSpokenLanguageIdentificationResult * | ||
| 939 | +SherpaOnnxSpokenLanguageIdentificationCompute( | ||
| 940 | + const SherpaOnnxSpokenLanguageIdentification *slid, | ||
| 941 | + const SherpaOnnxOfflineStream *s) { | ||
| 942 | + std::string lang = slid->impl->Compute(s->impl.get()); | ||
| 943 | + char *c_lang = new char[lang.size() + 1]; | ||
| 944 | + std::copy(lang.begin(), lang.end(), c_lang); | ||
| 945 | + c_lang[lang.size()] = '\0'; | ||
| 946 | + SherpaOnnxSpokenLanguageIdentificationResult *r = | ||
| 947 | + new SherpaOnnxSpokenLanguageIdentificationResult; | ||
| 948 | + r->lang = c_lang; | ||
| 949 | + return r; | ||
| 950 | +} | ||
| 951 | + | ||
| 952 | +void SherpaOnnxDestroySpokenLanguageIdentificationResult( | ||
| 953 | + const SherpaOnnxSpokenLanguageIdentificationResult *r) { | ||
| 954 | + if (r) { | ||
| 955 | + delete[] r->lang; | ||
| 956 | + delete r; | ||
| 957 | + } | ||
| 958 | +} |
| @@ -820,6 +820,76 @@ SHERPA_ONNX_API int32_t SherpaOnnxWriteWave(const float *samples, int32_t n, | @@ -820,6 +820,76 @@ SHERPA_ONNX_API int32_t SherpaOnnxWriteWave(const float *samples, int32_t n, | ||
| 820 | int32_t sample_rate, | 820 | int32_t sample_rate, |
| 821 | const char *filename); | 821 | const char *filename); |
| 822 | 822 | ||
| 823 | +SHERPA_ONNX_API typedef struct SherpaOnnxWave { | ||
| 824 | + // samples normalized to the range [-1, 1] | ||
| 825 | + const float *samples; | ||
| 826 | + int32_t sample_rate; | ||
| 827 | + int32_t num_samples; | ||
| 828 | +} SherpaOnnxWave; | ||
| 829 | + | ||
| 830 | +// Return a NULL pointer on error. It supports only standard WAVE files. | ||
| 831 | +// Each sample should be 16-bit. It supports only a single channel. | ||
| 832 | +// | ||
| 833 | +// If the returned pointer is not NULL, the user has to invoke | ||
| 834 | +// SherpaOnnxFreeWave() to free the returned pointer to avoid memory leak. | ||
| 835 | +SHERPA_ONNX_API const SherpaOnnxWave *SherpaOnnxReadWave(const char *filename); | ||
| 836 | + | ||
| 837 | +SHERPA_ONNX_API void SherpaOnnxFreeWave(const SherpaOnnxWave *wave); | ||
| 838 | + | ||
| 839 | +// Spoken language identification | ||
| 840 | + | ||
| 841 | +SHERPA_ONNX_API typedef struct | ||
| 842 | + SherpaOnnxSpokenLanguageIdentificationWhisperConfig { | ||
| 843 | + const char *encoder; | ||
| 844 | + const char *decoder; | ||
| 845 | + int32_t tail_paddings; | ||
| 846 | +} SherpaOnnxSpokenLanguageIdentificationWhisperConfig; | ||
| 847 | + | ||
| 848 | +SHERPA_ONNX_API typedef struct SherpaOnnxSpokenLanguageIdentificationConfig { | ||
| 849 | + SherpaOnnxSpokenLanguageIdentificationWhisperConfig whisper; | ||
| 850 | + int32_t num_threads; | ||
| 851 | + int32_t debug; | ||
| 852 | + const char *provider; | ||
| 853 | +} SherpaOnnxSpokenLanguageIdentificationConfig; | ||
| 854 | + | ||
| 855 | +SHERPA_ONNX_API typedef struct SherpaOnnxSpokenLanguageIdentification | ||
| 856 | + SherpaOnnxSpokenLanguageIdentification; | ||
| 857 | + | ||
| 858 | +// Create an instance of SpokenLanguageIdentification. | ||
| 859 | +// The user has to invoke SherpaOnnxDestroySpokenLanguageIdentification() | ||
| 860 | +// to free the returned pointer to avoid memory leak. | ||
| 861 | +SHERPA_ONNX_API const SherpaOnnxSpokenLanguageIdentification * | ||
| 862 | +SherpaOnnxCreateSpokenLanguageIdentification( | ||
| 863 | + const SherpaOnnxSpokenLanguageIdentificationConfig *config); | ||
| 864 | + | ||
| 865 | +SHERPA_ONNX_API void SherpaOnnxDestroySpokenLanguageIdentification( | ||
| 866 | + const SherpaOnnxSpokenLanguageIdentification *slid); | ||
| 867 | + | ||
| 868 | +// The user has to invoke DestroyOfflineStream() | ||
| 869 | +// to free the returned pointer to avoid a memory leak. | ||
| 870 | +SHERPA_ONNX_API SherpaOnnxOfflineStream * | ||
| 871 | +SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream( | ||
| 872 | + const SherpaOnnxSpokenLanguageIdentification *slid); | ||
| 873 | + | ||
| 874 | +SHERPA_ONNX_API typedef struct SherpaOnnxSpokenLanguageIdentificationResult { | ||
| 875 | + // en for English | ||
| 876 | + // de for German | ||
| 877 | + // zh for Chinese | ||
| 878 | + // es for Spanish | ||
| 879 | + // ... | ||
| 880 | + const char *lang; | ||
| 881 | +} SherpaOnnxSpokenLanguageIdentificationResult; | ||
| 882 | + | ||
| 883 | +// The user has to invoke SherpaOnnxDestroySpokenLanguageIdentificationResult() | ||
| 884 | +// to free the returned pointer to avoid memory leak | ||
| 885 | +SHERPA_ONNX_API const SherpaOnnxSpokenLanguageIdentificationResult * | ||
| 886 | +SherpaOnnxSpokenLanguageIdentificationCompute( | ||
| 887 | + const SherpaOnnxSpokenLanguageIdentification *slid, | ||
| 888 | + const SherpaOnnxOfflineStream *s); | ||
| 889 | + | ||
| 890 | +SHERPA_ONNX_API void SherpaOnnxDestroySpokenLanguageIdentificationResult( | ||
| 891 | + const SherpaOnnxSpokenLanguageIdentificationResult *r); | ||
| 892 | + | ||
| 823 | #if defined(__GNUC__) | 893 | #if defined(__GNUC__) |
| 824 | #pragma GCC diagnostic pop | 894 | #pragma GCC diagnostic pop |
| 825 | #endif | 895 | #endif |
-
请 注册 或 登录 后发表评论