Fangjun Kuang
Committed by GitHub

Add C API for spoken language identification. (#695)

  1 +#!/usr/bin/env bash
  2 +
  3 +set -e
  4 +
# Print a timestamped log line tagged with the caller's location.
#
# This function is adapted from espnet.
# Arguments: the message words; they are joined into one string via "$*".
# Outputs:   "<date time> (<file>:<line>:<caller>) <message>" on stdout.
log() {
  # BASH_SOURCE[1] is the file that called log; keep only its base name.
  local fname=${BASH_SOURCE[1]##*/}
  # printf '%s' emits the message verbatim. `echo -e` would expand backslash
  # escapes in it, mangling text such as Windows-style paths.
  printf '%s (%s:%s:%s) %s\n' \
    "$(date '+%Y-%m-%d %H:%M:%S')" \
    "$fname" "${BASH_LINENO[0]}" "${FUNCNAME[1]}" "$*"
}
  10 +
# The binary under test is named by the SLID_EXE environment variable.
# Abort early when it is unset or empty: an empty expansion would otherwise
# run nothing and let this test pass without exercising anything.
: "${SLID_EXE:?SLID_EXE must name the spoken language identification binary}"

echo "SLID_EXE is $SLID_EXE"
echo "PATH: $PATH"


log "------------------------------------------------------------"
log "Download whisper tiny for spoken language identification "
log "------------------------------------------------------------"

# Start from a clean slate in case an earlier run left files behind.
rm -rf sherpa-onnx-whisper-tiny*
# -f makes curl fail on an HTTP error instead of saving the error page as the
# archive; -L follows redirects; -S shows errors.
curl -fLS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.tar.bz2
rm sherpa-onnx-whisper-tiny.tar.bz2

# Quoted so the executable name is used verbatim (no word splitting/globbing).
"$SLID_EXE"

# Remove the downloaded model once the run has succeeded.
rm -rf sherpa-onnx-whisper-tiny*
@@ -28,32 +28,32 @@ ar-arabic.wav @@ -28,32 +28,32 @@ ar-arabic.wav
28 bg-bulgarian.wav 28 bg-bulgarian.wav
29 cs-czech.wav 29 cs-czech.wav
30 da-danish.wav 30 da-danish.wav
31 -de-german.wav  
32 -el-greek.wav  
33 -en-english.wav  
34 -es-spanish.wav  
35 -fa-persian.wav  
36 -fi-finnish.wav  
37 -fr-french.wav  
38 -hi-hindi.wav  
39 -hr-croatian.wav  
40 -id-indonesian.wav  
41 -it-italian.wav  
42 -ja-japanese.wav  
43 -ko-korean.wav  
44 -nl-dutch.wav  
45 -no-norwegian.wav  
46 -po-polish.wav  
47 -pt-portuguese.wav  
48 -ro-romanian.wav  
49 -ru-russian.wav  
50 -sk-slovak.wav  
51 -sv-swedish.wav  
52 -ta-tamil.wav  
53 -tl-tagalog.wav  
54 -tr-turkish.wav  
55 -uk-ukrainian.wav  
56 -zh-chinese.wav 31 +# de-german.wav
  32 +# el-greek.wav
  33 +# en-english.wav
  34 +# es-spanish.wav
  35 +# fa-persian.wav
  36 +# fi-finnish.wav
  37 +# fr-french.wav
  38 +# hi-hindi.wav
  39 +# hr-croatian.wav
  40 +# id-indonesian.wav
  41 +# it-italian.wav
  42 +# ja-japanese.wav
  43 +# ko-korean.wav
  44 +# nl-dutch.wav
  45 +# no-norwegian.wav
  46 +# po-polish.wav
  47 +# pt-portuguese.wav
  48 +# ro-romanian.wav
  49 +# ru-russian.wav
  50 +# sk-slovak.wav
  51 +# sv-swedish.wav
  52 +# ta-tamil.wav
  53 +# tl-tagalog.wav
  54 +# tr-turkish.wav
  55 +# uk-ukrainian.wav
  56 +# zh-chinese.wav
57 ) 57 )
58 58
59 for wav in ${waves[@]}; do 59 for wav in ${waves[@]}; do
@@ -113,6 +113,7 @@ jobs: @@ -113,6 +113,7 @@ jobs:
113 git config --global user.email "csukuangfj@gmail.com" 113 git config --global user.email "csukuangfj@gmail.com"
114 git config --global user.name "Fangjun Kuang" 114 git config --global user.name "Fangjun Kuang"
115 115
  116 + rm -rf huggingface
116 GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface 117 GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
117 118
118 cd huggingface 119 cd huggingface
@@ -90,6 +90,7 @@ jobs: @@ -90,6 +90,7 @@ jobs:
90 git config --global user.email "csukuangfj@gmail.com" 90 git config --global user.email "csukuangfj@gmail.com"
91 git config --global user.name "Fangjun Kuang" 91 git config --global user.name "Fangjun Kuang"
92 92
  93 + rm -rf huggingface
93 GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface 94 GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
94 95
95 cd huggingface 96 cd huggingface
@@ -123,8 +123,15 @@ jobs: @@ -123,8 +123,15 @@ jobs:
123 name: release-${{ matrix.build_type }}-${{ matrix.shared_lib }} 123 name: release-${{ matrix.build_type }}-${{ matrix.shared_lib }}
124 path: build/bin/* 124 path: build/bin/*
125 125
126 - - name: Test spoken language identification  
127 - if: matrix.build_type != 'Debug' 126 + - name: Test spoken language identification (C API)
  127 + shell: bash
  128 + run: |
  129 + export PATH=$PWD/build/bin:$PATH
  130 + export SLID_EXE=spoken-language-identification-c-api
  131 +
  132 + .github/scripts/test-c-api.sh
  133 +
  134 + - name: Test spoken language identification (C++ API)
128 shell: bash 135 shell: bash
129 run: | 136 run: |
130 export PATH=$PWD/build/bin:$PATH 137 export PATH=$PWD/build/bin:$PATH
@@ -243,6 +250,7 @@ jobs: @@ -243,6 +250,7 @@ jobs:
243 git config --global user.email "csukuangfj@gmail.com" 250 git config --global user.email "csukuangfj@gmail.com"
244 git config --global user.name "Fangjun Kuang" 251 git config --global user.name "Fangjun Kuang"
245 252
  253 + rm -rf huggingface
246 GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface 254 GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
247 255
248 cd huggingface 256 cd huggingface
@@ -102,8 +102,15 @@ jobs: @@ -102,8 +102,15 @@ jobs:
102 otool -L build/bin/sherpa-onnx 102 otool -L build/bin/sherpa-onnx
103 otool -l build/bin/sherpa-onnx 103 otool -l build/bin/sherpa-onnx
104 104
105 - - name: Test spoken language identification  
106 - if: matrix.build_type != 'Debug' 105 + - name: Test spoken language identification (C API)
  106 + shell: bash
  107 + run: |
  108 + export PATH=$PWD/build/bin:$PATH
  109 + export SLID_EXE=spoken-language-identification-c-api
  110 +
  111 + .github/scripts/test-c-api.sh
  112 +
  113 + - name: Test spoken language identification (C++ API)
107 shell: bash 114 shell: bash
108 run: | 115 run: |
109 export PATH=$PWD/build/bin:$PATH 116 export PATH=$PWD/build/bin:$PATH
@@ -68,7 +68,15 @@ jobs: @@ -68,7 +68,15 @@ jobs:
68 68
69 ls -lh ./bin/Release/sherpa-onnx.exe 69 ls -lh ./bin/Release/sherpa-onnx.exe
70 70
71 - - name: Test spoken language identification 71 + - name: Test spoken language identification (C API)
  72 + shell: bash
  73 + run: |
  74 + export PATH=$PWD/build/bin/Release:$PATH
  75 + export SLID_EXE=spoken-language-identification-c-api.exe
  76 +
  77 + .github/scripts/test-c-api.sh
  78 +
  79 + - name: Test spoken language identification (C++ API)
72 shell: bash 80 shell: bash
73 run: | 81 run: |
74 export PATH=$PWD/build/bin/Release:$PATH 82 export PATH=$PWD/build/bin/Release:$PATH
@@ -69,6 +69,14 @@ jobs: @@ -69,6 +69,14 @@ jobs:
69 69
70 ls -lh ./bin/Release/sherpa-onnx.exe 70 ls -lh ./bin/Release/sherpa-onnx.exe
71 71
  72 + - name: Test spoken language identification (C API)
  73 + shell: bash
  74 + run: |
  75 + export PATH=$PWD/build/bin/Release:$PATH
  76 + export SLID_EXE=spoken-language-identification-c-api.exe
  77 +
  78 + .github/scripts/test-c-api.sh
  79 +
72 # - name: Test spoken language identification 80 # - name: Test spoken language identification
73 # shell: bash 81 # shell: bash
74 # run: | 82 # run: |
@@ -85,3 +85,4 @@ log @@ -85,3 +85,4 @@ log
85 vits-piper-* 85 vits-piper-*
86 vits-coqui-* 86 vits-coqui-*
87 vits-mms-* 87 vits-mms-*
  88 +*.tar.bz2
@@ -7,8 +7,11 @@ target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs) @@ -7,8 +7,11 @@ target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs)
7 add_executable(offline-tts-c-api offline-tts-c-api.c) 7 add_executable(offline-tts-c-api offline-tts-c-api.c)
8 target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs) 8 target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs)
9 9
  10 +add_executable(spoken-language-identification-c-api spoken-language-identification-c-api.c)
  11 +target_link_libraries(spoken-language-identification-c-api sherpa-onnx-c-api)
  12 +
10 if(SHERPA_ONNX_HAS_ALSA) 13 if(SHERPA_ONNX_HAS_ALSA)
11 add_subdirectory(./asr-microphone-example) 14 add_subdirectory(./asr-microphone-example)
12 -else() 15 +elseif((UNIX AND NOT APPLE) OR LINUX)
13 message(WARNING "Not include ./asr-microphone-example since alsa is not available") 16 message(WARNING "Not include ./asr-microphone-example since alsa is not available")
14 endif() 17 endif()
@@ -4,7 +4,7 @@ CUR_DIR :=$(shell pwd) @@ -4,7 +4,7 @@ CUR_DIR :=$(shell pwd)
4 CFLAGS := -I ../ -I ../build/_deps/cargs-src/include/ 4 CFLAGS := -I ../ -I ../build/_deps/cargs-src/include/
5 LDFLAGS := -L ../build/lib 5 LDFLAGS := -L ../build/lib
6 LDFLAGS += -L ../build/_deps/onnxruntime-src/lib 6 LDFLAGS += -L ../build/_deps/onnxruntime-src/lib
7 -LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lonnxruntime -lkaldi-native-fbank-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lcargs 7 +LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime
8 LDFLAGS += -framework Foundation 8 LDFLAGS += -framework Foundation
9 LDFLAGS += -lc++ 9 LDFLAGS += -lc++
10 LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/lib 10 LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/lib
@@ -169,55 +169,56 @@ int32_t main(int32_t argc, char *argv[]) { @@ -169,55 +169,56 @@ int32_t main(int32_t argc, char *argv[]) {
169 int32_t segment_id = 0; 169 int32_t segment_id = 0;
170 170
171 const char *wav_filename = argv[context.index]; 171 const char *wav_filename = argv[context.index];
172 - FILE *fp = fopen(wav_filename, "rb");  
173 - if (!fp) {  
174 - fprintf(stderr, "Failed to open %s\n", wav_filename); 172 + const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  173 + if (wave == NULL) {
  174 + fprintf(stderr, "Failed to read %s\n", wav_filename);
175 return -1; 175 return -1;
176 } 176 }
177 -  
178 - // Assume the wave header occupies 44 bytes.  
179 - fseek(fp, 44, SEEK_SET);  
180 -  
181 // simulate streaming 177 // simulate streaming
182 178
183 #define N 3200 // 0.2 s. Sample rate is fixed to 16 kHz 179 #define N 3200 // 0.2 s. Sample rate is fixed to 16 kHz
184 180
185 int16_t buffer[N]; 181 int16_t buffer[N];
186 float samples[N]; 182 float samples[N];
  183 + fprintf(stderr, "sample rate: %d, num samples: %d, duration: %.2f s\n",
  184 + wave->sample_rate, wave->num_samples,
  185 + (float)wave->num_samples / wave->sample_rate);
  186 +
  187 + int32_t k = 0;
  188 + while (k < wave->num_samples) {
  189 + int32_t start = k;
  190 + int32_t end =
  191 + (start + N > wave->num_samples) ? wave->num_samples : (start + N);
  192 + k += N;
  193 +
  194 + AcceptWaveform(stream, wave->sample_rate, wave->samples + start,
  195 + end - start);
  196 + while (IsOnlineStreamReady(recognizer, stream)) {
  197 + DecodeOnlineStream(recognizer, stream);
  198 + }
187 199
188 - while (!feof(fp)) {  
189 - size_t n = fread((void *)buffer, sizeof(int16_t), N, fp);  
190 - if (n > 0) {  
191 - for (size_t i = 0; i != n; ++i) {  
192 - samples[i] = buffer[i] / 32768.;  
193 - }  
194 - AcceptWaveform(stream, 16000, samples, n);  
195 - while (IsOnlineStreamReady(recognizer, stream)) {  
196 - DecodeOnlineStream(recognizer, stream);  
197 - } 200 + const SherpaOnnxOnlineRecognizerResult *r =
  201 + GetOnlineStreamResult(recognizer, stream);
198 202
199 - const SherpaOnnxOnlineRecognizerResult *r =  
200 - GetOnlineStreamResult(recognizer, stream); 203 + if (strlen(r->text)) {
  204 + SherpaOnnxPrint(display, segment_id, r->text);
  205 + }
201 206
  207 + if (IsEndpoint(recognizer, stream)) {
202 if (strlen(r->text)) { 208 if (strlen(r->text)) {
203 - SherpaOnnxPrint(display, segment_id, r->text); 209 + ++segment_id;
204 } 210 }
205 -  
206 - if (IsEndpoint(recognizer, stream)) {  
207 - if (strlen(r->text)) {  
208 - ++segment_id;  
209 - }  
210 - Reset(recognizer, stream);  
211 - }  
212 -  
213 - DestroyOnlineRecognizerResult(r); 211 + Reset(recognizer, stream);
214 } 212 }
  213 +
  214 + DestroyOnlineRecognizerResult(r);
215 } 215 }
216 - fclose(fp);  
217 216
218 // add some tail padding 217 // add some tail padding
219 float tail_paddings[4800] = {0}; // 0.3 seconds at 16 kHz sample rate 218 float tail_paddings[4800] = {0}; // 0.3 seconds at 16 kHz sample rate
220 - AcceptWaveform(stream, 16000, tail_paddings, 4800); 219 + AcceptWaveform(stream, wave->sample_rate, tail_paddings, 4800);
  220 +
  221 + SherpaOnnxFreeWave(wave);
221 222
222 InputFinished(stream); 223 InputFinished(stream);
223 while (IsOnlineStreamReady(recognizer, stream)) { 224 while (IsOnlineStreamReady(recognizer, stream)) {
  1 +
  2 +// We assume you have pre-downloaded the whisper multi-lingual models
  3 +// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  4 +// An example command to download the "tiny" whisper model is given below:
  5 +//
  6 +// clang-format off
  7 +//
  8 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
  9 +// tar xvf sherpa-onnx-whisper-tiny.tar.bz2
  10 +// rm sherpa-onnx-whisper-tiny.tar.bz2
  11 +//
  12 +// clang-format on
  13 +
  14 +#include <stdio.h>
  15 +#include <stdlib.h>
  16 +#include <string.h>
  17 +
  18 +#include "sherpa-onnx/c-api/c-api.h"
  19 +
  20 +int32_t main() {
  21 + SherpaOnnxSpokenLanguageIdentificationConfig config;
  22 +
  23 + memset(&config, 0, sizeof(config));
  24 +
  25 + config.whisper.encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx";
  26 + config.whisper.decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx";
  27 + config.num_threads = 1;
  28 + config.debug = 1;
  29 + config.provider = "cpu";
  30 +
  31 + const SherpaOnnxSpokenLanguageIdentification *slid =
  32 + SherpaOnnxCreateSpokenLanguageIdentification(&config);
  33 + if (!slid) {
  34 + fprintf(stderr, "Failed to create spoken language identifier");
  35 + return -1;
  36 + }
  37 +
  38 + // You can find more test waves from
  39 + // https://hf-mirror.com/spaces/k2-fsa/spoken-language-identification/tree/main/test_wavs
  40 + const char *wav_filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav";
  41 + const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  42 + if (wave == NULL) {
  43 + fprintf(stderr, "Failed to read %s\n", wav_filename);
  44 + return -1;
  45 + }
  46 +
  47 + SherpaOnnxOfflineStream *stream =
  48 + SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(slid);
  49 +
  50 + AcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
  51 + wave->num_samples);
  52 +
  53 + const SherpaOnnxSpokenLanguageIdentificationResult *result =
  54 + SherpaOnnxSpokenLanguageIdentificationCompute(slid, stream);
  55 +
  56 + fprintf(stderr, "wav_filename: %s\n", wav_filename);
  57 + fprintf(stderr, "Detected language: %s\n", result->lang);
  58 +
  59 + SherpaOnnxDestroySpokenLanguageIdentificationResult(result);
  60 + DestroyOfflineStream(stream);
  61 + SherpaOnnxFreeWave(wave);
  62 + SherpaOnnxDestroySpokenLanguageIdentification(slid);
  63 +
  64 + return 0;
  65 +}
@@ -3,7 +3,7 @@ @@ -3,7 +3,7 @@
3 set -ex 3 set -ex
4 4
5 if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then 5 if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then
6 - wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2 6 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
7 tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2 7 tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
8 rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2 8 rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
9 fi 9 fi
@@ -3,7 +3,7 @@ @@ -3,7 +3,7 @@
3 set -ex 3 set -ex
4 4
5 if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then 5 if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then
6 - wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2 6 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
7 tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2 7 tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
8 rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2 8 rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
9 fi 9 fi
@@ -6,7 +6,7 @@ @@ -6,7 +6,7 @@
6 6
7 set -ex 7 set -ex
8 if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then 8 if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then
9 - wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 9 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
10 tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 10 tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
11 fi 11 fi
12 12
@@ -6,6 +6,7 @@ @@ -6,6 +6,7 @@
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <memory> 8 #include <memory>
  9 +#include <string>
9 #include <utility> 10 #include <utility>
10 #include <vector> 11 #include <vector>
11 12
@@ -16,7 +17,9 @@ @@ -16,7 +17,9 @@
16 #include "sherpa-onnx/csrc/offline-recognizer.h" 17 #include "sherpa-onnx/csrc/offline-recognizer.h"
17 #include "sherpa-onnx/csrc/offline-tts.h" 18 #include "sherpa-onnx/csrc/offline-tts.h"
18 #include "sherpa-onnx/csrc/online-recognizer.h" 19 #include "sherpa-onnx/csrc/online-recognizer.h"
  20 +#include "sherpa-onnx/csrc/spoken-language-identification.h"
19 #include "sherpa-onnx/csrc/voice-activity-detector.h" 21 #include "sherpa-onnx/csrc/voice-activity-detector.h"
  22 +#include "sherpa-onnx/csrc/wave-reader.h"
20 #include "sherpa-onnx/csrc/wave-writer.h" 23 #include "sherpa-onnx/csrc/wave-writer.h"
21 24
22 struct SherpaOnnxOnlineRecognizer { 25 struct SherpaOnnxOnlineRecognizer {
@@ -859,3 +862,97 @@ int32_t SherpaOnnxWriteWave(const float *samples, int32_t n, @@ -859,3 +862,97 @@ int32_t SherpaOnnxWriteWave(const float *samples, int32_t n,
859 int32_t sample_rate, const char *filename) { 862 int32_t sample_rate, const char *filename) {
860 return sherpa_onnx::WriteWave(filename, sample_rate, samples, n); 863 return sherpa_onnx::WriteWave(filename, sample_rate, samples, n);
861 } 864 }
  865 +
  866 +const SherpaOnnxWave *SherpaOnnxReadWave(const char *filename) {
  867 + int32_t sample_rate = -1;
  868 + bool is_ok = false;
  869 + std::vector<float> samples =
  870 + sherpa_onnx::ReadWave(filename, &sample_rate, &is_ok);
  871 + if (!is_ok) {
  872 + return nullptr;
  873 + }
  874 +
  875 + float *c_samples = new float[samples.size()];
  876 + std::copy(samples.begin(), samples.end(), c_samples);
  877 +
  878 + SherpaOnnxWave *wave = new SherpaOnnxWave;
  879 + wave->samples = c_samples;
  880 + wave->sample_rate = sample_rate;
  881 + wave->num_samples = samples.size();
  882 + return wave;
  883 +}
  884 +
  885 +void SherpaOnnxFreeWave(const SherpaOnnxWave *wave) {
  886 + if (wave) {
  887 + delete[] wave->samples;
  888 + delete wave;
  889 + }
  890 +}
  891 +
  892 +struct SherpaOnnxSpokenLanguageIdentification {
  893 + std::unique_ptr<sherpa_onnx::SpokenLanguageIdentification> impl;
  894 +};
  895 +
  896 +const SherpaOnnxSpokenLanguageIdentification *
  897 +SherpaOnnxCreateSpokenLanguageIdentification(
  898 + const SherpaOnnxSpokenLanguageIdentificationConfig *config) {
  899 + sherpa_onnx::SpokenLanguageIdentificationConfig slid_config;
  900 + slid_config.whisper.encoder = SHERPA_ONNX_OR(config->whisper.encoder, "");
  901 + slid_config.whisper.decoder = SHERPA_ONNX_OR(config->whisper.decoder, "");
  902 + slid_config.whisper.tail_paddings =
  903 + SHERPA_ONNX_OR(config->whisper.tail_paddings, -1);
  904 + slid_config.num_threads = SHERPA_ONNX_OR(config->num_threads, 1);
  905 + slid_config.debug = config->debug;
  906 + slid_config.provider = SHERPA_ONNX_OR(config->provider, "cpu");
  907 +
  908 + if (slid_config.debug) {
  909 + SHERPA_ONNX_LOGE("%s\n", slid_config.ToString().c_str());
  910 + }
  911 +
  912 + if (!slid_config.Validate()) {
  913 + SHERPA_ONNX_LOGE("Errors in config");
  914 + return nullptr;
  915 + }
  916 +
  917 + SherpaOnnxSpokenLanguageIdentification *slid =
  918 + new SherpaOnnxSpokenLanguageIdentification;
  919 + slid->impl =
  920 + std::make_unique<sherpa_onnx::SpokenLanguageIdentification>(slid_config);
  921 +
  922 + return slid;
  923 +}
  924 +
  925 +void SherpaOnnxDestroySpokenLanguageIdentification(
  926 + const SherpaOnnxSpokenLanguageIdentification *slid) {
  927 + delete slid;
  928 +}
  929 +
  930 +SherpaOnnxOfflineStream *
  931 +SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(
  932 + const SherpaOnnxSpokenLanguageIdentification *slid) {
  933 + SherpaOnnxOfflineStream *stream =
  934 + new SherpaOnnxOfflineStream(slid->impl->CreateStream());
  935 + return stream;
  936 +}
  937 +
  938 +const SherpaOnnxSpokenLanguageIdentificationResult *
  939 +SherpaOnnxSpokenLanguageIdentificationCompute(
  940 + const SherpaOnnxSpokenLanguageIdentification *slid,
  941 + const SherpaOnnxOfflineStream *s) {
  942 + std::string lang = slid->impl->Compute(s->impl.get());
  943 + char *c_lang = new char[lang.size() + 1];
  944 + std::copy(lang.begin(), lang.end(), c_lang);
  945 + c_lang[lang.size()] = '\0';
  946 + SherpaOnnxSpokenLanguageIdentificationResult *r =
  947 + new SherpaOnnxSpokenLanguageIdentificationResult;
  948 + r->lang = c_lang;
  949 + return r;
  950 +}
  951 +
  952 +void SherpaOnnxDestroySpokenLanguageIdentificationResult(
  953 + const SherpaOnnxSpokenLanguageIdentificationResult *r) {
  954 + if (r) {
  955 + delete[] r->lang;
  956 + delete r;
  957 + }
  958 +}
@@ -820,6 +820,76 @@ SHERPA_ONNX_API int32_t SherpaOnnxWriteWave(const float *samples, int32_t n, @@ -820,6 +820,76 @@ SHERPA_ONNX_API int32_t SherpaOnnxWriteWave(const float *samples, int32_t n,
820 int32_t sample_rate, 820 int32_t sample_rate,
821 const char *filename); 821 const char *filename);
822 822
  823 +SHERPA_ONNX_API typedef struct SherpaOnnxWave {
  824 + // samples normalized to the range [-1, 1]
  825 + const float *samples;
  826 + int32_t sample_rate;
  827 + int32_t num_samples;
  828 +} SherpaOnnxWave;
  829 +
  830 +// Return a NULL pointer on error. It supports only standard WAVE files.
  831 +// Each sample should be 16-bit. It supports only a single channel.
  832 +//
  833 +// If the returned pointer is not NULL, the user has to invoke
  834 +// SherpaOnnxFreeWave() to free the returned pointer to avoid memory leak.
  835 +SHERPA_ONNX_API const SherpaOnnxWave *SherpaOnnxReadWave(const char *filename);
  836 +
  837 +SHERPA_ONNX_API void SherpaOnnxFreeWave(const SherpaOnnxWave *wave);
  838 +
  839 +// Spoken language identification
  840 +
  841 +SHERPA_ONNX_API typedef struct
  842 + SherpaOnnxSpokenLanguageIdentificationWhisperConfig {
  843 + const char *encoder;
  844 + const char *decoder;
  845 + int32_t tail_paddings;
  846 +} SherpaOnnxSpokenLanguageIdentificationWhisperConfig;
  847 +
  848 +SHERPA_ONNX_API typedef struct SherpaOnnxSpokenLanguageIdentificationConfig {
  849 + SherpaOnnxSpokenLanguageIdentificationWhisperConfig whisper;
  850 + int32_t num_threads;
  851 + int32_t debug;
  852 + const char *provider;
  853 +} SherpaOnnxSpokenLanguageIdentificationConfig;
  854 +
  855 +SHERPA_ONNX_API typedef struct SherpaOnnxSpokenLanguageIdentification
  856 + SherpaOnnxSpokenLanguageIdentification;
  857 +
  858 +// Create an instance of SpokenLanguageIdentification.
  859 +// The user has to invoke SherpaOnnxDestroySpokenLanguageIdentification()
  860 +// to free the returned pointer to avoid memory leak.
  861 +SHERPA_ONNX_API const SherpaOnnxSpokenLanguageIdentification *
  862 +SherpaOnnxCreateSpokenLanguageIdentification(
  863 + const SherpaOnnxSpokenLanguageIdentificationConfig *config);
  864 +
  865 +SHERPA_ONNX_API void SherpaOnnxDestroySpokenLanguageIdentification(
  866 + const SherpaOnnxSpokenLanguageIdentification *slid);
  867 +
  868 +// The user has to invoke DestroyOfflineStream()
  869 +// to free the returned pointer to avoid memory leak
  870 +SHERPA_ONNX_API SherpaOnnxOfflineStream *
  871 +SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(
  872 + const SherpaOnnxSpokenLanguageIdentification *slid);
  873 +
  874 +SHERPA_ONNX_API typedef struct SherpaOnnxSpokenLanguageIdentificationResult {
  875 + // en for English
  876 + // de for German
  877 + // zh for Chinese
  878 + // es for Spanish
  879 + // ...
  880 + const char *lang;
  881 +} SherpaOnnxSpokenLanguageIdentificationResult;
  882 +
  883 +// The user has to invoke SherpaOnnxDestroySpokenLanguageIdentificationResult()
  884 +// to free the returned pointer to avoid memory leak
  885 +SHERPA_ONNX_API const SherpaOnnxSpokenLanguageIdentificationResult *
  886 +SherpaOnnxSpokenLanguageIdentificationCompute(
  887 + const SherpaOnnxSpokenLanguageIdentification *slid,
  888 + const SherpaOnnxOfflineStream *s);
  889 +
  890 +SHERPA_ONNX_API void SherpaOnnxDestroySpokenLanguageIdentificationResult(
  891 + const SherpaOnnxSpokenLanguageIdentificationResult *r);
  892 +
823 #if defined(__GNUC__) 893 #if defined(__GNUC__)
824 #pragma GCC diagnostic pop 894 #pragma GCC diagnostic pop
825 #endif 895 #endif