Fangjun Kuang
Committed by GitHub

Add C++ support for non-streaming NeMo fast conformer hybrid transducer ctc (the ctc branch) (#848)

@@ -13,14 +13,111 @@ echo "PATH: $PATH" @@ -13,14 +13,111 @@ echo "PATH: $PATH"
13 13
14 which $EXE 14 which $EXE
15 15
  16 +log "-----------------------------------------------------------------"
  17 +log "Run Nemo fast conformer hybrid transducer ctc models (CTC branch)"
  18 +log "-----------------------------------------------------------------"
  19 +
  20 +url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
  21 +name=$(basename $url)
  22 +curl -SL -O $url
  23 +tar xvf $name
  24 +rm $name
  25 +repo=$(basename -s .tar.bz2 $name)
  26 +ls -lh $repo
  27 +
  28 +log "test $repo"
  29 +test_wavs=(
  30 +de-german.wav
  31 +es-spanish.wav
  32 +hr-croatian.wav
  33 +po-polish.wav
  34 +uk-ukrainian.wav
  35 +en-english.wav
  36 +fr-french.wav
  37 +it-italian.wav
  38 +ru-russian.wav
  39 +)
  40 +for w in ${test_wavs[@]}; do
  41 + time $EXE \
  42 + --tokens=$repo/tokens.txt \
  43 + --nemo-ctc-model=$repo/model.onnx \
  44 + --debug=1 \
  45 + $repo/test_wavs/$w
  46 +done
  47 +
  48 +rm -rf $repo
  49 +
  50 +url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-en-24500.tar.bz2
  51 +name=$(basename $url)
  52 +curl -SL -O $url
  53 +tar xvf $name
  54 +rm $name
  55 +repo=$(basename -s .tar.bz2 $name)
  56 +ls -lh $repo
  57 +
  58 +log "Test $repo"
  59 +
  60 +time $EXE \
  61 + --tokens=$repo/tokens.txt \
  62 + --nemo-ctc-model=$repo/model.onnx \
  63 + --debug=1 \
  64 + $repo/test_wavs/en-english.wav
  65 +
  66 +rm -rf $repo
  67 +
  68 +url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-es-1424.tar.bz2
  69 +name=$(basename $url)
  70 +curl -SL -O $url
  71 +tar xvf $name
  72 +rm $name
  73 +repo=$(basename -s .tar.bz2 $name)
  74 +ls -lh $repo
  75 +
  76 +log "test $repo"
  77 +
  78 +time $EXE \
  79 + --tokens=$repo/tokens.txt \
  80 + --nemo-ctc-model=$repo/model.onnx \
  81 + --debug=1 \
  82 + $repo/test_wavs/es-spanish.wav
  83 +
  84 +rm -rf $repo
  85 +
  86 +url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288.tar.bz2
  87 +name=$(basename $url)
  88 +curl -SL -O $url
  89 +tar xvf $name
  90 +rm $name
  91 +repo=$(basename -s .tar.bz2 $name)
  92 +ls -lh $repo
  93 +
  94 +log "Test $repo"
  95 +
  96 +test_wavs=(
  97 +en-english.wav
  98 +de-german.wav
  99 +fr-french.wav
  100 +es-spanish.wav
  101 +)
  102 +
  103 +for w in ${test_wavs[@]}; do
  104 + time $EXE \
  105 + --tokens=$repo/tokens.txt \
  106 + --nemo-ctc-model=$repo/model.onnx \
  107 + --debug=1 \
  108 + $repo/test_wavs/$w
  109 +done
  110 +
  111 +rm -rf $repo
  112 +
16 log "------------------------------------------------------------" 113 log "------------------------------------------------------------"
17 log "Run Wenet models" 114 log "Run Wenet models"
18 log "------------------------------------------------------------" 115 log "------------------------------------------------------------"
19 wenet_models=( 116 wenet_models=(
20 sherpa-onnx-zh-wenet-aishell 117 sherpa-onnx-zh-wenet-aishell
21 -sherpa-onnx-zh-wenet-aishell2 118 +# sherpa-onnx-zh-wenet-aishell2
22 # sherpa-onnx-zh-wenet-wenetspeech 119 # sherpa-onnx-zh-wenet-wenetspeech
23 -sherpa-onnx-zh-wenet-multi-cn 120 +# sherpa-onnx-zh-wenet-multi-cn
24 sherpa-onnx-en-wenet-librispeech 121 sherpa-onnx-en-wenet-librispeech
25 # sherpa-onnx-en-wenet-gigaspeech 122 # sherpa-onnx-en-wenet-gigaspeech
26 ) 123 )
@@ -62,6 +62,11 @@ for wav in ${waves[@]}; do @@ -62,6 +62,11 @@ for wav in ${waves[@]}; do
62 ls -lh *.wav 62 ls -lh *.wav
63 done 63 done
64 64
  65 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/spoken-language-identification-test-wavs.tar.bz2
  66 +tar xvf spoken-language-identification-test-wavs.tar.bz2
  67 +rm spoken-language-identification-test-wavs.tar.bz2
  68 +data=spoken-language-identification-test-wavs
  69 +
65 for name in ${names[@]}; do 70 for name in ${names[@]}; do
66 log "------------------------------------------------------------" 71 log "------------------------------------------------------------"
67 log "Run $name" 72 log "Run $name"
@@ -85,14 +90,14 @@ for name in ${names[@]}; do @@ -85,14 +90,14 @@ for name in ${names[@]}; do
85 time $EXE \ 90 time $EXE \
86 --whisper-encoder=$repo/${name}-encoder.onnx \ 91 --whisper-encoder=$repo/${name}-encoder.onnx \
87 --whisper-decoder=$repo/${name}-decoder.onnx \ 92 --whisper-decoder=$repo/${name}-decoder.onnx \
88 - $wav 93 + $data/$wav
89 94
90 log "test int8 onnx" 95 log "test int8 onnx"
91 96
92 time $EXE \ 97 time $EXE \
93 --whisper-encoder=$repo/${name}-encoder.int8.onnx \ 98 --whisper-encoder=$repo/${name}-encoder.int8.onnx \
94 --whisper-decoder=$repo/${name}-decoder.int8.onnx \ 99 --whisper-decoder=$repo/${name}-decoder.int8.onnx \
95 - $wav 100 + $data/$wav
96 done 101 done
97 rm -rf $repo 102 rm -rf $repo
98 done 103 done
@@ -128,13 +128,13 @@ jobs: @@ -128,13 +128,13 @@ jobs:
128 name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }} 128 name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
129 path: install/* 129 path: install/*
130 130
131 - - name: Test offline punctuation 131 + - name: Test spoken language identification (C++ API)
132 shell: bash 132 shell: bash
133 run: | 133 run: |
134 export PATH=$PWD/build/bin:$PATH 134 export PATH=$PWD/build/bin:$PATH
135 - export EXE=sherpa-onnx-offline-punctuation 135 + export EXE=sherpa-onnx-offline-language-identification
136 136
137 - .github/scripts/test-offline-punctuation.sh 137 + .github/scripts/test-spoken-language-identification.sh
138 138
139 - name: Test C API 139 - name: Test C API
140 shell: bash 140 shell: bash
@@ -147,13 +147,13 @@ jobs: @@ -147,13 +147,13 @@ jobs:
147 147
148 .github/scripts/test-c-api.sh 148 .github/scripts/test-c-api.sh
149 149
150 - - name: Test Audio tagging 150 + - name: Test offline CTC
151 shell: bash 151 shell: bash
152 run: | 152 run: |
153 export PATH=$PWD/build/bin:$PATH 153 export PATH=$PWD/build/bin:$PATH
154 - export EXE=sherpa-onnx-offline-audio-tagging 154 + export EXE=sherpa-onnx-offline
155 155
156 - .github/scripts/test-audio-tagging.sh 156 + .github/scripts/test-offline-ctc.sh
157 157
158 - name: Test online CTC 158 - name: Test online CTC
159 shell: bash 159 shell: bash
@@ -163,14 +163,21 @@ jobs: @@ -163,14 +163,21 @@ jobs:
163 163
164 .github/scripts/test-online-ctc.sh 164 .github/scripts/test-online-ctc.sh
165 165
  166 + - name: Test offline punctuation
  167 + shell: bash
  168 + run: |
  169 + export PATH=$PWD/build/bin:$PATH
  170 + export EXE=sherpa-onnx-offline-punctuation
  171 +
  172 + .github/scripts/test-offline-punctuation.sh
166 173
167 - - name: Test spoken language identification (C++ API) 174 + - name: Test Audio tagging
168 shell: bash 175 shell: bash
169 run: | 176 run: |
170 export PATH=$PWD/build/bin:$PATH 177 export PATH=$PWD/build/bin:$PATH
171 - export EXE=sherpa-onnx-offline-language-identification 178 + export EXE=sherpa-onnx-offline-audio-tagging
172 179
173 - .github/scripts/test-spoken-language-identification.sh 180 + .github/scripts/test-audio-tagging.sh
174 181
175 - name: Test transducer kws 182 - name: Test transducer kws
176 shell: bash 183 shell: bash
@@ -180,7 +187,6 @@ jobs: @@ -180,7 +187,6 @@ jobs:
180 187
181 .github/scripts/test-kws.sh 188 .github/scripts/test-kws.sh
182 189
183 -  
184 - name: Test offline Whisper 190 - name: Test offline Whisper
185 if: matrix.build_type != 'Debug' 191 if: matrix.build_type != 'Debug'
186 shell: bash 192 shell: bash
@@ -192,14 +198,6 @@ jobs: @@ -192,14 +198,6 @@ jobs:
192 198
193 .github/scripts/test-offline-whisper.sh 199 .github/scripts/test-offline-whisper.sh
194 200
195 - - name: Test offline CTC  
196 - shell: bash  
197 - run: |  
198 - export PATH=$PWD/build/bin:$PATH  
199 - export EXE=sherpa-onnx-offline  
200 -  
201 - .github/scripts/test-offline-ctc.sh  
202 -  
203 - name: Test offline TTS 201 - name: Test offline TTS
204 if: matrix.with_tts == 'ON' 202 if: matrix.with_tts == 'ON'
205 shell: bash 203 shell: bash
@@ -107,6 +107,14 @@ jobs: @@ -107,6 +107,14 @@ jobs:
107 otool -L build/bin/sherpa-onnx 107 otool -L build/bin/sherpa-onnx
108 otool -l build/bin/sherpa-onnx 108 otool -l build/bin/sherpa-onnx
109 109
  110 + - name: Test online CTC
  111 + shell: bash
  112 + run: |
  113 + export PATH=$PWD/build/bin:$PATH
  114 + export EXE=sherpa-onnx
  115 +
  116 + .github/scripts/test-online-ctc.sh
  117 +
110 - name: Test offline punctuation 118 - name: Test offline punctuation
111 shell: bash 119 shell: bash
112 run: | 120 run: |
@@ -150,14 +158,6 @@ jobs: @@ -150,14 +158,6 @@ jobs:
150 158
151 .github/scripts/test-kws.sh 159 .github/scripts/test-kws.sh
152 160
153 - - name: Test online CTC  
154 - shell: bash  
155 - run: |  
156 - export PATH=$PWD/build/bin:$PATH  
157 - export EXE=sherpa-onnx  
158 -  
159 - .github/scripts/test-online-ctc.sh  
160 -  
161 - name: Test offline TTS 161 - name: Test offline TTS
162 if: matrix.with_tts == 'ON' 162 if: matrix.with_tts == 'ON'
163 shell: bash 163 shell: bash
@@ -20,6 +20,7 @@ namespace { @@ -20,6 +20,7 @@ namespace {
20 20
21 enum class ModelType { 21 enum class ModelType {
22 kEncDecCTCModelBPE, 22 kEncDecCTCModelBPE,
  23 + kEncDecHybridRNNTCTCBPEModel,
23 kTdnn, 24 kTdnn,
24 kZipformerCtc, 25 kZipformerCtc,
25 kWenetCtc, 26 kWenetCtc,
@@ -55,7 +56,10 @@ static ModelType GetModelType(char *model_data, size_t model_data_length, @@ -55,7 +56,10 @@ static ModelType GetModelType(char *model_data, size_t model_data_length,
55 "No model_type in the metadata!\n" 56 "No model_type in the metadata!\n"
56 "If you are using models from NeMo, please refer to\n" 57 "If you are using models from NeMo, please refer to\n"
57 "https://huggingface.co/csukuangfj/" 58 "https://huggingface.co/csukuangfj/"
58 - "sherpa-onnx-nemo-ctc-en-citrinet-512/blob/main/add-model-metadata.py" 59 + "sherpa-onnx-nemo-ctc-en-citrinet-512/blob/main/add-model-metadata.py\n"
  60 + "or "
  61 + "https://github.com/k2-fsa/sherpa-onnx/tree/master/scripts/nemo/"
  62 + "fast-conformer-hybrid-transducer-ctc\n"
59 "If you are using models from WeNet, please refer to\n" 63 "If you are using models from WeNet, please refer to\n"
60 "https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/wenet/" 64 "https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/wenet/"
61 "run.sh\n" 65 "run.sh\n"
@@ -66,6 +70,8 @@ static ModelType GetModelType(char *model_data, size_t model_data_length, @@ -66,6 +70,8 @@ static ModelType GetModelType(char *model_data, size_t model_data_length,
66 70
67 if (model_type.get() == std::string("EncDecCTCModelBPE")) { 71 if (model_type.get() == std::string("EncDecCTCModelBPE")) {
68 return ModelType::kEncDecCTCModelBPE; 72 return ModelType::kEncDecCTCModelBPE;
  73 + } else if (model_type.get() == std::string("EncDecHybridRNNTCTCBPEModel")) {
  74 + return ModelType::kEncDecHybridRNNTCTCBPEModel;
69 } else if (model_type.get() == std::string("tdnn")) { 75 } else if (model_type.get() == std::string("tdnn")) {
70 return ModelType::kTdnn; 76 return ModelType::kTdnn;
71 } else if (model_type.get() == std::string("zipformer2_ctc")) { 77 } else if (model_type.get() == std::string("zipformer2_ctc")) {
@@ -106,6 +112,9 @@ std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create( @@ -106,6 +112,9 @@ std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
106 case ModelType::kEncDecCTCModelBPE: 112 case ModelType::kEncDecCTCModelBPE:
107 return std::make_unique<OfflineNemoEncDecCtcModel>(config); 113 return std::make_unique<OfflineNemoEncDecCtcModel>(config);
108 break; 114 break;
  115 + case ModelType::kEncDecHybridRNNTCTCBPEModel:
  116 + return std::make_unique<OfflineNemoEncDecHybridRNNTCTCBPEModel>(config);
  117 + break;
109 case ModelType::kTdnn: 118 case ModelType::kTdnn:
110 return std::make_unique<OfflineTdnnCtcModel>(config); 119 return std::make_unique<OfflineTdnnCtcModel>(config);
111 break; 120 break;
@@ -153,6 +162,9 @@ std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create( @@ -153,6 +162,9 @@ std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
153 case ModelType::kEncDecCTCModelBPE: 162 case ModelType::kEncDecCTCModelBPE:
154 return std::make_unique<OfflineNemoEncDecCtcModel>(mgr, config); 163 return std::make_unique<OfflineNemoEncDecCtcModel>(mgr, config);
155 break; 164 break;
  165 + case ModelType::kEncDecHybridRNNTCTCBPEModel:
  166 + return std::make_unique<OfflineNemoEncDecHybridRNNTCTCBPEModel>(config);
  167 + break;
156 case ModelType::kTdnn: 168 case ModelType::kTdnn:
157 return std::make_unique<OfflineTdnnCtcModel>(mgr, config); 169 return std::make_unique<OfflineTdnnCtcModel>(mgr, config);
158 break; 170 break;
@@ -81,6 +81,8 @@ class OfflineNemoEncDecCtcModel : public OfflineCtcModel { @@ -81,6 +81,8 @@ class OfflineNemoEncDecCtcModel : public OfflineCtcModel {
81 std::unique_ptr<Impl> impl_; 81 std::unique_ptr<Impl> impl_;
82 }; 82 };
83 83
  84 +using OfflineNemoEncDecHybridRNNTCTCBPEModel = OfflineNemoEncDecCtcModel;
  85 +
84 } // namespace sherpa_onnx 86 } // namespace sherpa_onnx
85 87
86 #endif // SHERPA_ONNX_CSRC_OFFLINE_NEMO_ENC_DEC_CTC_MODEL_H_ 88 #endif // SHERPA_ONNX_CSRC_OFFLINE_NEMO_ENC_DEC_CTC_MODEL_H_
@@ -122,7 +122,8 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create( @@ -122,7 +122,8 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
122 return std::make_unique<OfflineRecognizerParaformerImpl>(config); 122 return std::make_unique<OfflineRecognizerParaformerImpl>(config);
123 } 123 }
124 124
125 - if (model_type == "EncDecCTCModelBPE" || model_type == "tdnn" || 125 + if (model_type == "EncDecCTCModelBPE" ||
  126 + model_type == "EncDecHybridRNNTCTCBPEModel" || model_type == "tdnn" ||
126 model_type == "zipformer2_ctc" || model_type == "wenet_ctc") { 127 model_type == "zipformer2_ctc" || model_type == "wenet_ctc") {
127 return std::make_unique<OfflineRecognizerCtcImpl>(config); 128 return std::make_unique<OfflineRecognizerCtcImpl>(config);
128 } 129 }
@@ -137,6 +138,7 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create( @@ -137,6 +138,7 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
137 " - Non-streaming transducer models from icefall\n" 138 " - Non-streaming transducer models from icefall\n"
138 " - Non-streaming Paraformer models from FunASR\n" 139 " - Non-streaming Paraformer models from FunASR\n"
139 " - EncDecCTCModelBPE models from NeMo\n" 140 " - EncDecCTCModelBPE models from NeMo\n"
  141 + " - EncDecHybridRNNTCTCBPEModel models from NeMo\n"
140 " - Whisper models\n" 142 " - Whisper models\n"
141 " - Tdnn models\n" 143 " - Tdnn models\n"
142 " - Zipformer CTC models\n" 144 " - Zipformer CTC models\n"
@@ -252,7 +254,8 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create( @@ -252,7 +254,8 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
252 return std::make_unique<OfflineRecognizerParaformerImpl>(mgr, config); 254 return std::make_unique<OfflineRecognizerParaformerImpl>(mgr, config);
253 } 255 }
254 256
255 - if (model_type == "EncDecCTCModelBPE" || model_type == "tdnn" || 257 + if (model_type == "EncDecCTCModelBPE" ||
  258 + model_type == "EncDecHybridRNNTCTCBPEModel" || model_type == "tdnn" ||
256 model_type == "zipformer2_ctc" || model_type == "wenet_ctc") { 259 model_type == "zipformer2_ctc" || model_type == "wenet_ctc") {
257 return std::make_unique<OfflineRecognizerCtcImpl>(mgr, config); 260 return std::make_unique<OfflineRecognizerCtcImpl>(mgr, config);
258 } 261 }
@@ -267,6 +270,7 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create( @@ -267,6 +270,7 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
267 " - Non-streaming transducer models from icefall\n" 270 " - Non-streaming transducer models from icefall\n"
268 " - Non-streaming Paraformer models from FunASR\n" 271 " - Non-streaming Paraformer models from FunASR\n"
269 " - EncDecCTCModelBPE models from NeMo\n" 272 " - EncDecCTCModelBPE models from NeMo\n"
  273 + " - EncDecHybridRNNTCTCBPEModel models from NeMo\n"
270 " - Whisper models\n" 274 " - Whisper models\n"
271 " - Tdnn models\n" 275 " - Tdnn models\n"
272 " - Zipformer CTC models\n" 276 " - Zipformer CTC models\n"
@@ -67,9 +67,13 @@ void SymbolTable::Init(std::istream &is) { @@ -67,9 +67,13 @@ void SymbolTable::Init(std::istream &is) {
67 // the following check. 67 // the following check.
68 // 68 //
69 // Note: Only id2sym_ matters as we use it to convert ID to symbols. 69 // Note: Only id2sym_ matters as we use it to convert ID to symbols.
  70 +#if 0
  71 + // We disable this check because, for some multilingual BPE models
  72 + // from NeMo, the same symbol can appear multiple times with different IDs.
70 if (sym != " ") { 73 if (sym != " ") {
71 assert(sym2id_.count(sym) == 0); 74 assert(sym2id_.count(sym) == 0);
72 } 75 }
  76 +#endif
73 77
74 assert(id2sym_.count(id) == 0); 78 assert(id2sym_.count(id) == 0);
75 79