Fangjun Kuang
Committed by GitHub

Add TeleSpeech CTC to non_streaming_server.py (#1649)

@@ -116,6 +116,16 @@ python3 ./python-api-examples/non_streaming_server.py \ @@ -116,6 +116,16 @@ python3 ./python-api-examples/non_streaming_server.py \
116 --sense-voice=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx \ 116 --sense-voice=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx \
117 --tokens=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt 117 --tokens=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt
118 118
  119 +(9) Use a non-streaming TeleSpeech CTC model
  120 +
  121 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
  122 +tar xvf sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
  123 +rm sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
  124 +
  125 +python3 ./python-api-examples/non_streaming_server.py \
  126 + --telespeech-ctc=./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx \
  127 + --tokens=./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt
  128 +
119 ---- 129 ----
120 130
121 To use a certificate so that you can use https, please use 131 To use a certificate so that you can use https, please use
@@ -250,6 +260,15 @@ def add_nemo_ctc_model_args(parser: argparse.ArgumentParser): @@ -250,6 +260,15 @@ def add_nemo_ctc_model_args(parser: argparse.ArgumentParser):
250 ) 260 )
251 261
252 262
def add_telespeech_ctc_model_args(parser: argparse.ArgumentParser):
    """Register the TeleSpeech CTC model option on ``parser``.

    Adds a single string option, ``--telespeech-ctc``, whose value is the
    path to the TeleSpeech CTC ``model.onnx`` file. It defaults to the
    empty string, so the parsed ``telespeech_ctc`` attribute is falsy
    unless the user supplies a path.

    Args:
      parser:
        The argument parser to extend; it is modified in place.
    """
    parser.add_argument(
        "--telespeech-ctc",
        default="",
        type=str,
        help="Path to the model.onnx from TeleSpeech CTC",
    )
  270 +
  271 +
253 def add_wenet_ctc_model_args(parser: argparse.ArgumentParser): 272 def add_wenet_ctc_model_args(parser: argparse.ArgumentParser):
254 parser.add_argument( 273 parser.add_argument(
255 "--wenet-ctc", 274 "--wenet-ctc",
@@ -353,6 +372,7 @@ def add_model_args(parser: argparse.ArgumentParser): @@ -353,6 +372,7 @@ def add_model_args(parser: argparse.ArgumentParser):
353 add_sense_voice_model_args(parser) 372 add_sense_voice_model_args(parser)
354 add_nemo_ctc_model_args(parser) 373 add_nemo_ctc_model_args(parser)
355 add_wenet_ctc_model_args(parser) 374 add_wenet_ctc_model_args(parser)
  375 + add_telespeech_ctc_model_args(parser)
356 add_tdnn_ctc_model_args(parser) 376 add_tdnn_ctc_model_args(parser)
357 add_whisper_model_args(parser) 377 add_whisper_model_args(parser)
358 add_moonshine_model_args(parser) 378 add_moonshine_model_args(parser)
@@ -922,6 +942,7 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer: @@ -922,6 +942,7 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
922 assert len(args.sense_voice) == 0, args.sense_voice 942 assert len(args.sense_voice) == 0, args.sense_voice
923 assert len(args.nemo_ctc) == 0, args.nemo_ctc 943 assert len(args.nemo_ctc) == 0, args.nemo_ctc
924 assert len(args.wenet_ctc) == 0, args.wenet_ctc 944 assert len(args.wenet_ctc) == 0, args.wenet_ctc
  945 + assert len(args.telespeech_ctc) == 0, args.telespeech_ctc
925 assert len(args.whisper_encoder) == 0, args.whisper_encoder 946 assert len(args.whisper_encoder) == 0, args.whisper_encoder
926 assert len(args.whisper_decoder) == 0, args.whisper_decoder 947 assert len(args.whisper_decoder) == 0, args.whisper_decoder
927 assert len(args.tdnn_model) == 0, args.tdnn_model 948 assert len(args.tdnn_model) == 0, args.tdnn_model
@@ -955,6 +976,7 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer: @@ -955,6 +976,7 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
955 assert len(args.sense_voice) == 0, args.sense_voice 976 assert len(args.sense_voice) == 0, args.sense_voice
956 assert len(args.nemo_ctc) == 0, args.nemo_ctc 977 assert len(args.nemo_ctc) == 0, args.nemo_ctc
957 assert len(args.wenet_ctc) == 0, args.wenet_ctc 978 assert len(args.wenet_ctc) == 0, args.wenet_ctc
  979 + assert len(args.telespeech_ctc) == 0, args.telespeech_ctc
958 assert len(args.whisper_encoder) == 0, args.whisper_encoder 980 assert len(args.whisper_encoder) == 0, args.whisper_encoder
959 assert len(args.whisper_decoder) == 0, args.whisper_decoder 981 assert len(args.whisper_decoder) == 0, args.whisper_decoder
960 assert len(args.tdnn_model) == 0, args.tdnn_model 982 assert len(args.tdnn_model) == 0, args.tdnn_model
@@ -979,6 +1001,7 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer: @@ -979,6 +1001,7 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
979 elif args.sense_voice: 1001 elif args.sense_voice:
980 assert len(args.nemo_ctc) == 0, args.nemo_ctc 1002 assert len(args.nemo_ctc) == 0, args.nemo_ctc
981 assert len(args.wenet_ctc) == 0, args.wenet_ctc 1003 assert len(args.wenet_ctc) == 0, args.wenet_ctc
  1004 + assert len(args.telespeech_ctc) == 0, args.telespeech_ctc
982 assert len(args.whisper_encoder) == 0, args.whisper_encoder 1005 assert len(args.whisper_encoder) == 0, args.whisper_encoder
983 assert len(args.whisper_decoder) == 0, args.whisper_decoder 1006 assert len(args.whisper_decoder) == 0, args.whisper_decoder
984 assert len(args.tdnn_model) == 0, args.tdnn_model 1007 assert len(args.tdnn_model) == 0, args.tdnn_model
@@ -998,6 +1021,7 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer: @@ -998,6 +1021,7 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
998 ) 1021 )
999 elif args.nemo_ctc: 1022 elif args.nemo_ctc:
1000 assert len(args.wenet_ctc) == 0, args.wenet_ctc 1023 assert len(args.wenet_ctc) == 0, args.wenet_ctc
  1024 + assert len(args.telespeech_ctc) == 0, args.telespeech_ctc
1001 assert len(args.whisper_encoder) == 0, args.whisper_encoder 1025 assert len(args.whisper_encoder) == 0, args.whisper_encoder
1002 assert len(args.whisper_decoder) == 0, args.whisper_decoder 1026 assert len(args.whisper_decoder) == 0, args.whisper_decoder
1003 assert len(args.tdnn_model) == 0, args.tdnn_model 1027 assert len(args.tdnn_model) == 0, args.tdnn_model
@@ -1020,6 +1044,7 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer: @@ -1020,6 +1044,7 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
1020 provider=args.provider, 1044 provider=args.provider,
1021 ) 1045 )
1022 elif args.wenet_ctc: 1046 elif args.wenet_ctc:
  1047 + assert len(args.telespeech_ctc) == 0, args.telespeech_ctc
1023 assert len(args.whisper_encoder) == 0, args.whisper_encoder 1048 assert len(args.whisper_encoder) == 0, args.whisper_encoder
1024 assert len(args.whisper_decoder) == 0, args.whisper_decoder 1049 assert len(args.whisper_decoder) == 0, args.whisper_decoder
1025 assert len(args.tdnn_model) == 0, args.tdnn_model 1050 assert len(args.tdnn_model) == 0, args.tdnn_model
@@ -1041,6 +1066,28 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer: @@ -1041,6 +1066,28 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
1041 decoding_method=args.decoding_method, 1066 decoding_method=args.decoding_method,
1042 provider=args.provider, 1067 provider=args.provider,
1043 ) 1068 )
  1069 + elif args.telespeech_ctc:
  1070 + assert len(args.whisper_encoder) == 0, args.whisper_encoder
  1071 + assert len(args.whisper_decoder) == 0, args.whisper_decoder
  1072 + assert len(args.tdnn_model) == 0, args.tdnn_model
  1073 + assert len(args.moonshine_preprocessor) == 0, args.moonshine_preprocessor
  1074 + assert len(args.moonshine_encoder) == 0, args.moonshine_encoder
  1075 + assert (
  1076 + len(args.moonshine_uncached_decoder) == 0
  1077 + ), args.moonshine_uncached_decoder
  1078 + assert len(args.moonshine_cached_decoder) == 0, args.moonshine_cached_decoder
  1079 +
  1080 + assert_file_exists(args.telespeech_ctc)
  1081 +
  1082 + recognizer = sherpa_onnx.OfflineRecognizer.from_telespeech_ctc(
  1083 + model=args.telespeech_ctc,
  1084 + tokens=args.tokens,
  1085 + num_threads=args.num_threads,
  1086 + sample_rate=args.sample_rate,
  1087 + feature_dim=args.feat_dim,
  1088 + decoding_method=args.decoding_method,
  1089 + provider=args.provider,
  1090 + )
1044 elif args.whisper_encoder: 1091 elif args.whisper_encoder:
1045 assert len(args.tdnn_model) == 0, args.tdnn_model 1092 assert len(args.tdnn_model) == 0, args.tdnn_model
1046 assert_file_exists(args.whisper_encoder) 1093 assert_file_exists(args.whisper_encoder)