Committed by GitHub
Add TeleSpeech CTC to non_streaming_server.py (#1649)
正在显示 1 个修改的文件，包含 47 行增加和 0 行删除
| @@ -116,6 +116,16 @@ python3 ./python-api-examples/non_streaming_server.py \ | @@ -116,6 +116,16 @@ python3 ./python-api-examples/non_streaming_server.py \ | ||
| 116 | --sense-voice=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx \ | 116 | --sense-voice=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx \ |
| 117 | --tokens=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt | 117 | --tokens=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt |
| 118 | 118 | ||
| 119 | +(9) Use a Non-streaming telespeech ctc model | ||
| 120 | + | ||
| 121 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2 | ||
| 122 | +tar xvf sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2 | ||
| 123 | +rm sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2 | ||
| 124 | + | ||
| 125 | +python3 ./python-api-examples/non_streaming_server.py \ | ||
| 126 | + --telespeech-ctc=./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx \ | ||
| 127 | + --tokens=./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt | ||
| 128 | + | ||
| 119 | ---- | 129 | ---- |
| 120 | 130 | ||
| 121 | To use a certificate so that you can use https, please use | 131 | To use a certificate so that you can use https, please use |
| @@ -250,6 +260,15 @@ def add_nemo_ctc_model_args(parser: argparse.ArgumentParser): | @@ -250,6 +260,15 @@ def add_nemo_ctc_model_args(parser: argparse.ArgumentParser): | ||
| 250 | ) | 260 | ) |
| 251 | 261 | ||
| 252 | 262 | ||
| 263 | +def add_telespeech_ctc_model_args(parser: argparse.ArgumentParser): | ||
| 264 | + parser.add_argument( | ||
| 265 | + "--telespeech-ctc", | ||
| 266 | + default="", | ||
| 267 | + type=str, | ||
| 268 | + help="Path to the model.onnx from TeleSpeech CTC", | ||
| 269 | + ) | ||
| 270 | + | ||
| 271 | + | ||
| 253 | def add_wenet_ctc_model_args(parser: argparse.ArgumentParser): | 272 | def add_wenet_ctc_model_args(parser: argparse.ArgumentParser): |
| 254 | parser.add_argument( | 273 | parser.add_argument( |
| 255 | "--wenet-ctc", | 274 | "--wenet-ctc", |
| @@ -353,6 +372,7 @@ def add_model_args(parser: argparse.ArgumentParser): | @@ -353,6 +372,7 @@ def add_model_args(parser: argparse.ArgumentParser): | ||
| 353 | add_sense_voice_model_args(parser) | 372 | add_sense_voice_model_args(parser) |
| 354 | add_nemo_ctc_model_args(parser) | 373 | add_nemo_ctc_model_args(parser) |
| 355 | add_wenet_ctc_model_args(parser) | 374 | add_wenet_ctc_model_args(parser) |
| 375 | + add_telespeech_ctc_model_args(parser) | ||
| 356 | add_tdnn_ctc_model_args(parser) | 376 | add_tdnn_ctc_model_args(parser) |
| 357 | add_whisper_model_args(parser) | 377 | add_whisper_model_args(parser) |
| 358 | add_moonshine_model_args(parser) | 378 | add_moonshine_model_args(parser) |
| @@ -922,6 +942,7 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer: | @@ -922,6 +942,7 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer: | ||
| 922 | assert len(args.sense_voice) == 0, args.sense_voice | 942 | assert len(args.sense_voice) == 0, args.sense_voice |
| 923 | assert len(args.nemo_ctc) == 0, args.nemo_ctc | 943 | assert len(args.nemo_ctc) == 0, args.nemo_ctc |
| 924 | assert len(args.wenet_ctc) == 0, args.wenet_ctc | 944 | assert len(args.wenet_ctc) == 0, args.wenet_ctc |
| 945 | + assert len(args.telespeech_ctc) == 0, args.telespeech_ctc | ||
| 925 | assert len(args.whisper_encoder) == 0, args.whisper_encoder | 946 | assert len(args.whisper_encoder) == 0, args.whisper_encoder |
| 926 | assert len(args.whisper_decoder) == 0, args.whisper_decoder | 947 | assert len(args.whisper_decoder) == 0, args.whisper_decoder |
| 927 | assert len(args.tdnn_model) == 0, args.tdnn_model | 948 | assert len(args.tdnn_model) == 0, args.tdnn_model |
| @@ -955,6 +976,7 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer: | @@ -955,6 +976,7 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer: | ||
| 955 | assert len(args.sense_voice) == 0, args.sense_voice | 976 | assert len(args.sense_voice) == 0, args.sense_voice |
| 956 | assert len(args.nemo_ctc) == 0, args.nemo_ctc | 977 | assert len(args.nemo_ctc) == 0, args.nemo_ctc |
| 957 | assert len(args.wenet_ctc) == 0, args.wenet_ctc | 978 | assert len(args.wenet_ctc) == 0, args.wenet_ctc |
| 979 | + assert len(args.telespeech_ctc) == 0, args.telespeech_ctc | ||
| 958 | assert len(args.whisper_encoder) == 0, args.whisper_encoder | 980 | assert len(args.whisper_encoder) == 0, args.whisper_encoder |
| 959 | assert len(args.whisper_decoder) == 0, args.whisper_decoder | 981 | assert len(args.whisper_decoder) == 0, args.whisper_decoder |
| 960 | assert len(args.tdnn_model) == 0, args.tdnn_model | 982 | assert len(args.tdnn_model) == 0, args.tdnn_model |
| @@ -979,6 +1001,7 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer: | @@ -979,6 +1001,7 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer: | ||
| 979 | elif args.sense_voice: | 1001 | elif args.sense_voice: |
| 980 | assert len(args.nemo_ctc) == 0, args.nemo_ctc | 1002 | assert len(args.nemo_ctc) == 0, args.nemo_ctc |
| 981 | assert len(args.wenet_ctc) == 0, args.wenet_ctc | 1003 | assert len(args.wenet_ctc) == 0, args.wenet_ctc |
| 1004 | + assert len(args.telespeech_ctc) == 0, args.telespeech_ctc | ||
| 982 | assert len(args.whisper_encoder) == 0, args.whisper_encoder | 1005 | assert len(args.whisper_encoder) == 0, args.whisper_encoder |
| 983 | assert len(args.whisper_decoder) == 0, args.whisper_decoder | 1006 | assert len(args.whisper_decoder) == 0, args.whisper_decoder |
| 984 | assert len(args.tdnn_model) == 0, args.tdnn_model | 1007 | assert len(args.tdnn_model) == 0, args.tdnn_model |
| @@ -998,6 +1021,7 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer: | @@ -998,6 +1021,7 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer: | ||
| 998 | ) | 1021 | ) |
| 999 | elif args.nemo_ctc: | 1022 | elif args.nemo_ctc: |
| 1000 | assert len(args.wenet_ctc) == 0, args.wenet_ctc | 1023 | assert len(args.wenet_ctc) == 0, args.wenet_ctc |
| 1024 | + assert len(args.telespeech_ctc) == 0, args.telespeech_ctc | ||
| 1001 | assert len(args.whisper_encoder) == 0, args.whisper_encoder | 1025 | assert len(args.whisper_encoder) == 0, args.whisper_encoder |
| 1002 | assert len(args.whisper_decoder) == 0, args.whisper_decoder | 1026 | assert len(args.whisper_decoder) == 0, args.whisper_decoder |
| 1003 | assert len(args.tdnn_model) == 0, args.tdnn_model | 1027 | assert len(args.tdnn_model) == 0, args.tdnn_model |
| @@ -1020,6 +1044,7 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer: | @@ -1020,6 +1044,7 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer: | ||
| 1020 | provider=args.provider, | 1044 | provider=args.provider, |
| 1021 | ) | 1045 | ) |
| 1022 | elif args.wenet_ctc: | 1046 | elif args.wenet_ctc: |
| 1047 | + assert len(args.telespeech_ctc) == 0, args.telespeech_ctc | ||
| 1023 | assert len(args.whisper_encoder) == 0, args.whisper_encoder | 1048 | assert len(args.whisper_encoder) == 0, args.whisper_encoder |
| 1024 | assert len(args.whisper_decoder) == 0, args.whisper_decoder | 1049 | assert len(args.whisper_decoder) == 0, args.whisper_decoder |
| 1025 | assert len(args.tdnn_model) == 0, args.tdnn_model | 1050 | assert len(args.tdnn_model) == 0, args.tdnn_model |
| @@ -1041,6 +1066,28 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer: | @@ -1041,6 +1066,28 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer: | ||
| 1041 | decoding_method=args.decoding_method, | 1066 | decoding_method=args.decoding_method, |
| 1042 | provider=args.provider, | 1067 | provider=args.provider, |
| 1043 | ) | 1068 | ) |
| 1069 | + elif args.telespeech_ctc: | ||
| 1070 | + assert len(args.whisper_encoder) == 0, args.whisper_encoder | ||
| 1071 | + assert len(args.whisper_decoder) == 0, args.whisper_decoder | ||
| 1072 | + assert len(args.tdnn_model) == 0, args.tdnn_model | ||
| 1073 | + assert len(args.moonshine_preprocessor) == 0, args.moonshine_preprocessor | ||
| 1074 | + assert len(args.moonshine_encoder) == 0, args.moonshine_encoder | ||
| 1075 | + assert ( | ||
| 1076 | + len(args.moonshine_uncached_decoder) == 0 | ||
| 1077 | + ), args.moonshine_uncached_decoder | ||
| 1078 | + assert len(args.moonshine_cached_decoder) == 0, args.moonshine_cached_decoder | ||
| 1079 | + | ||
| 1080 | + assert_file_exists(args.telespeech_ctc) | ||
| 1081 | + | ||
| 1082 | + recognizer = sherpa_onnx.OfflineRecognizer.from_telespeech_ctc( | ||
| 1083 | + model=args.telespeech_ctc, | ||
| 1084 | + tokens=args.tokens, | ||
| 1085 | + num_threads=args.num_threads, | ||
| 1086 | + sample_rate=args.sample_rate, | ||
| 1087 | + feature_dim=args.feat_dim, | ||
| 1088 | + decoding_method=args.decoding_method, | ||
| 1089 | + provider=args.provider, | ||
| 1090 | + ) | ||
| 1044 | elif args.whisper_encoder: | 1091 | elif args.whisper_encoder: |
| 1045 | assert len(args.tdnn_model) == 0, args.tdnn_model | 1092 | assert len(args.tdnn_model) == 0, args.tdnn_model |
| 1046 | assert_file_exists(args.whisper_encoder) | 1093 | assert_file_exists(args.whisper_encoder) |
-
请 注册 或 登录 后发表评论