Committed by
GitHub
'update20241203' (#1589)
Add the `--modeling-unit` and `--bpe-vocab` command-line arguments to /sherpa-onnx/python-api-examples/streaming_server.py so that both values can be specified by the user.
正在显示
1 个修改的文件
包含
24 行增加
和
0 行删除
| @@ -229,6 +229,28 @@ def add_hotwords_args(parser: argparse.ArgumentParser): | @@ -229,6 +229,28 @@ def add_hotwords_args(parser: argparse.ArgumentParser): | ||
| 229 | --hotwords-file is given. | 229 | --hotwords-file is given. |
| 230 | """, | 230 | """, |
| 231 | ) | 231 | ) |
| 232 | + parser.add_argument( | ||
| 233 | + "--modeling-unit", | ||
| 234 | + type=str, | ||
| 235 | + default='cjkchar', | ||
| 236 | + help=""" | ||
| 237 | + The modeling unit of the used model. Current supported units are: | ||
| 238 | + - cjkchar(for Chinese) | ||
| 239 | + - bpe(for English like languages) | ||
| 240 | + - cjkchar+bpe(for multilingual models) | ||
| 241 | + """, | ||
| 242 | + ) | ||
| 243 | + parser.add_argument( | ||
| 244 | + "--bpe-vocab", | ||
| 245 | + type=str, | ||
| 246 | + default='', | ||
| 247 | + help=""" | ||
| 248 | + The bpe vocabulary generated by sentencepiece toolkit. | ||
| 249 | + It is only used when modeling-unit is bpe or cjkchar+bpe. | ||
| 250 | + if you can’t find bpe.vocab in the model directory, please run: | ||
| 251 | + python script/export_bpe_vocab.py --bpe-model exp/bpe.model | ||
| 252 | + """, | ||
| 253 | + ) | ||
| 232 | 254 | ||
| 233 | 255 | ||
| 234 | def add_modified_beam_search_args(parser: argparse.ArgumentParser): | 256 | def add_modified_beam_search_args(parser: argparse.ArgumentParser): |
| @@ -409,6 +431,8 @@ def create_recognizer(args) -> sherpa_onnx.OnlineRecognizer: | @@ -409,6 +431,8 @@ def create_recognizer(args) -> sherpa_onnx.OnlineRecognizer: | ||
| 409 | rule2_min_trailing_silence=args.rule2_min_trailing_silence, | 431 | rule2_min_trailing_silence=args.rule2_min_trailing_silence, |
| 410 | rule3_min_utterance_length=args.rule3_min_utterance_length, | 432 | rule3_min_utterance_length=args.rule3_min_utterance_length, |
| 411 | provider=args.provider, | 433 | provider=args.provider, |
| 434 | + modeling_unit=args.modeling_unit, | ||
| 435 | + bpe_vocab=args.bpe_vocab | ||
| 412 | ) | 436 | ) |
| 413 | elif args.paraformer_encoder: | 437 | elif args.paraformer_encoder: |
| 414 | recognizer = sherpa_onnx.OnlineRecognizer.from_paraformer( | 438 | recognizer = sherpa_onnx.OnlineRecognizer.from_paraformer( |
-
请 注册 或 登录 后发表评论