Author: Fangjun Kuang
Committed by: GitHub

Commit message: support whisper turbo (#1390)

@@ -16,7 +16,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [macos-latest]
-        model: ["distil-medium.en", "distil-small.en", "tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "medium-aishell", "large", "large-v1", "large-v2", "large-v3", "distil-large-v2"]
+        model: ["turbo", "distil-medium.en", "distil-small.en", "tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "medium-aishell", "large", "large-v1", "large-v2", "large-v3", "distil-large-v2"]
         # model: ["large", "large-v1", "large-v2", "large-v3", "distil-large-v2"]
         python-version: ["3.8"]

@@ -32,7 +32,8 @@ jobs:
         shell: bash
         run: |
           python3 -m pip install torch==1.13.0 torchaudio==0.13.0 -f https://download.pytorch.org/whl/cpu/torch_stable.html
-          python3 -m pip install openai-whisper==20231117 onnxruntime onnx soundfile librosa
+          python3 -m pip install -U openai-whisper
+          python3 -m pip install onnxruntime onnx soundfile librosa

       - name: export ${{ matrix.model }}
         shell: bash
@@ -46,7 +46,8 @@ def get_args():
         choices=[
             "tiny", "tiny.en", "base", "base.en",
             "small", "small.en", "medium", "medium.en",
-            "large", "large-v1", "large-v2", "large-v3",
+            "large-v1", "large-v2",
+            "large", "large-v3", "turbo",  # these three have feature dim 128
             "distil-medium.en", "distil-small.en", "distil-large-v2",
             # "distil-large-v3",  # distil-large-v3 is not supported!
             # for fine-tuned models from icefall
@@ -76,7 +77,7 @@ def add_meta_data(filename: str, meta_data: Dict[str, Any]):
         meta.key = key
         meta.value = str(value)

-    if "large" in filename:
+    if "large" in filename or "turbo" in filename:
         external_filename = filename.split(".onnx")[0]
         onnx.save(
             model,
@@ -404,7 +405,7 @@ def main():
     audio = whisper.pad_or_trim(audio)
     assert audio.shape == (16000 * 30,), audio.shape

-    if args.model in ("large", "large-v3"):
+    if args.model in ("large", "large-v3", "turbo"):
         n_mels = 128
     else:
         n_mels = 80