Fangjun Kuang
Committed by GitHub

Export whisper distil-large-v3 and distil-large-v3.5 to sherpa-onnx (#2506)

This PR adds support for exporting two new whisper distil models (distil-large-v3 and distil-large-v3.5) to ONNX format for use with sherpa-onnx. The changes enable these models to be processed through the existing export pipeline.

- Added support for distil-large-v3 and distil-large-v3.5 models in the export script
- Updated GitHub workflow to include the new models in the CI matrix
- Configured proper n_mels parameter (128) for the new distil models
@@ -16,8 +16,9 @@ jobs:
       fail-fast: false
       matrix:
         os: [macos-latest]
-        model: ["turbo", "distil-medium.en", "distil-small.en", "tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "medium-aishell", "large", "large-v1", "large-v2", "large-v3", "distil-large-v2"]
+        model: ["turbo", "distil-medium.en", "distil-small.en", "tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "medium-aishell", "large", "large-v1", "large-v2", "large-v3", "distil-large-v2", "distil-large-v3", "distil-large-v3.5"]
         # model: ["large", "large-v1", "large-v2", "large-v3", "distil-large-v2"]
+        # model: ["distil-large-v3.5", "distil-large-v3"]
         python-version: ["3.8"]

     steps:
@@ -47,6 +48,12 @@ jobs:
           elif [[ $model == distil-large-v2 ]]; then
             wget -q -O distil-large-v2-original-model.bin https://huggingface.co/distil-whisper/distil-large-v2/resolve/main/original-model.bin
             ls -lh
+          elif [[ $model == distil-large-v3 ]]; then
+            wget -q -O distil-large-v3-original-model.bin https://huggingface.co/distil-whisper/distil-large-v3-openai/resolve/main/model.bin
+            ls -lh
+          elif [[ $model == distil-large-v3.5 ]]; then
+            wget -q -O distil-large-v3.5-original-model.bin https://huggingface.co/distil-whisper/distil-large-v3.5-openai/resolve/main/model.bin
+            ls -lh
           elif [[ $model == distil-small.en ]]; then
             wget -q -O distil-small-en-original-model.bin https://huggingface.co/distil-whisper/distil-small.en/resolve/main/original-model.bin
             ls -lh
@@ -155,6 +162,7 @@ jobs:

           git status
           ls -lh
+          git lfs track "*.wav*"
           git lfs track "*onnx*"
           git lfs track "*weights*"

@@ -49,7 +49,8 @@ def get_args():
             "large-v1", "large-v2",
             "large", "large-v3", "turbo",  # these three have feature dim 128
             "distil-medium.en", "distil-small.en", "distil-large-v2",
-            # "distil-large-v3",  # distil-large-v3 is not supported!
+            "distil-large-v3",
+            "distil-large-v3.5",
             # for fine-tuned models from icefall
             "medium-aishell",
         ],
@@ -348,6 +349,32 @@ def main():
             """
         )
         model = whisper.load_model(filename)
+    elif name == "distil-large-v3":
+        filename = "./distil-large-v3-original-model.bin"
+        if not Path(filename).is_file():
+            raise ValueError(
+                """
+                Please go to https://huggingface.co/distil-whisper/distil-large-v3-openai
+                to download model.bin
+                You can use the following command to do that:
+
+                wget -O distil-large-v3-original-model.bin https://huggingface.co/distil-whisper/distil-large-v3-openai/resolve/main/model.bin
+                """
+            )
+        model = whisper.load_model(filename)
+    elif name == "distil-large-v3.5":
+        filename = "./distil-large-v3.5-original-model.bin"
+        if not Path(filename).is_file():
+            raise ValueError(
+                """
+                Please go to https://huggingface.co/distil-whisper/distil-large-v3.5-openai/
+                to download model.bin
+                You can use the following command to do that:
+
+                wget -O distil-large-v3.5-original-model.bin https://huggingface.co/distil-whisper/distil-large-v3.5-openai/resolve/main/model.bin
+                """
+            )
+        model = whisper.load_model(filename)
     elif name == "distil-small.en":
         filename = "./distil-small-en-original-model.bin"
         if not Path(filename).is_file():
@@ -405,10 +432,17 @@ def main():
     audio = whisper.pad_or_trim(audio)
     assert audio.shape == (16000 * 30,), audio.shape

-    if args.model in ("large", "large-v3", "turbo"):
+    if args.model in ("distil-large-v3", "distil-large-v3.5"):
+        n_mels = 128
+    elif args.model in (
+        "large",
+        "large-v3",
+        "turbo",
+    ):
         n_mels = 128
     else:
         n_mels = 80
+
     mel = (
         whisper.log_mel_spectrogram(audio, n_mels=n_mels).to(model.device).unsqueeze(0)
     )