Fangjun Kuang
Committed by GitHub

Export whisper distil-large-v3 and distil-large-v3.5 to sherpa-onnx (#2506)

This PR adds support for exporting two new Whisper distil models, distil-large-v3 and distil-large-v3.5, to ONNX format for use with sherpa-onnx. The changes allow these models to go through the existing export pipeline; a usage sketch follows the summary below.

- Added support for distil-large-v3 and distil-large-v3.5 models in the export script
- Updated GitHub workflow to include the new models in the CI matrix
- Configured the correct n_mels value (128) for the new distil models
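
For reference, here is a minimal sketch of driving the updated pipeline by hand for one of the new models. The wget URL and output filename are copied verbatim from the CI step below; the script name export-onnx.py and its --model flag are assumptions inferred from the get_args() choices in the diff, not something this page confirms.

# Download the OpenAI-format checkpoint (URL and filename as in the CI step)
wget -q -O distil-large-v3-original-model.bin https://huggingface.co/distil-whisper/distil-large-v3-openai/resolve/main/model.bin

# Assumed entry point: export-onnx.py and --model are inferred from get_args()
python3 ./export-onnx.py --model distil-large-v3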
@@ -16,8 +16,9 @@ jobs:
fail-fast: false
matrix:
os: [macos-latest]
model: ["turbo", "distil-medium.en", "distil-small.en", "tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "medium-aishell", "large", "large-v1", "large-v2", "large-v3", "distil-large-v2"]
model: ["turbo", "distil-medium.en", "distil-small.en", "tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "medium-aishell", "large", "large-v1", "large-v2", "large-v3", "distil-large-v2", "distil-large-v3", "distil-large-v3.5"]
# model: ["large", "large-v1", "large-v2", "large-v3", "distil-large-v2"]
# model: ["distil-large-v3.5", "distil-large-v3"]
python-version: ["3.8"]
steps:
@@ -47,6 +48,12 @@ jobs:
elif [[ $model == distil-large-v2 ]]; then
wget -q -O distil-large-v2-original-model.bin https://huggingface.co/distil-whisper/distil-large-v2/resolve/main/original-model.bin
ls -lh
elif [[ $model == distil-large-v3 ]]; then
wget -q -O distil-large-v3-original-model.bin https://huggingface.co/distil-whisper/distil-large-v3-openai/resolve/main/model.bin
ls -lh
elif [[ $model == distil-large-v3.5 ]]; then
wget -q -O distil-large-v3.5-original-model.bin https://huggingface.co/distil-whisper/distil-large-v3.5-openai/resolve/main/model.bin
ls -lh
elif [[ $model == distil-small.en ]]; then
wget -q -O distil-small-en-original-model.bin https://huggingface.co/distil-whisper/distil-small.en/resolve/main/original-model.bin
ls -lh
@@ -155,6 +162,7 @@ jobs:
git status
ls -lh
git lfs track "*.wav*"
git lfs track "*onnx*"
git lfs track "*weights*"
@@ -49,7 +49,8 @@ def get_args():
"large-v1", "large-v2",
"large", "large-v3", "turbo", # these three have feature dim 128
"distil-medium.en", "distil-small.en", "distil-large-v2",
# "distil-large-v3", # distil-large-v3 is not supported!
"distil-large-v3",
"distil-large-v3.5",
# for fine-tuned models from icefall
"medium-aishell",
],
@@ -348,6 +349,32 @@ def main():
"""
)
model = whisper.load_model(filename)
elif name == "distil-large-v3":
filename = "./distil-large-v3-original-model.bin"
if not Path(filename).is_file():
raise ValueError(
"""
Please go to https://huggingface.co/distil-whisper/distil-large-v3-openai
to download model.bin
You can use the following command to do that:
wget -O distil-large-v3-original-model.bin https://huggingface.co/distil-whisper/distil-large-v3-openai/resolve/main/model.bin
"""
)
model = whisper.load_model(filename)
elif name == "distil-large-v3.5":
filename = "./distil-large-v3.5-original-model.bin"
if not Path(filename).is_file():
raise ValueError(
"""
Please go to https://huggingface.co/distil-whisper/distil-large-v3.5-openai/
to download model.bin
You can use the following command to do that:
wget -O distil-large-v3.5-original-model.bin https://huggingface.co/distil-whisper/distil-large-v3.5-openai/resolve/main/model.bin
"""
)
model = whisper.load_model(filename)
elif name == "distil-small.en":
filename = "./distil-small-en-original-model.bin"
if not Path(filename).is_file():
@@ -405,10 +432,17 @@ def main():
audio = whisper.pad_or_trim(audio)
assert audio.shape == (16000 * 30,), audio.shape
if args.model in ("large", "large-v3", "turbo"):
if args.model in ("distil-large-v3", "distil-large-v3.5"):
n_mels = 128
elif args.model in (
"large",
"large-v3",
"turbo",
):
n_mels = 128
else:
n_mels = 80
mel = (
whisper.log_mel_spectrogram(audio, n_mels=n_mels).to(model.device).unsqueeze(0)
)
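
As a quick sanity check, one can confirm the number of mel bins a downloaded checkpoint expects; openai-whisper stores it in model.dims.n_mels, and the filename here follows the CI naming above:

# Should print 128 for distil-large-v3 / distil-large-v3.5
python3 -c "import whisper; m = whisper.load_model('./distil-large-v3-original-model.bin'); print(m.dims.n_mels)"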