Add int8 quantized whisper large models (#1126)
Committed by GitHub
Showing 3 changed files with 41 additions and 20 deletions.
File 1/3: the CI workflow that exports, tests, and releases the Whisper models.

@@ -16,7 +16,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [macos-latest]
-        model: ["distil-medium.en", "distil-small.en", "tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "medium-aishell", "large", "large-v1", "large-v2", "distil-large-v2"]
+        model: ["distil-medium.en", "distil-small.en", "tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "medium-aishell", "large", "large-v1", "large-v2", "large-v3", "distil-large-v2"]
         # model: ["large", "large-v1", "large-v2", "large-v3", "distil-large-v2"]
         python-version: ["3.8"]

@@ -56,11 +56,7 @@ jobs:
           python3 ./export-onnx.py --model ${{ matrix.model }}
           # python3 -m onnxruntime.tools.convert_onnx_models_to_ort --optimization_style=Fixed ./
           #
-          if [[ $model == medium-aishell ]]; then
-            ls -lh *.onnx
-            rm -fv medium-aishell-encoder.onnx
-            rm -fv medium-aishell-decoder.onnx
-          fi
+

           ls -lh

@@ -97,16 +93,34 @@ jobs:
           ls -lh $src
           echo "--------------------"

-          if [[ $model == large || $model == large-v1 || $model == large-v2 || $model == distil-large-v2 ]]; then
-            echo "Don't release model to github for large models. $model"
+          if [[ $model == medium-aishell ]]; then
+            ls -lh *.onnx # the float32 onnx model for medium-aishell is too large to be uploaded to GitHub
+            mkdir -p bak
+            mv -v $src/$model-encoder.onnx ./bak
+            mv -v $src/$model-decoder.onnx ./bak
+            ls -lh $src
+
+            tar cvjf $src.tar.bz2 $src
+            mv -v ./bak/* $src/
+            rm -rf bak
+          elif [[ -f $src/$model-encoder.weights ]]; then
+            # we only publish int8 models to GitHub for large Whisper models
+            mkdir -p bak
+            mv -v $src/*weights ./bak
+            mv -v $src/$model-encoder.onnx ./bak
+            mv -v $src/$model-decoder.onnx ./bak
+            ls -lh $src
+
+            tar cvjf $src.tar.bz2 $src
+            mv -v ./bak/* $src/
+            rm -rf bak
           else
             tar cvjf $src.tar.bz2 $src
           fi

-          ls -lh
+          ls -lh *.tar.bz2

       - name: Release
-        if: matrix.model != 'large' && matrix.model != 'large-v1' && matrix.model != 'large-v2' && matrix.model != 'large-v3' && matrix.model != 'distil-large-v2'
         uses: svenstaro/upload-release-action@v2
         with:
           file_glob: true

@@ -132,9 +146,7 @@ jobs:

           git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-${{ matrix.model }} huggingface

-          if [[ $model != medium-aishell ]]; then
-            rm -rf huggingface/*
-          fi
+          rm -rf huggingface/*

           cp -av $src/* ./huggingface/

@@ -149,11 +161,10 @@ jobs:
           git commit -m "upload ${{ matrix.model }}"
           git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-whisper-${{ matrix.model }} main

-      - name: Test ${{ matrix.model }}
+      - name: Test float32 ${{ matrix.model }}
         shell: bash
         run: |
           python3 -m pip install kaldi-native-fbank
-          git checkout .
           model=${{ matrix.model }}
           src=sherpa-onnx-whisper-$model
           time python3 scripts/whisper/test.py \

@@ -161,3 +172,14 @@ jobs:
             --decoder $src/$model-decoder.onnx \
             --tokens $src/$model-tokens.txt \
             $src/test_wavs/0.wav
+
+      - name: Test int8 ${{ matrix.model }}
+        shell: bash
+        run: |
+          model=${{ matrix.model }}
+          src=sherpa-onnx-whisper-$model
+          time python3 scripts/whisper/test.py \
+            --encoder $src/$model-encoder.int8.onnx \
+            --decoder $src/$model-decoder.int8.onnx \
+            --tokens $src/$model-tokens.txt \
+            $src/test_wavs/0.wav
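The packaging branch above works by moving the float32 artifacts into a temporary bak/ directory, creating the tarball (so it contains only the int8 models plus the small supporting files), and then moving everything back so the complete set can still be pushed to Hugging Face. Below is a minimal Python sketch of the same move-aside/tar/restore pattern; the function name and file layout are illustrative, not part of the repository:

import shutil
import tarfile
from pathlib import Path

def pack_int8_only(src: str, model: str) -> None:
    """Tar `src` with the float32 files moved aside, then restore them."""
    src_dir = Path(src)
    bak = Path("bak")
    bak.mkdir(exist_ok=True)

    # Move the large float32 artifacts out of the way (names mirror the
    # workflow: <model>-encoder.onnx, <model>-decoder.onnx, plus any
    # external *.weights files produced for the large models).
    moved = []
    for pattern in (f"{model}-encoder.onnx", f"{model}-decoder.onnx", "*.weights"):
        for p in src_dir.glob(pattern):
            target = bak / p.name
            shutil.move(str(p), str(target))
            moved.append(target)

    # The tarball now contains only the int8 models and small metadata files.
    with tarfile.open(f"{src}.tar.bz2", "w:bz2") as tar:
        tar.add(src_dir, arcname=src_dir.name)

    # Restore the float32 files so the later Hugging Face step, which
    # copies everything in $src, still uploads the full set.
    for p in moved:
        shutil.move(str(p), str(src_dir / p.name))
    bak.rmdir()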
File 2/3: export-onnx.py, which exports the models and generates the int8 quantized versions.

@@ -582,9 +582,6 @@ def main():
         location=decoder_external_filename + ".weights",
     )

-    if "large" in args.model:
-        # it causes errors for large models, so skip it.
-        return
     # Generate int8 quantization models
     # See https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html#data-type-selection
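With this early return removed, int8 models are now generated for the large variants as well. The comment in the script points at onnxruntime's dynamic quantization documentation; a minimal sketch of that API is below, with illustrative input/output file names (this is not the exact call site in export-onnx.py):

from onnxruntime.quantization import QuantType, quantize_dynamic

# Dynamic quantization stores the weights as int8 and computes the
# activation quantization parameters on the fly at inference time.
quantize_dynamic(
    model_input="large-v3-encoder.onnx",        # illustrative file name
    model_output="large-v3-encoder.int8.onnx",  # name the test step expects
    weight_type=QuantType.QInt8,
)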
File 3/3: scripts/whisper/test.py, which now reads the number of mel bins from the model metadata instead of guessing it from the file name.

@@ -90,6 +90,7 @@ class OnnxModel:
         self.n_text_layer = int(meta["n_text_layer"])
         self.n_text_ctx = int(meta["n_text_ctx"])
         self.n_text_state = int(meta["n_text_state"])
+        self.n_mels = int(meta["n_mels"])
         self.sot = int(meta["sot"])
         self.eot = int(meta["eot"])
         self.translate = int(meta["translate"])

@@ -294,8 +295,9 @@ def main():
     args = get_args()

     model = OnnxModel(args.encoder, args.decoder)
-    dim = 80 if "large-v3" not in args.encoder else 128
-    mel = compute_features(args.sound_file, dim=dim)
+    n_mels = model.n_mels
+
+    mel = compute_features(args.sound_file, dim=n_mels)

     n_layer_cross_k, n_layer_cross_v = model.run_encoder(mel)
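Reading n_mels from the model's metadata removes the hard-coded filename check: large-v3 uses 128 mel bins while the earlier models use 80, and the exporter records this in the ONNX custom metadata. A minimal sketch of reading such metadata with onnxruntime, assuming an illustrative model path:

import onnxruntime

# custom_metadata_map holds the string key/value pairs written at export
# time; the test script assumes the exporter stored "n_mels" there.
sess = onnxruntime.InferenceSession(
    "large-v3-encoder.int8.onnx",  # illustrative path
    providers=["CPUExecutionProvider"],
)
meta = sess.get_modelmeta().custom_metadata_map
n_mels = int(meta["n_mels"])  # 128 for large-v3, 80 for earlier models
print(n_mels)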