Fangjun Kuang
Committed by GitHub

Add pre-trained models for the Libriheavy dataset (#1122)

  1 +name: export-libriheavy-to-onnx
  2 +
  3 +on:
  4 + push:
  5 + branches:
  6 + - libriheavy-model
  7 + workflow_dispatch:
  8 +
  9 +concurrency:
  10 + group: export-libriheavy-to-onnx-${{ github.ref }}
  11 + cancel-in-progress: true
  12 +
  13 +jobs:
  14 + export-libriheavy-to-onnx:
  15 + if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
  16 + name: export libriheavy
  17 + runs-on: ${{ matrix.os }}
  18 + strategy:
  19 + fail-fast: false
  20 + matrix:
  21 + os: [ubuntu-latest]
  22 + python-version: ["3.8"]
  23 +
  24 + steps:
  25 + - uses: actions/checkout@v4
  26 +
  27 + - name: Setup Python ${{ matrix.python-version }}
  28 + uses: actions/setup-python@v5
  29 + with:
  30 + python-version: ${{ matrix.python-version }}
  31 +
  32 + - name: Run
  33 + shell: bash
  34 + run: |
  35 + cd scripts/icefall
  36 + ./run-libriheavy.sh
  37 + ./run-libriheavy-punct-case.sh
  38 +
  39 + - name: Publish to huggingface
  40 + env:
  41 + HF_TOKEN: ${{ secrets.HF_TOKEN }}
  42 + uses: nick-fields/retry@v3
  43 + with:
  44 + max_attempts: 20
  45 + timeout_seconds: 200
  46 + shell: bash
  47 + command: |
  48 + git config --global user.email "csukuangfj@gmail.com"
  49 + git config --global user.name "Fangjun Kuang"
  50 +
  51 + for m in large medium small; do
  52 + rm -rf huggingface
  53 + export GIT_LFS_SKIP_SMUDGE=1
  54 + export GIT_CLONE_PROTECTION_ACTIVE=false
  55 +
  56 + src=sherpa-onnx-zipformer-en-libriheavy-20230926-$m
  57 + echo "Process $src"
  58 +
  59 + git clone https://huggingface.co/csukuangfj/$src huggingface
  60 + cd huggingface
  61 + git fetch
  62 + git pull
  63 +
  64 + cp -av ../scripts/icefall/$src/* .
  65 +
  66 + git lfs track "*.onnx"
  67 + git add .
  68 +
  69 + git commit -m "add large"
  70 + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$src main || true
  71 +
  72 + cd ..
  73 +
  74 + rm -rf huggingface/.git*
  75 +
  76 + mv huggingface $src
  77 +
  78 + tar cjvf $src.tar.bz2 $src
  79 + rm -rf $src
  80 + ls -lh
  81 + done
  82 +
  83 + - name: Publish to huggingface (case and punct)
  84 + env:
  85 + HF_TOKEN: ${{ secrets.HF_TOKEN }}
  86 + uses: nick-fields/retry@v3
  87 + with:
  88 + max_attempts: 20
  89 + timeout_seconds: 200
  90 + shell: bash
  91 + command: |
  92 + git config --global user.email "csukuangfj@gmail.com"
  93 + git config --global user.name "Fangjun Kuang"
  94 +
  95 + for m in large medium small; do
  96 + rm -rf huggingface
  97 + export GIT_LFS_SKIP_SMUDGE=1
  98 + export GIT_CLONE_PROTECTION_ACTIVE=false
  99 +
  100 + src=sherpa-onnx-zipformer-en-libriheavy-20230830-$m-punct-case
  101 + echo "Process $src"
  102 +
  103 + git clone https://huggingface.co/csukuangfj/$src huggingface
  104 + cd huggingface
  105 + git fetch
  106 + git pull
  107 +
  108 + cp -av ../scripts/icefall/$src/* .
  109 +
  110 + git lfs track "*.onnx"
  111 + git add .
  112 +
  113 + git commit -m "add large"
  114 + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$src main || true
  115 +
  116 + cd ..
  117 +
  118 + rm -rf huggingface/.git*
  119 +
  120 + mv huggingface $src
  121 +
  122 + tar cjvf $src.tar.bz2 $src
  123 + rm -rf $src
  124 + ls -lh
  125 + done
  126 +
  127 + - name: Release
  128 + uses: svenstaro/upload-release-action@v2
  129 + with:
  130 + file_glob: true
  131 + file: ./*.tar.bz2
  132 + overwrite: true
  133 + repo_name: k2-fsa/sherpa-onnx
  134 + repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
  135 + tag: asr-models
  136 +
@@ -80,7 +80,7 @@ def get_2nd_models(): @@ -80,7 +80,7 @@ def get_2nd_models():
80 80
81 rm -fv README.md 81 rm -fv README.md
82 rm -rfv test_wavs 82 rm -rfv test_wavs
83 - rm model.onnx 83 + rm -fv model.onnx
84 84
85 ls -lh 85 ls -lh
86 86
@@ -82,7 +82,7 @@ def get_models(): @@ -82,7 +82,7 @@ def get_models():
82 82
83 rm -fv README.md 83 rm -fv README.md
84 rm -rfv test_wavs 84 rm -rfv test_wavs
85 - rm model.onnx 85 + rm -fv model.onnx
86 86
87 ls -lh 87 ls -lh
88 88
@@ -189,7 +189,7 @@ def get_models(): @@ -189,7 +189,7 @@ def get_models():
189 pushd $model_name 189 pushd $model_name
190 190
191 rm -rfv test_wavs 191 rm -rfv test_wavs
192 - rm test.py 192 + rm -fv test.py
193 193
194 ls -lh 194 ls -lh
195 195
@@ -208,8 +208,8 @@ def get_models(): @@ -208,8 +208,8 @@ def get_models():
208 rm -fv README.md 208 rm -fv README.md
209 rm -fv bpe.model 209 rm -fv bpe.model
210 210
211 - rm encoder-epoch-12-avg-5.onnx  
212 - rm decoder-epoch-12-avg-5.int8.onnx 211 + rm -fv encoder-epoch-12-avg-5.onnx
  212 + rm -fv decoder-epoch-12-avg-5.int8.onnx
213 rm joiner-epoch-12-avg-5.onnx 213 rm joiner-epoch-12-avg-5.onnx
214 214
215 ls -lh 215 ls -lh
@@ -229,9 +229,9 @@ def get_models(): @@ -229,9 +229,9 @@ def get_models():
229 rm -fv README.md 229 rm -fv README.md
230 rm -fv bpe.model 230 rm -fv bpe.model
231 231
232 - rm encoder-epoch-99-avg-1.onnx  
233 - rm decoder-epoch-99-avg-1.int8.onnx  
234 - rm joiner-epoch-99-avg-1.onnx 232 + rm -fv encoder-epoch-99-avg-1.onnx
  233 + rm -fv decoder-epoch-99-avg-1.int8.onnx
  234 + rm -fv joiner-epoch-99-avg-1.onnx
235 235
236 ls -lh 236 ls -lh
237 237
  1 +#!/usr/bin/env bash
  2 +set -ex
  3 +
  4 +cur_dir=$(cd $(dirname $BASH_SOURCE) && pwd)
  5 +sherpa_onnx_dir=$(cd $cur_dir/../.. && pwd)
  6 +echo "sherpa_onnx_dir: $sherpa_onnx_dir"
  7 +
  8 +pip install sherpa-onnx # for testing
  9 +
  10 +function download_model() {
  11 + git lfs install
  12 + git clone https://www.modelscope.cn/pkufool/icefall-asr-zipformer-libriheavy-punc-20230830.git
  13 +}
  14 +
  15 +function download_test_wavs() {
  16 + d=$1
  17 + mkdir $d/test_wavs
  18 + pushd $d/test_wavs
  19 + curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/0.wav
  20 + curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/1.wav
  21 + curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/8k.wav
  22 + curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/trans.txt
  23 + popd
  24 +}
  25 +
  26 +function export_large() {
  27 + echo "----------large----------"
  28 + src=icefall-asr-zipformer-libriheavy-punc-20230830
  29 + dst=sherpa-onnx-zipformer-en-libriheavy-20230830-large-punct-case
  30 + mkdir $dst
  31 +
  32 + cp -v $src/data/lang_bpe_756/bpe.model $dst/
  33 + cp -v $src/data/lang_bpe_756/tokens.txt $dst/
  34 + cp -v $src/exp/*.onnx $dst/
  35 + download_test_wavs $dst
  36 +
  37 + ls -lh $dst
  38 + ls -lh $dst/test_wavs
  39 +
  40 + sherpa-onnx-offline \
  41 + --encoder=$dst/encoder-epoch-16-avg-2.onnx \
  42 + --decoder=$dst/decoder-epoch-16-avg-2.onnx \
  43 + --joiner=$dst/joiner-epoch-16-avg-2.onnx \
  44 + --tokens=$dst/tokens.txt \
  45 + $dst/test_wavs/0.wav \
  46 + $dst/test_wavs/1.wav \
  47 + $dst/test_wavs/8k.wav
  48 +
  49 + sherpa-onnx-offline \
  50 + --encoder=$dst/encoder-epoch-16-avg-2.int8.onnx \
  51 + --decoder=$dst/decoder-epoch-16-avg-2.onnx \
  52 + --joiner=$dst/joiner-epoch-16-avg-2.int8.onnx \
  53 + --tokens=$dst/tokens.txt \
  54 + $dst/test_wavs/0.wav \
  55 + $dst/test_wavs/1.wav \
  56 + $dst/test_wavs/8k.wav
  57 +}
  58 +
  59 +function export_medium() {
  60 + echo "----------medium subset----------"
  61 + src=icefall-asr-zipformer-libriheavy-punc-20230830
  62 + dst=sherpa-onnx-zipformer-en-libriheavy-20230830-medium-punct-case
  63 + mkdir $dst
  64 +
  65 + cp -v $src/data/lang_bpe_756/bpe.model $dst/
  66 + cp -v $src/data/lang_bpe_756/tokens.txt $dst/
  67 + cp -v $src/exp_medium_subset/*.onnx $dst/
  68 + download_test_wavs $dst
  69 +
  70 + ls -lh $dst
  71 + ls -lh $dst/test_wavs
  72 +
  73 + sherpa-onnx-offline \
  74 + --encoder=$dst/encoder-epoch-50-avg-15.onnx \
  75 + --decoder=$dst/decoder-epoch-50-avg-15.onnx \
  76 + --joiner=$dst/joiner-epoch-50-avg-15.onnx \
  77 + --tokens=$dst/tokens.txt \
  78 + $dst/test_wavs/0.wav \
  79 + $dst/test_wavs/1.wav \
  80 + $dst/test_wavs/8k.wav
  81 +
  82 + sherpa-onnx-offline \
  83 + --encoder=$dst/encoder-epoch-50-avg-15.int8.onnx \
  84 + --decoder=$dst/decoder-epoch-50-avg-15.onnx \
  85 + --joiner=$dst/joiner-epoch-50-avg-15.int8.onnx \
  86 + --tokens=$dst/tokens.txt \
  87 + $dst/test_wavs/0.wav \
  88 + $dst/test_wavs/1.wav \
  89 + $dst/test_wavs/8k.wav
  90 +}
  91 +
  92 +function export_small() {
  93 + echo "----------small subset----------"
  94 + src=icefall-asr-zipformer-libriheavy-punc-20230830
  95 + dst=sherpa-onnx-zipformer-en-libriheavy-20230830-small-punct-case
  96 + mkdir $dst
  97 +
  98 + cp -v $src/data/lang_bpe_756/bpe.model $dst/
  99 + cp -v $src/data/lang_bpe_756/tokens.txt $dst/
  100 + cp -v $src/exp_small_subset/*.onnx $dst/
  101 + download_test_wavs $dst
  102 +
  103 + ls -lh $dst
  104 + ls -lh $dst/test_wavs
  105 +
  106 + sherpa-onnx-offline \
  107 + --encoder=$dst/encoder-epoch-88-avg-41.onnx \
  108 + --decoder=$dst/decoder-epoch-88-avg-41.onnx \
  109 + --joiner=$dst/joiner-epoch-88-avg-41.onnx \
  110 + --tokens=$dst/tokens.txt \
  111 + $dst/test_wavs/0.wav \
  112 + $dst/test_wavs/1.wav \
  113 + $dst/test_wavs/8k.wav
  114 +
  115 + sherpa-onnx-offline \
  116 + --encoder=$dst/encoder-epoch-88-avg-41.int8.onnx \
  117 + --decoder=$dst/decoder-epoch-88-avg-41.onnx \
  118 + --joiner=$dst/joiner-epoch-88-avg-41.int8.onnx \
  119 + --tokens=$dst/tokens.txt \
  120 + $dst/test_wavs/0.wav \
  121 + $dst/test_wavs/1.wav \
  122 + $dst/test_wavs/8k.wav
  123 +}
  124 +
  125 +download_model
  126 +
  127 +export_large
  128 +export_medium
  129 +export_small
  130 +
  131 +rm -rf icefall-asr-zipformer-libriheavy-punc-20230830
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +cur_dir=$(cd $(dirname $BASH_SOURCE) && pwd)
  6 +sherpa_onnx_dir=$(cd $cur_dir/../.. && pwd)
  7 +echo "sherpa_onnx_dir: $sherpa_onnx_dir"
  8 +
  9 +pip install sherpa-onnx # for testing
  10 +
  11 +function download_model() {
  12 + git lfs install
  13 + git clone https://www.modelscope.cn/pkufool/icefall-asr-zipformer-libriheavy-20230926.git
  14 +}
  15 +
  16 +function download_test_wavs() {
  17 + d=$1
  18 + mkdir $d/test_wavs
  19 + pushd $d/test_wavs
  20 + curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/0.wav
  21 + curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/1.wav
  22 + curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/8k.wav
  23 + curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/trans.txt
  24 + popd
  25 +}
  26 +
  27 +function export_large() {
  28 + echo "----------large----------"
  29 + src=icefall-asr-zipformer-libriheavy-20230926
  30 + dst=sherpa-onnx-zipformer-en-libriheavy-20230926-large
  31 + mkdir $dst
  32 +
  33 + cp -v $src/data/lang_bpe_500/bpe.model $dst/
  34 + cp -v $src/data/lang_bpe_500/tokens.txt $dst/
  35 + cp -v $src/exp/*.onnx $dst/
  36 + download_test_wavs $dst
  37 +
  38 + ls -lh $dst
  39 + ls -lh $dst/test_wavs
  40 +
  41 + sherpa-onnx-offline \
  42 + --encoder=$dst/encoder-epoch-16-avg-3.onnx \
  43 + --decoder=$dst/decoder-epoch-16-avg-3.onnx \
  44 + --joiner=$dst/joiner-epoch-16-avg-3.onnx \
  45 + --tokens=$dst/tokens.txt \
  46 + $dst/test_wavs/0.wav \
  47 + $dst/test_wavs/1.wav \
  48 + $dst/test_wavs/8k.wav
  49 +
  50 + sherpa-onnx-offline \
  51 + --encoder=$dst/encoder-epoch-16-avg-3.int8.onnx \
  52 + --decoder=$dst/decoder-epoch-16-avg-3.onnx \
  53 + --joiner=$dst/joiner-epoch-16-avg-3.int8.onnx \
  54 + --tokens=$dst/tokens.txt \
  55 + $dst/test_wavs/0.wav \
  56 + $dst/test_wavs/1.wav \
  57 + $dst/test_wavs/8k.wav
  58 +}
  59 +
  60 +function export_medium() {
  61 + echo "----------medium subset----------"
  62 + src=icefall-asr-zipformer-libriheavy-20230926
  63 + dst=sherpa-onnx-zipformer-en-libriheavy-20230926-medium
  64 + mkdir $dst
  65 +
  66 + cp -v $src/data/lang_bpe_500/bpe.model $dst/
  67 + cp -v $src/data/lang_bpe_500/tokens.txt $dst/
  68 + cp -v $src/exp_medium_subset/*.onnx $dst/
  69 + download_test_wavs $dst
  70 +
  71 + ls -lh $dst
  72 + ls -lh $dst/test_wavs
  73 +
  74 + sherpa-onnx-offline \
  75 + --encoder=$dst/encoder-epoch-60-avg-20.onnx \
  76 + --decoder=$dst/decoder-epoch-60-avg-20.onnx \
  77 + --joiner=$dst/joiner-epoch-60-avg-20.onnx \
  78 + --tokens=$dst/tokens.txt \
  79 + $dst/test_wavs/0.wav \
  80 + $dst/test_wavs/1.wav \
  81 + $dst/test_wavs/8k.wav
  82 +
  83 + sherpa-onnx-offline \
  84 + --encoder=$dst/encoder-epoch-60-avg-20.int8.onnx \
  85 + --decoder=$dst/decoder-epoch-60-avg-20.onnx \
  86 + --joiner=$dst/joiner-epoch-60-avg-20.int8.onnx \
  87 + --tokens=$dst/tokens.txt \
  88 + $dst/test_wavs/0.wav \
  89 + $dst/test_wavs/1.wav \
  90 + $dst/test_wavs/8k.wav
  91 +}
  92 +
  93 +function export_small() {
  94 + echo "----------small subset----------"
  95 + src=icefall-asr-zipformer-libriheavy-20230926
  96 + dst=sherpa-onnx-zipformer-en-libriheavy-20230926-small
  97 + mkdir $dst
  98 +
  99 + cp -v $src/data/lang_bpe_500/bpe.model $dst/
  100 + cp -v $src/data/lang_bpe_500/tokens.txt $dst/
  101 + cp -v $src/exp_small_subset/*.onnx $dst/
  102 + download_test_wavs $dst
  103 +
  104 + ls -lh $dst
  105 + ls -lh $dst/test_wavs
  106 +
  107 + sherpa-onnx-offline \
  108 + --encoder=$dst/encoder-epoch-90-avg-20.onnx \
  109 + --decoder=$dst/decoder-epoch-90-avg-20.onnx \
  110 + --joiner=$dst/joiner-epoch-90-avg-20.onnx \
  111 + --tokens=$dst/tokens.txt \
  112 + $dst/test_wavs/0.wav \
  113 + $dst/test_wavs/1.wav \
  114 + $dst/test_wavs/8k.wav
  115 +
  116 + sherpa-onnx-offline \
  117 + --encoder=$dst/encoder-epoch-90-avg-20.int8.onnx \
  118 + --decoder=$dst/decoder-epoch-90-avg-20.onnx \
  119 + --joiner=$dst/joiner-epoch-90-avg-20.int8.onnx \
  120 + --tokens=$dst/tokens.txt \
  121 + $dst/test_wavs/0.wav \
  122 + $dst/test_wavs/1.wav \
  123 + $dst/test_wavs/8k.wav
  124 +}
  125 +
  126 +download_model
  127 +
  128 +export_large
  129 +export_medium
  130 +export_small
  131 +
  132 +rm -rf icefall-asr-zipformer-libriheavy-20230926