run-libriheavy-punct-case.sh
3.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/bin/env bash
#
# Downloads the icefall Libriheavy punctuation/case zipformer checkpoint,
# lays out the large/medium/small sherpa-onnx model directories and runs
# sherpa-onnx-offline on a few test wavs for each of them.

# -e: abort on the first failing command; -x: trace every command.
set -ex

# Directory containing this script. Every expansion is quoted so that a
# checkout located under a path with whitespace still resolves correctly.
cur_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

# Two levels above the script directory.
sherpa_onnx_dir=$(cd "$cur_dir/../.." && pwd)
echo "sherpa_onnx_dir: $sherpa_onnx_dir"

pip install sherpa-onnx # for testing
#######################################
# Clone the icefall Libriheavy punctuation checkpoint repository into the
# current working directory.
# `git lfs install` runs before the clone, presumably so that LFS-tracked
# model files are materialised by the clone.
# Arguments: none
#######################################
function download_model() {
  local repo_url="https://www.modelscope.cn/pkufool/icefall-asr-zipformer-libriheavy-punc-20230830.git"

  git lfs install
  git clone "$repo_url"
}
#######################################
# Download the test wavs and their transcript into <dir>/test_wavs.
# Arguments: $1 - directory that will receive the test_wavs/ sub-directory
# Outputs:   pushd/popd print the directory stack; curl prints its progress
# Returns:   0 on success, non-zero if the directory cannot be entered or
#            a download fails
#######################################
function download_test_wavs() {
  local d=$1
  local base_url=https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs
  local name
  local rc

  # -p keeps a rerun from failing when the directory already exists.
  mkdir -p "$d/test_wavs"

  # Never download into the wrong directory if pushd fails.
  pushd "$d/test_wavs" || return

  # NOTE(review): curl runs without --fail, so an HTTP error response is
  # saved to disk and curl still exits 0; consider adding -f once all four
  # URLs are confirmed to exist.
  for name in 0.wav 1.wav 8k.wav trans.txt; do
    if ! curl -SL -O "$base_url/$name"; then
      rc=1
      break
    fi
  done

  # Always restore the caller's working directory, even after a failure.
  popd
  return "${rc:-0}"
}
#######################################
# Assemble the "large" sherpa-onnx model directory from the checkpoint
# directory in the current working directory and run sherpa-onnx-offline
# on the downloaded test wavs.
#
# Copies bpe.model, tokens.txt and every exp/*.onnx file into the
# destination, fetches the test wavs via download_test_wavs, then decodes
# them twice: once with the encoder/joiner files ending in .onnx and once
# with the ones ending in .int8.onnx. The decoder file is the same .onnx
# file in both runs.
#
# Globals:   none (src/dst are function-local and no longer leak)
# Arguments: none
# Outputs:   cp/ls listings plus whatever sherpa-onnx-offline prints
#######################################
function export_large() {
  echo "----------large----------"

  local src=icefall-asr-zipformer-libriheavy-punc-20230830
  local dst=sherpa-onnx-zipformer-en-libriheavy-20230830-large-punct-case
  local tag=epoch-16-avg-2
  local suffix

  # -p keeps a rerun from failing when the directory already exists.
  mkdir -p "$dst"

  cp -v "$src/data/lang_bpe_756/bpe.model" "$dst/"
  cp -v "$src/data/lang_bpe_756/tokens.txt" "$dst/"
  # The glob is deliberately left outside the quotes so it still expands.
  cp -v "$src"/exp/*.onnx "$dst/"

  download_test_wavs "$dst"

  ls -lh "$dst"
  ls -lh "$dst/test_wavs"

  # First pass uses the .onnx encoder/joiner, second pass the .int8.onnx ones.
  for suffix in onnx int8.onnx; do
    sherpa-onnx-offline \
      --encoder="$dst/encoder-$tag.$suffix" \
      --decoder="$dst/decoder-$tag.onnx" \
      --joiner="$dst/joiner-$tag.$suffix" \
      --tokens="$dst/tokens.txt" \
      "$dst/test_wavs/0.wav" \
      "$dst/test_wavs/1.wav" \
      "$dst/test_wavs/8k.wav"
  done
}
#######################################
# Assemble the "medium subset" sherpa-onnx model directory from the
# checkpoint directory in the current working directory and run
# sherpa-onnx-offline on the downloaded test wavs.
#
# Copies bpe.model, tokens.txt and every exp_medium_subset/*.onnx file into
# the destination, fetches the test wavs via download_test_wavs, then
# decodes them twice: once with the encoder/joiner files ending in .onnx
# and once with the ones ending in .int8.onnx. The decoder file is the same
# .onnx file in both runs.
#
# Globals:   none (src/dst are function-local and no longer leak)
# Arguments: none
# Outputs:   cp/ls listings plus whatever sherpa-onnx-offline prints
#######################################
function export_medium() {
  echo "----------medium subset----------"

  local src=icefall-asr-zipformer-libriheavy-punc-20230830
  local dst=sherpa-onnx-zipformer-en-libriheavy-20230830-medium-punct-case
  local tag=epoch-50-avg-15
  local suffix

  # -p keeps a rerun from failing when the directory already exists.
  mkdir -p "$dst"

  cp -v "$src/data/lang_bpe_756/bpe.model" "$dst/"
  cp -v "$src/data/lang_bpe_756/tokens.txt" "$dst/"
  # The glob is deliberately left outside the quotes so it still expands.
  cp -v "$src"/exp_medium_subset/*.onnx "$dst/"

  download_test_wavs "$dst"

  ls -lh "$dst"
  ls -lh "$dst/test_wavs"

  # First pass uses the .onnx encoder/joiner, second pass the .int8.onnx ones.
  for suffix in onnx int8.onnx; do
    sherpa-onnx-offline \
      --encoder="$dst/encoder-$tag.$suffix" \
      --decoder="$dst/decoder-$tag.onnx" \
      --joiner="$dst/joiner-$tag.$suffix" \
      --tokens="$dst/tokens.txt" \
      "$dst/test_wavs/0.wav" \
      "$dst/test_wavs/1.wav" \
      "$dst/test_wavs/8k.wav"
  done
}
#######################################
# Assemble the "small subset" sherpa-onnx model directory from the
# checkpoint directory in the current working directory and run
# sherpa-onnx-offline on the downloaded test wavs.
#
# Copies bpe.model, tokens.txt and every exp_small_subset/*.onnx file into
# the destination, fetches the test wavs via download_test_wavs, then
# decodes them twice: once with the encoder/joiner files ending in .onnx
# and once with the ones ending in .int8.onnx. The decoder file is the same
# .onnx file in both runs.
#
# Globals:   none (src/dst are function-local and no longer leak)
# Arguments: none
# Outputs:   cp/ls listings plus whatever sherpa-onnx-offline prints
#######################################
function export_small() {
  echo "----------small subset----------"

  local src=icefall-asr-zipformer-libriheavy-punc-20230830
  local dst=sherpa-onnx-zipformer-en-libriheavy-20230830-small-punct-case
  local tag=epoch-88-avg-41
  local suffix

  # -p keeps a rerun from failing when the directory already exists.
  mkdir -p "$dst"

  cp -v "$src/data/lang_bpe_756/bpe.model" "$dst/"
  cp -v "$src/data/lang_bpe_756/tokens.txt" "$dst/"
  # The glob is deliberately left outside the quotes so it still expands.
  cp -v "$src"/exp_small_subset/*.onnx "$dst/"

  download_test_wavs "$dst"

  ls -lh "$dst"
  ls -lh "$dst/test_wavs"

  # First pass uses the .onnx encoder/joiner, second pass the .int8.onnx ones.
  for suffix in onnx int8.onnx; do
    sherpa-onnx-offline \
      --encoder="$dst/encoder-$tag.$suffix" \
      --decoder="$dst/decoder-$tag.onnx" \
      --joiner="$dst/joiner-$tag.$suffix" \
      --tokens="$dst/tokens.txt" \
      "$dst/test_wavs/0.wav" \
      "$dst/test_wavs/1.wav" \
      "$dst/test_wavs/8k.wav"
  done
}
# Run the pipeline in order: fetch the checkpoint first, then build and
# exercise each model variant.
for step in download_model export_large export_medium export_small; do
  "$step"
done

# Remove the cloned checkpoint directory once every variant has been exported.
rm -rf icefall-asr-zipformer-libriheavy-punc-20230830