Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-09-18 13:43:49 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-09-18 13:43:49 +0800
Commit
576a3aa90d663db6d5d74051482897d0538c97d4
576a3aa9
1 parent
7e642325
Add non-streaming ONNX models for Russian ASR (#1358)
隐藏空白字符变更
内嵌
并排对比
正在显示
6 个修改的文件
包含
228 行增加
和
15 行删除
.github/scripts/test-offline-transducer.sh
.github/workflows/export-russian-onnx-models.yaml
.github/workflows/linux.yaml
.github/workflows/macos.yaml
scripts/apk/generate-vad-asr-apk-script.py
sherpa-onnx/kotlin-api/OfflineRecognizer.kt
.github/scripts/test-offline-transducer.sh
查看文件 @
576a3aa
...
...
@@ -16,6 +16,46 @@ echo "PATH: $PATH"
which
$EXE
# Smoke-test the non-streaming Russian zipformer transducer releases.
#
# For each released archive: download it, unpack it, decode the bundled
# test waves with $EXE, then delete the unpacked directory again.
# `log` and $EXE are provided by the surrounding script / caller.
log "------------------------------------------------------------------------"
log "Run zipformer transducer models (Russian) "
log "------------------------------------------------------------------------"

for flavor in small-zipformer zipformer; do
  url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-${flavor}-ru-2024-09-18.tar.bz2
  name=$(basename $url)

  # Fetch the archive, unpack it, and drop the tarball right away.
  curl -SL -O $url
  tar xvf $name
  rm $name

  # The unpacked directory is the archive name without ".tar.bz2".
  repo=$(basename -s .tar.bz2 $name)
  ls -lh $repo

  log "test $repo"

  test_wavs=(0.wav 1.wav)

  # First pass ("") uses encoder.onnx / joiner.onnx, second pass (".int8")
  # uses encoder.int8.onnx / joiner.int8.onnx. Both passes use decoder.onnx.
  for precision in "" .int8; do
    for wav in ${test_wavs[@]}; do
      time $EXE \
        --tokens=$repo/tokens.txt \
        --encoder=$repo/encoder$precision.onnx \
        --decoder=$repo/decoder.onnx \
        --joiner=$repo/joiner$precision.onnx \
        --debug=1 \
        $repo/test_wavs/$wav
    done
  done

  # Remove the unpacked model before handling the next archive.
  rm -rf $repo
done
log
"------------------------------------------------------------------------"
log
"Run zipformer transducer models (Japanese from ReazonSpeech) "
log
"------------------------------------------------------------------------"
url
=
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01.tar.bz2
...
...
.github/workflows/export-russian-onnx-models.yaml
0 → 100644
查看文件 @
576a3aa
# Manually triggered workflow that repackages the Russian vosk zipformer
# models from Hugging Face as sherpa-onnx archives and uploads them to the
# "asr-models" release of k2-fsa/sherpa-onnx.
name: export-russian-onnx-models

on:
  workflow_dispatch:

concurrency:
  group: export-russian-onnx-models-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-russian-onnx-models:
    # Only run in the upstream repository or the maintainer's fork.
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export Russian onnx models
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        # NOTE(review): no step in this workflow consumes python-version.
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v4

      - name: vosk-model-ru (zipformer v1)
        shell: bash
        run: |
          d=sherpa-onnx-zipformer-ru-2024-09-18
          mkdir $d
          pushd $d

          # Write the README inside the model directory so that it is
          # packaged into the archive and does not overwrite the
          # repository's own README.md in the workspace root.
          cat >README.md <<EOF
          # Introduction
          Models in this directory are from
          https://huggingface.co/alphacep/vosk-model-ru/tree/main
          EOF
          cat README.md

          curl -SL -O https://huggingface.co/alphacep/vosk-model-ru/resolve/main/lang/bpe.model
          curl -SL -O https://huggingface.co/alphacep/vosk-model-ru/resolve/main/lang/tokens.txt
          curl -SL -O https://huggingface.co/alphacep/vosk-model-ru/resolve/main/am-onnx/encoder.int8.onnx
          curl -SL -O https://huggingface.co/alphacep/vosk-model-ru/resolve/main/am-onnx/decoder.int8.onnx
          curl -SL -O https://huggingface.co/alphacep/vosk-model-ru/resolve/main/am-onnx/joiner.int8.onnx
          curl -SL -O https://huggingface.co/alphacep/vosk-model-ru/resolve/main/am-onnx/encoder.onnx
          curl -SL -O https://huggingface.co/alphacep/vosk-model-ru/resolve/main/am-onnx/decoder.onnx
          curl -SL -O https://huggingface.co/alphacep/vosk-model-ru/resolve/main/am-onnx/joiner.onnx

          # Test waves are stored as 0.wav / 1.wav inside the archive.
          mkdir test_wavs
          cd test_wavs
          curl -SL -O https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition/resolve/main/test_wavs/russian/russian-i-love-you.wav
          curl -SL -O https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition/resolve/main/test_wavs/russian/test.wav
          mv russian-i-love-you.wav 0.wav
          mv test.wav 1.wav

          # popd returns to the directory that was current before `pushd $d`.
          popd

          ls -lh $d
          tar cjvf $d.tar.bz2 $d
          rm -rf $d

      - name: vosk-model-ru-small (zipformer v1)
        shell: bash
        run: |
          d=sherpa-onnx-small-zipformer-ru-2024-09-18
          mkdir $d
          pushd $d

          # Write the README inside the model directory so that it is
          # packaged into the archive and does not overwrite the
          # repository's own README.md in the workspace root.
          cat >README.md <<EOF
          # Introduction
          Models in this directory are from
          https://huggingface.co/alphacep/vosk-model-small-ru/tree/main
          EOF
          cat README.md

          curl -SL -O https://huggingface.co/alphacep/vosk-model-small-ru/resolve/main/lang/bpe.model
          curl -SL -O https://huggingface.co/alphacep/vosk-model-small-ru/resolve/main/lang/tokens.txt
          curl -SL -O https://huggingface.co/alphacep/vosk-model-small-ru/resolve/main/am/encoder.int8.onnx
          curl -SL -O https://huggingface.co/alphacep/vosk-model-small-ru/resolve/main/am/decoder.int8.onnx
          curl -SL -O https://huggingface.co/alphacep/vosk-model-small-ru/resolve/main/am/joiner.int8.onnx
          curl -SL -O https://huggingface.co/alphacep/vosk-model-small-ru/resolve/main/am/encoder.onnx
          curl -SL -O https://huggingface.co/alphacep/vosk-model-small-ru/resolve/main/am/decoder.onnx
          curl -SL -O https://huggingface.co/alphacep/vosk-model-small-ru/resolve/main/am/joiner.onnx

          # Test waves are stored as 0.wav / 1.wav inside the archive.
          mkdir test_wavs
          cd test_wavs
          curl -SL -O https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition/resolve/main/test_wavs/russian/russian-i-love-you.wav
          curl -SL -O https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition/resolve/main/test_wavs/russian/test.wav
          mv russian-i-love-you.wav 0.wav
          mv test.wav 1.wav

          # popd returns to the directory that was current before `pushd $d`.
          popd

          ls -lh $d
          tar cjvf $d.tar.bz2 $d
          rm -rf $d

      # Upload every archive produced by the steps above.
      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models
...
...
.github/workflows/linux.yaml
查看文件 @
576a3aa
...
...
@@ -143,34 +143,34 @@ jobs:
name
:
release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
path
:
install/*
-
name
:
Test offline
CTC
-
name
:
Test offline
transducer
shell
:
bash
run
:
|
du -h -d1 .
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline
.github/scripts/test-offline-
ctc
.sh
.github/scripts/test-offline-
transducer
.sh
du -h -d1 .
-
name
:
Test o
nline punctuation
-
name
:
Test o
ffline CTC
shell
:
bash
run
:
|
du -h -d1 .
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-o
nline-punctuation
export EXE=sherpa-onnx-o
ffline
.github/scripts/test-o
nline-punctuation
.sh
.github/scripts/test-o
ffline-ctc
.sh
du -h -d1 .
-
name
:
Test o
ffline transducer
-
name
:
Test o
nline punctuation
shell
:
bash
run
:
|
du -h -d1 .
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-o
ffline
export EXE=sherpa-onnx-o
nline-punctuation
.github/scripts/test-o
ffline-transducer
.sh
.github/scripts/test-o
nline-punctuation
.sh
du -h -d1 .
-
name
:
Test online transducer
...
...
.github/workflows/macos.yaml
查看文件 @
576a3aa
...
...
@@ -115,22 +115,21 @@ jobs:
otool -L build/bin/sherpa-onnx
otool -l build/bin/sherpa-onnx
-
name
:
Test o
nline punctuation
-
name
:
Test o
ffline transducer
shell
:
bash
run
:
|
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-o
nline-punctuation
export EXE=sherpa-onnx-o
ffline
.github/scripts/test-o
nline-punctuation
.sh
.github/scripts/test-o
ffline-transducer
.sh
-
name
:
Test o
ffline transducer
-
name
:
Test o
nline punctuation
shell
:
bash
run
:
|
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline
.github/scripts/test-offline-transducer.sh
export EXE=sherpa-onnx-online-punctuation
.github/scripts/test-online-punctuation.sh
-
name
:
Test offline CTC
shell
:
bash
...
...
scripts/apk/generate-vad-asr-apk-script.py
查看文件 @
576a3aa
...
...
@@ -296,6 +296,46 @@ def get_models():
popd
"""
,
),
Model
(
model_name
=
"sherpa-onnx-zipformer-ru-2024-09-18"
,
idx
=
17
,
lang
=
"ru"
,
short_name
=
"zipformer"
,
cmd
=
"""
pushd $model_name
rm -rfv test_wavs
rm -fv encoder.onnx
rm -fv decoder.int8.onnx
rm -fv joiner.onnx
rm -fv bpe.model
ls -lh
popd
"""
,
),
Model
(
model_name
=
"sherpa-onnx-small-zipformer-ru-2024-09-18"
,
idx
=
18
,
lang
=
"ru"
,
short_name
=
"small_zipformer"
,
cmd
=
"""
pushd $model_name
rm -rfv test_wavs
rm -fv encoder.onnx
rm -fv decoder.int8.onnx
rm -fv joiner.onnx
rm -fv bpe.model
ls -lh
popd
"""
,
),
]
return
models
...
...
sherpa-onnx/kotlin-api/OfflineRecognizer.kt
查看文件 @
576a3aa
...
...
@@ -368,6 +368,32 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? {
modelType = "transducer",
)
}
17 -> {
val modelDir = "sherpa-onnx-zipformer-ru-2024-09-18"
return OfflineModelConfig(
transducer = OfflineTransducerModelConfig(
encoder = "$modelDir/encoder.int8.onnx",
decoder = "$modelDir/decoder.onnx",
joiner = "$modelDir/joiner.int8.onnx",
),
tokens = "$modelDir/tokens.txt",
modelType = "transducer",
)
}
18 -> {
val modelDir = "sherpa-onnx-small-zipformer-ru-2024-09-18"
return OfflineModelConfig(
transducer = OfflineTransducerModelConfig(
encoder = "$modelDir/encoder.int8.onnx",
decoder = "$modelDir/decoder.onnx",
joiner = "$modelDir/joiner.int8.onnx",
),
tokens = "$modelDir/tokens.txt",
modelType = "transducer",
)
}
}
return null
}
...
...
请
注册
或
登录
后发表评论