Committed by GitHub
Support https://huggingface.co/Revai/reverb-diarization-v1 (#1437)
Showing 9 changed files with 276 additions and 12 deletions
@@ -4,7 +4,6 @@ on:
   push:
     branches:
       - apk
-      - android-demo-speaker-diarization-2
 
   workflow_dispatch:
 
@@ -76,6 +75,11 @@ jobs:
       run: |
        cd scripts/apk
 
+        total=${{ matrix.total }}
+        index=${{ matrix.index }}
+
+        python3 ./generate-speaker-diarization-apk-script.py --total $total --index $index
+
        chmod +x build-apk-speaker-diarization.sh
        mv -v ./build-apk-speaker-diarization.sh ../..
 
+name: export-revai-segmentation-to-onnx
+
+on:
+  workflow_dispatch:
+
+concurrency:
+  group: export-revai-segmentation-to-onnx-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  export-revai-segmentation-to-onnx:
+    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
+    name: export revai segmentation models to ONNX
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [macos-latest]
+        python-version: ["3.10"]
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install pyannote
+        shell: bash
+        run: |
+          pip install pyannote.audio onnx==1.15.0 onnxruntime==1.16.3
+
+      - name: Run
+        shell: bash
+        run: |
+          d=sherpa-onnx-reverb-diarization-v1
+          src=$PWD/$d
+          mkdir -p $src
+
+          pushd scripts/pyannote/segmentation
+          ./run-revai.sh
+          cp ./*.onnx $src/
+          cp ./README.md $src/
+          cp ./LICENSE $src/
+          cp ./run-revai.sh $src/run.sh
+          cp ./*.py $src/
+
+          popd
+          ls -lh $d
+          tar cjfv $d.tar.bz2 $d
+
+      - name: Release
+        uses: svenstaro/upload-release-action@v2
+        with:
+          file_glob: true
+          file: ./*.tar.bz2
+          overwrite: true
+          repo_name: k2-fsa/sherpa-onnx
+          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
+          tag: speaker-segmentation-models
+
+      - name: Publish to huggingface
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        uses: nick-fields/retry@v3
+        with:
+          max_attempts: 20
+          timeout_seconds: 200
+          shell: bash
+          command: |
+            git config --global user.email "csukuangfj@gmail.com"
+            git config --global user.name "Fangjun Kuang"
+
+            d=sherpa-onnx-reverb-diarization-v1
+            export GIT_LFS_SKIP_SMUDGE=1
+            export GIT_CLONE_PROTECTION_ACTIVE=false
+            git clone https://huggingface.co/csukuangfj/$d huggingface
+            cp -v $d/* ./huggingface
+            cd huggingface
+            git lfs track "*.onnx"
+            git status
+            git add .
+            git status
+            git commit -m "add models"
+            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
 
@@ -31,15 +31,24 @@ log "====================x86===================="
 
 mkdir -p apks
 
+{% for model in model_list %}
+
 pushd ./android/SherpaOnnxSpeakerDiarization/app/src/main/assets/
 
-curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
-tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
-rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
-mv sherpa-onnx-pyannote-segmentation-3-0/model.onnx segmentation.onnx
-rm -rf sherpa-onnx-pyannote-segmentation-3-0
+ls -lh
+
+model_name={{ model.model_name }}
+short_name={{ model.short_name }}
 
-curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/$model_name.tar.bz2
+tar xvf $model_name.tar.bz2
+rm $model_name.tar.bz2
+mv $model_name/model.onnx segmentation.onnx
+rm -rf $model_name
+
+if [ ! -f 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+fi
 
 echo "pwd: $PWD"
 ls -lh
@@ -65,9 +74,13 @@ for arch in arm64-v8a armeabi-v7a x86_64 x86; do
   ./gradlew build
   popd
 
-  mv android/SherpaOnnxSpeakerDiarization/app/build/outputs/apk/debug/app-debug.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-speaker-diarization-pyannote_audio-3dspeaker.apk
+  mv android/SherpaOnnxSpeakerDiarization/app/build/outputs/apk/debug/app-debug.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-speaker-diarization-$short_name-3dspeaker.apk
   ls -lh apks
   rm -v ./android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/$arch/*.so
 done
 
+rm -rf ./android/SherpaOnnxSpeakerDiarization/app/src/main/assets/segmentation.onnx
+
+{% endfor %}
+
 ls -lh apks
 
+#!/usr/bin/env python3
+
+import argparse
+from dataclasses import dataclass
+from typing import List
+
+import jinja2
+
+
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--total",
+        type=int,
+        default=1,
+        help="Number of runners",
+    )
+    parser.add_argument(
+        "--index",
+        type=int,
+        default=0,
+        help="Index of the current runner",
+    )
+    return parser.parse_args()
+
+
+@dataclass
+class SpeakerSegmentationModel:
+    model_name: str
+    short_name: str = ""
+
+
+def get_models() -> List[SpeakerSegmentationModel]:
+    models = [
+        SpeakerSegmentationModel(
+            model_name="sherpa-onnx-pyannote-segmentation-3-0",
+            short_name="pyannote_audio",
+        ),
+        SpeakerSegmentationModel(
+            model_name="sherpa-onnx-reverb-diarization-v1",
+            short_name="revai_v1",
+        ),
+    ]
+
+    return models
+
+
+def main():
+    args = get_args()
+    index = args.index
+    total = args.total
+    assert 0 <= index < total, (index, total)
+
+    all_model_list = get_models()
+
+    num_models = len(all_model_list)
+
+    num_per_runner = num_models // total
+    if num_per_runner <= 0:
+        raise ValueError(f"num_models: {num_models}, num_runners: {total}")
+
+    start = index * num_per_runner
+    end = start + num_per_runner
+
+    remaining = num_models - args.total * num_per_runner
+
+    print(f"{index}/{total}: {start}-{end}/{num_models}")
+
+    d = dict()
+    d["model_list"] = all_model_list[start:end]
+    if index < remaining:
+        s = args.total * num_per_runner + index
+        d["model_list"].append(all_model_list[s])
+        print(f"{s}/{num_models}")
+
+    filename_list = ["./build-apk-speaker-diarization.sh"]
+    for filename in filename_list:
+        environment = jinja2.Environment()
+        with open(f"{filename}.in") as f:
+            s = f.read()
+        template = environment.from_string(s)
+
+        s = template.render(**d)
+        with open(filename, "w") as f:
+            print(s, file=f)
+
+
+if __name__ == "__main__":
+    main()
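For context, here is a minimal sketch of what the generator above does at render time: each runner takes a contiguous slice of the model list and expands the `{% for model in model_list %}` loop in the template once per model. The template string and dict-based models below are stand-ins for illustration, not the real build-apk-speaker-diarization.sh.in.

```python
import jinja2

# Toy stand-in for the real .sh.in template, which wraps the whole
# build loop in "{% for model in model_list %} ... {% endfor %}".
template_str = (
    "{% for model in model_list %}"
    "build {{ model.model_name }} as {{ model.short_name }}\n"
    "{% endfor %}"
)

models = [
    {"model_name": "sherpa-onnx-pyannote-segmentation-3-0", "short_name": "pyannote_audio"},
    {"model_name": "sherpa-onnx-reverb-diarization-v1", "short_name": "revai_v1"},
]

total, index = 2, 0  # e.g. two CI runners; this is runner 0
num_per_runner = len(models) // total
start = index * num_per_runner
model_list = models[start : start + num_per_runner]  # this runner's share

template = jinja2.Environment().from_string(template_str)
print(template.render(model_list=model_list))
# -> build sherpa-onnx-pyannote-segmentation-3-0 as pyannote_audio
```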
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
+# Copyright    2024  Xiaomi Corp.  (authors: Fangjun Kuang)
 
+import os
 from typing import Any, Dict
 
 import onnx
@@ -35,6 +37,8 @@ def add_meta_data(filename: str, meta_data: Dict[str, Any]):
 def main():
     # You can download ./pytorch_model.bin from
     # https://hf-mirror.com/csukuangfj/pyannote-models/tree/main/segmentation-3.0
+    # or from
+    # https://huggingface.co/Revai/reverb-diarization-v1/tree/main
     pt_filename = "./pytorch_model.bin"
     model = Model.from_pretrained(pt_filename)
     model.eval()
@@ -94,6 +98,22 @@ def main():
     receptive_field_size = int(model.receptive_field.duration * 16000)
     receptive_field_shift = int(model.receptive_field.step * 16000)
 
+    is_revai = os.getenv("SHERPA_ONNX_IS_REVAI", "")
+    if is_revai == "":
+        url_1 = "https://huggingface.co/pyannote/segmentation-3.0"
+        url_2 = "https://huggingface.co/csukuangfj/pyannote-models/tree/main/segmentation-3.0"
+        license_url = (
+            "https://huggingface.co/pyannote/segmentation-3.0/blob/main/LICENSE"
+        )
+        model_author = "pyannote-audio"
+    else:
+        url_1 = "https://huggingface.co/Revai/reverb-diarization-v1"
+        url_2 = "https://huggingface.co/csukuangfj/sherpa-onnx-reverb-diarization-v1"
+        license_url = (
+            "https://huggingface.co/Revai/reverb-diarization-v1/blob/main/LICENSE"
+        )
+        model_author = "Revai"
+
     meta_data = {
         "num_speakers": len(model.specifications.classes),
         "powerset_max_classes": model.specifications.powerset_max_classes,
@@ -104,11 +124,11 @@ def main():
         "receptive_field_shift": receptive_field_shift,
         "model_type": "pyannote-segmentation-3.0",
         "version": "1",
-        "model_author": "pyannote",
+        "model_author": model_author,
         "maintainer": "k2-fsa",
-        "url_1": "https://huggingface.co/pyannote/segmentation-3.0",
-        "url_2": "https://huggingface.co/csukuangfj/pyannote-models/tree/main/segmentation-3.0",
-        "license": "https://huggingface.co/pyannote/segmentation-3.0/blob/main/LICENSE",
+        "url_1": url_1,
+        "url_2": url_2,
+        "license": license_url,
     }
     add_meta_data(filename=filename, meta_data=meta_data)
 
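The add_meta_data helper referenced in the hunk above is defined earlier in export-onnx.py and is not part of this diff. A minimal sketch of how such a helper can attach string key/value pairs to an ONNX file with the onnx Python API (assuming the same filename/meta_data signature) might look like this:

```python
from typing import Any, Dict

import onnx


def add_meta_data(filename: str, meta_data: Dict[str, Any]):
    """Attach key/value metadata to an ONNX model and save it in place."""
    model = onnx.load(filename)

    # Clear any existing metadata entries before adding the new ones.
    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)
```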
@@ -1,4 +1,5 @@
 #!/usr/bin/env bash
+# Copyright    2024  Xiaomi Corp.  (authors: Fangjun Kuang)
 
 
 python3 -m onnxruntime.quantization.preprocess --input model.onnx --output tmp.preprocessed.onnx
 
scripts/pyannote/segmentation/run-revai.sh (new file, mode 0 → 100755)
+#!/usr/bin/env bash
+# Copyright    2024  Xiaomi Corp.  (authors: Fangjun Kuang)
+
+export SHERPA_ONNX_IS_REVAI=1
+
+set -ex
+function install_pyannote() {
+  pip install pyannote.audio onnx onnxruntime
+}
+
+function download_test_files() {
+  curl -SL -O https://huggingface.co/Revai/reverb-diarization-v1/resolve/main/pytorch_model.bin
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
+}
+
+install_pyannote
+download_test_files
+
+./export-onnx.py
+./preprocess.sh
+
+echo "----------torch----------"
+./vad-torch.py
+
+echo "----------onnx model.onnx----------"
+./vad-onnx.py --model ./model.onnx --wav ./lei-jun-test.wav
+
+echo "----------onnx model.int8.onnx----------"
+./vad-onnx.py --model ./model.int8.onnx --wav ./lei-jun-test.wav
+
+curl -SL -O https://huggingface.co/Revai/reverb-diarization-v1/resolve/main/LICENSE
+
+cat >README.md << EOF
+# Introduction
+
+Models in this file are converted from
+https://huggingface.co/Revai/reverb-diarization-v1/tree/main
+
+Note that it is accessible under a non-commercial license.
+
+Please see ./LICENSE for details.
+
+See also
+https://www.rev.com/blog/speech-to-text-technology/introducing-reverb-open-source-asr-diarization
+
+EOF
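As a quick sanity check (not part of this commit), the metadata written by export-onnx.py can be read back from the exported model with onnxruntime:

```python
import onnxruntime as ort

# Load the exported model and print the custom metadata added by export-onnx.py.
sess = ort.InferenceSession("./model.onnx", providers=["CPUExecutionProvider"])
meta = sess.get_modelmeta().custom_metadata_map

for key in ("model_type", "model_author", "maintainer", "url_1", "url_2", "license"):
    print(key, "->", meta.get(key))
```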
@@ -216,6 +216,8 @@ def main():
 
     is_active = classification[0] > onset
     start = None
+    if is_active:
+        start = 0
 
     scale = m.receptive_field_shift / m.sample_rate
     scale_offset = m.receptive_field_size / m.sample_rate * 0.5
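The two added lines fix an edge case: if the very first frame is already active, start would otherwise stay None and the opening segment would never be recorded. A standalone sketch of the segment-extraction pattern (not the actual vad-onnx.py) illustrates the point:

```python
def active_segments(frames):
    """Collect (start, end) index pairs of consecutive active frames."""
    segments = []
    start = None
    if frames[0]:
        start = 0  # the fix: a segment may already be open at frame 0

    for i in range(1, len(frames)):
        if frames[i] and start is None:
            start = i  # segment opens
        elif not frames[i] and start is not None:
            segments.append((start, i))  # segment closes
            start = None

    if start is not None:
        segments.append((start, len(frames)))  # close a segment still open at the end
    return segments


print(active_segments([True, True, False, False, True]))  # [(0, 2), (4, 5)]
```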