正在显示
12 个修改的文件
包含
408 行增加
和
51 行删除
| @@ -2,7 +2,13 @@ | @@ -2,7 +2,13 @@ | ||
| 2 | 2 | ||
| 3 | cd dotnet-examples/ | 3 | cd dotnet-examples/ |
| 4 | 4 | ||
| 5 | -cd ./offline-decode-files | 5 | +cd ./offline-speaker-diarization |
| 6 | +./run.sh | ||
| 7 | +rm -rfv *.onnx | ||
| 8 | +rm -fv *.wav | ||
| 9 | +rm -rfv sherpa-onnx-pyannote-* | ||
| 10 | + | ||
| 11 | +cd ../offline-decode-files | ||
| 6 | ./run-sense-voice-ctc.sh | 12 | ./run-sense-voice-ctc.sh |
| 7 | rm -rf sherpa-onnx-* | 13 | rm -rf sherpa-onnx-* |
| 8 | 14 |
| @@ -47,53 +47,10 @@ jobs: | @@ -47,53 +47,10 @@ jobs: | ||
| 47 | with: | 47 | with: |
| 48 | fetch-depth: 0 | 48 | fetch-depth: 0 |
| 49 | 49 | ||
| 50 | - - name: Free space | ||
| 51 | - if: matrix.os == 'ubuntu-latest' | ||
| 52 | - shell: bash | ||
| 53 | - run: | | ||
| 54 | - df -h | ||
| 55 | - rm -rf /opt/hostedtoolcache | ||
| 56 | - df -h | ||
| 57 | - | ||
| 58 | - - name: Free more space | ||
| 59 | - if: matrix.os == 'ubuntu-latest' | ||
| 60 | - shell: bash | ||
| 61 | - run: | | ||
| 62 | - # https://github.com/orgs/community/discussions/25678 | ||
| 63 | - cd /opt | ||
| 64 | - find . -maxdepth 1 -mindepth 1 '!' -path ./containerd '!' -path ./actionarchivecache '!' -path ./runner '!' -path ./runner-cache -exec rm -rf '{}' ';' | ||
| 65 | - | ||
| 66 | - sudo rm -rf /usr/share/dotnet | ||
| 67 | - sudo rm -rf "/usr/local/share/boost" | ||
| 68 | - sudo rm -rf "$AGENT_TOOLSDIRECTORY" | ||
| 69 | - | ||
| 70 | - - name: Free Disk Space (Ubuntu) | ||
| 71 | - if: matrix.os == 'ubuntu-latest' | ||
| 72 | - uses: jlumbroso/free-disk-space@main | ||
| 73 | - with: | ||
| 74 | - # this might remove tools that are actually needed, | ||
| 75 | - # if set to "true" but frees about 6 GB | ||
| 76 | - tool-cache: false | ||
| 77 | - | ||
| 78 | - # all of these default to true, but feel free to set to | ||
| 79 | - # "false" if necessary for your workflow | ||
| 80 | - android: true | ||
| 81 | - dotnet: false | ||
| 82 | - haskell: true | ||
| 83 | - large-packages: true | ||
| 84 | - docker-images: false | ||
| 85 | - swap-storage: true | ||
| 86 | - | ||
| 87 | - - name: Check space | ||
| 88 | - if: matrix.os == 'ubuntu-latest' | ||
| 89 | - shell: bash | ||
| 90 | - run: | | ||
| 91 | - df -h | ||
| 92 | - | ||
| 93 | - name: ccache | 50 | - name: ccache |
| 94 | uses: hendrikmuhs/ccache-action@v1.2 | 51 | uses: hendrikmuhs/ccache-action@v1.2 |
| 95 | with: | 52 | with: |
| 96 | - key: ${{ matrix.os }}-release-shared | 53 | + key: ${{ matrix.os }}-dotnet-release-shared |
| 97 | 54 | ||
| 98 | - name: Build sherpa-onnx | 55 | - name: Build sherpa-onnx |
| 99 | shell: bash | 56 | shell: bash |
| @@ -110,11 +67,16 @@ jobs: | @@ -110,11 +67,16 @@ jobs: | ||
| 110 | -DCMAKE_BUILD_TYPE=Release \ | 67 | -DCMAKE_BUILD_TYPE=Release \ |
| 111 | -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \ | 68 | -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \ |
| 112 | -DBUILD_ESPEAK_NG_EXE=OFF \ | 69 | -DBUILD_ESPEAK_NG_EXE=OFF \ |
| 113 | - -DSHERPA_ONNX_ENABLE_BINARY=ON \ | 70 | + -DSHERPA_ONNX_ENABLE_BINARY=OFF \ |
| 114 | .. | 71 | .. |
| 115 | 72 | ||
| 116 | cmake --build . --target install --config Release | 73 | cmake --build . --target install --config Release |
| 117 | 74 | ||
| 75 | + rm -rf install/share | ||
| 76 | + rm -rf install/lib/pkg* | ||
| 77 | + | ||
| 78 | + ls -lh ./install/lib | ||
| 79 | + | ||
| 118 | - uses: actions/upload-artifact@v4 | 80 | - uses: actions/upload-artifact@v4 |
| 119 | with: | 81 | with: |
| 120 | name: ${{ matrix.os }} | 82 | name: ${{ matrix.os }} |
| @@ -148,7 +110,7 @@ jobs: | @@ -148,7 +110,7 @@ jobs: | ||
| 148 | uses: actions/download-artifact@v4 | 110 | uses: actions/download-artifact@v4 |
| 149 | with: | 111 | with: |
| 150 | name: ubuntu-latest | 112 | name: ubuntu-latest |
| 151 | - path: /tmp/linux | 113 | + path: /tmp/linux-x64 |
| 152 | 114 | ||
| 153 | - name: Setup .NET | 115 | - name: Setup .NET |
| 154 | uses: actions/setup-dotnet@v4 | 116 | uses: actions/setup-dotnet@v4 |
| @@ -162,17 +124,21 @@ jobs: | @@ -162,17 +124,21 @@ jobs: | ||
| 162 | - name: Display files | 124 | - name: Display files |
| 163 | shell: bash | 125 | shell: bash |
| 164 | run: | | 126 | run: | |
| 165 | - echo "----------/tmp/----------" | ||
| 166 | - ls -lh /tmp/ | 127 | + echo "----------/tmp----------" |
| 128 | + ls -lh /tmp | ||
| 167 | 129 | ||
| 168 | - echo "----------/tmp/linux----------" | ||
| 169 | - ls -lh /tmp/linux | 130 | + echo "----------/tmp/linux-x64----------" |
| 131 | + ls -lh /tmp/linux-x64 | ||
| 132 | + df -h | ||
| 170 | 133 | ||
| 171 | - name: Build | 134 | - name: Build |
| 172 | shell: bash | 135 | shell: bash |
| 173 | run: | | 136 | run: | |
| 174 | cd scripts/dotnet | 137 | cd scripts/dotnet |
| 175 | ./run.sh | 138 | ./run.sh |
| 139 | + df -h | ||
| 140 | + | ||
| 141 | + ls -lh /tmp/packages | ||
| 176 | 142 | ||
| 177 | - name: Copy files | 143 | - name: Copy files |
| 178 | shell: bash | 144 | shell: bash |
| @@ -181,9 +147,14 @@ jobs: | @@ -181,9 +147,14 @@ jobs: | ||
| 181 | 147 | ||
| 182 | ls -lh /tmp | 148 | ls -lh /tmp |
| 183 | 149 | ||
| 150 | + df -h | ||
| 151 | + | ||
| 184 | - name: Run tests | 152 | - name: Run tests |
| 185 | shell: bash | 153 | shell: bash |
| 186 | run: | | 154 | run: | |
| 155 | + dotnet nuget locals all --clear | ||
| 156 | + df -h | ||
| 157 | + | ||
| 187 | .github/scripts/test-dot-net.sh | 158 | .github/scripts/test-dot-net.sh |
| 188 | 159 | ||
| 189 | - uses: actions/upload-artifact@v4 | 160 | - uses: actions/upload-artifact@v4 |
| 1 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | +// | ||
| 3 | + | ||
| 4 | +// This file shows how to use sherpa-onnx C# API for speaker diarization | ||
| 5 | +/* | ||
| 6 | +Usage: | ||
| 7 | + | ||
| 8 | +Step 1: Download a speaker segmentation model | ||
| 9 | + | ||
| 10 | +Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models | ||
| 11 | +for a list of available models. The following is an example | ||
| 12 | + | ||
| 13 | + wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 | ||
| 14 | + tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 | ||
| 15 | + rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 | ||
| 16 | + | ||
| 17 | +Step 2: Download a speaker embedding extractor model | ||
| 18 | + | ||
| 19 | +Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models | ||
| 20 | +for a list of available models. The following is an example | ||
| 21 | + | ||
| 22 | + wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx | ||
| 23 | + | ||
| 24 | +Step 3: Download test wave files | ||
| 25 | + | ||
| 26 | +Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models | ||
| 27 | +for a list of available test wave files. The following is an example | ||
| 28 | + | ||
| 29 | + wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav | ||
| 30 | + | ||
| 31 | +Step 4: Run it | ||
| 32 | + | ||
| 33 | + dotnet run | ||
| 34 | +*/ | ||
| 35 | + | ||
| 36 | +using SherpaOnnx; | ||
| 37 | +using System; | ||
| 38 | + | ||
| 39 | +class OfflineSpeakerDiarizationDemo | ||
| 40 | +{ | ||
| 41 | + static void Main(string[] args) | ||
| 42 | + { | ||
| 43 | + var config = new OfflineSpeakerDiarizationConfig(); | ||
| 44 | + config.Segmentation.Pyannote.Model = "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx"; | ||
| 45 | + config.Embedding.Model = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"; | ||
| 46 | + | ||
| 47 | + // The test wave ./0-four-speakers-zh.wav has 4 speakers, so | ||
| 48 | + // we set NumClusters to 4 | ||
| 49 | + // | ||
| 50 | + config.Clustering.NumClusters = 4; | ||
| 51 | + // If you don't know the number of speakers in the test wave file, please | ||
| 52 | + // use the following instead (Threshold is a float, hence the F suffix): | ||
| 53 | + // config.Clustering.Threshold = 0.5F; // You need to tune this threshold | ||
| 54 | + var sd = new OfflineSpeakerDiarization(config); | ||
| 55 | + | ||
| 56 | + var testWaveFile = "./0-four-speakers-zh.wav"; | ||
| 57 | + WaveReader waveReader = new WaveReader(testWaveFile); | ||
| 58 | + if (sd.SampleRate != waveReader.SampleRate) | ||
| 59 | + { | ||
| 60 | + Console.WriteLine($"Expected sample rate: {sd.SampleRate}. Given: {waveReader.SampleRate}"); | ||
| 61 | + return; | ||
| 62 | + } | ||
| 63 | + | ||
| 64 | + Console.WriteLine("Started"); | ||
| 65 | + | ||
| 66 | + // var segments = sd.Process(waveReader.Samples); // this one is also ok | ||
| 67 | + | ||
| 68 | + var MyProgressCallback = (int numProcessedChunks, int numTotalChunks, IntPtr arg) => | ||
| 69 | + { | ||
| 70 | + float progress = 100.0F * numProcessedChunks / numTotalChunks; | ||
| 71 | + Console.WriteLine("Progress {0}%", String.Format("{0:0.00}", progress)); | ||
| 72 | + return 0; | ||
| 73 | + }; | ||
| 74 | + | ||
| 75 | + var callback = new OfflineSpeakerDiarizationProgressCallback(MyProgressCallback); | ||
| 76 | + var segments = sd.ProcessWithCallback(waveReader.Samples, callback, IntPtr.Zero); | ||
| 77 | + | ||
| 78 | + foreach (var s in segments) | ||
| 79 | + { | ||
| 80 | + Console.WriteLine("{0} -- {1} speaker_{2}", String.Format("{0:0.00}", s.Start), String.Format("{0:0.00}", s.End), s.Speaker); | ||
| 81 | + } | ||
| 82 | + } | ||
| 83 | +} |
| 1 | +<Project Sdk="Microsoft.NET.Sdk"> | ||
| 2 | + | ||
| 3 | + <PropertyGroup> | ||
| 4 | + <OutputType>Exe</OutputType> | ||
| 5 | + <TargetFramework>net6.0</TargetFramework> | ||
| 6 | + <RootNamespace>offline_speaker_diarization</RootNamespace> | ||
| 7 | + <ImplicitUsings>enable</ImplicitUsings> | ||
| 8 | + <Nullable>enable</Nullable> | ||
| 9 | + </PropertyGroup> | ||
| 10 | + | ||
| 11 | + <ItemGroup> | ||
| 12 | + <ProjectReference Include="..\Common\Common.csproj" /> | ||
| 13 | + </ItemGroup> | ||
| 14 | + | ||
| 15 | +</Project> |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | + | ||
| 4 | +if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then | ||
| 5 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 | ||
| 6 | + tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 | ||
| 7 | + rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 | ||
| 8 | +fi | ||
| 9 | + | ||
| 10 | +if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then | ||
| 11 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx | ||
| 12 | +fi | ||
| 13 | + | ||
| 14 | +if [ ! -f ./0-four-speakers-zh.wav ]; then | ||
| 15 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav | ||
| 16 | +fi | ||
| 17 | + | ||
| 18 | +dotnet run |
| @@ -31,6 +31,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "keyword-spotting-from-micro | @@ -31,6 +31,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "keyword-spotting-from-micro | ||
| 31 | EndProject | 31 | EndProject |
| 32 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TTS", "TTS\TTS.csproj", "{DACE4A18-4FC8-4437-92BF-5A90BA81286C}" | 32 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TTS", "TTS\TTS.csproj", "{DACE4A18-4FC8-4437-92BF-5A90BA81286C}" |
| 33 | EndProject | 33 | EndProject |
| 34 | +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-speaker-diarization", "offline-speaker-diarization\offline-speaker-diarization.csproj", "{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}" | ||
| 35 | +EndProject | ||
| 34 | Global | 36 | Global |
| 35 | GlobalSection(SolutionConfigurationPlatforms) = preSolution | 37 | GlobalSection(SolutionConfigurationPlatforms) = preSolution |
| 36 | Debug|Any CPU = Debug|Any CPU | 38 | Debug|Any CPU = Debug|Any CPU |
| @@ -93,6 +95,10 @@ Global | @@ -93,6 +95,10 @@ Global | ||
| 93 | {DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Debug|Any CPU.Build.0 = Debug|Any CPU | 95 | {DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Debug|Any CPU.Build.0 = Debug|Any CPU |
| 94 | {DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Release|Any CPU.ActiveCfg = Release|Any CPU | 96 | {DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Release|Any CPU.ActiveCfg = Release|Any CPU |
| 95 | {DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Release|Any CPU.Build.0 = Release|Any CPU | 97 | {DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Release|Any CPU.Build.0 = Release|Any CPU |
| 98 | + {D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||
| 99 | + {D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||
| 100 | + {D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||
| 101 | + {D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Release|Any CPU.Build.0 = Release|Any CPU | ||
| 96 | EndGlobalSection | 102 | EndGlobalSection |
| 97 | GlobalSection(SolutionProperties) = preSolution | 103 | GlobalSection(SolutionProperties) = preSolution |
| 98 | HideSolutionNode = FALSE | 104 | HideSolutionNode = FALSE |
scripts/dotnet/FastClusteringConfig.cs
0 → 100644
| 1 | +/// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +using System.Runtime.InteropServices; | ||
| 4 | + | ||
| 5 | +namespace SherpaOnnx | ||
| 6 | +{ | ||
| 7 | + | ||
| 8 | + [StructLayout(LayoutKind.Sequential)] | ||
| 9 | + public struct FastClusteringConfig | ||
| 10 | + { | ||
| 11 | + public FastClusteringConfig() | ||
| 12 | + { | ||
| 13 | + NumClusters = -1; | ||
| 14 | + Threshold = 0.5F; | ||
| 15 | + } | ||
| 16 | + | ||
| 17 | + public int NumClusters; | ||
| 18 | + public float Threshold; | ||
| 19 | + } | ||
| 20 | +} |
scripts/dotnet/OfflineSpeakerDiarization.cs
0 → 100644
| 1 | +/// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | +using System; | ||
| 3 | +using System.Runtime.InteropServices; | ||
| 4 | +using System.Text; | ||
| 5 | + | ||
| 6 | +namespace SherpaOnnx | ||
| 7 | +{ | ||
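| 8 | + // Progress callback. `arg` is not a `const float*`; it is presumably the opaque user pointer given to ProcessWithCallback (TODO confirm against the C API) | ||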
| 9 | + public delegate int OfflineSpeakerDiarizationProgressCallback(int numProcessedChunks, int numTotalChunks, IntPtr arg); | ||
| 10 | + | ||
| 11 | + public class OfflineSpeakerDiarization : IDisposable | ||
| 12 | + { | ||
| 13 | + public OfflineSpeakerDiarization(OfflineSpeakerDiarizationConfig config) | ||
| 14 | + { | ||
| 15 | + IntPtr h = SherpaOnnxCreateOfflineSpeakerDiarization(ref config); | ||
| 16 | + _handle = new HandleRef(this, h); | ||
| 17 | + } | ||
| 18 | + | ||
| 19 | + public OfflineSpeakerDiarizationSegment[] Process(float[] samples) | ||
| 20 | + { | ||
| 21 | + IntPtr result = SherpaOnnxOfflineSpeakerDiarizationProcess(_handle.Handle, samples, samples.Length); | ||
| 22 | + return ProcessImpl(result); | ||
| 23 | + } | ||
| 24 | + | ||
| 25 | + public OfflineSpeakerDiarizationSegment[] ProcessWithCallback(float[] samples, OfflineSpeakerDiarizationProgressCallback callback, IntPtr arg) | ||
| 26 | + { | ||
| 27 | + IntPtr result = SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(_handle.Handle, samples, samples.Length, callback, arg); | ||
| 28 | + return ProcessImpl(result); | ||
| 29 | + } | ||
| 30 | + | ||
| 31 | + private OfflineSpeakerDiarizationSegment[] ProcessImpl(IntPtr result) | ||
| 32 | + { | ||
| 33 | + if (result == IntPtr.Zero) | ||
| 34 | + { | ||
| 35 | + return new OfflineSpeakerDiarizationSegment[] {}; | ||
| 36 | + } | ||
| 37 | + | ||
| 38 | + int numSegments = SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(result); | ||
| 39 | + IntPtr p = SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(result); | ||
| 40 | + | ||
| 41 | + OfflineSpeakerDiarizationSegment[] ans = new OfflineSpeakerDiarizationSegment[numSegments]; | ||
| 42 | + unsafe | ||
| 43 | + { | ||
| 44 | + int size = sizeof(float) * 2 + sizeof(int); | ||
| 45 | + for (int i = 0; i != numSegments; ++i) | ||
| 46 | + { | ||
| 47 | + IntPtr t = new IntPtr((byte*)p + i * size); | ||
| 48 | + ans[i] = new OfflineSpeakerDiarizationSegment(t); | ||
| 49 | + | ||
| 50 | + // IntPtr.Add() is not available on net20, hence the pointer arithmetic above. | ||
| 51 | + // Equivalent: ans[i] = new OfflineSpeakerDiarizationSegment(IntPtr.Add(p, i * size)); | ||
| 52 | + } | ||
| 53 | + } | ||
| 54 | + | ||
| 55 | + | ||
| 56 | + SherpaOnnxOfflineSpeakerDiarizationDestroySegment(p); | ||
| 57 | + SherpaOnnxOfflineSpeakerDiarizationDestroyResult(result); | ||
| 58 | + | ||
| 59 | + return ans; | ||
| 60 | + | ||
| 61 | + } | ||
| 62 | + | ||
| 63 | + public void Dispose() | ||
| 64 | + { | ||
| 65 | + Cleanup(); | ||
| 66 | + // Prevent the object from being placed on the | ||
| 67 | + // finalization queue | ||
| 68 | + System.GC.SuppressFinalize(this); | ||
| 69 | + } | ||
| 70 | + | ||
| 71 | + ~OfflineSpeakerDiarization() | ||
| 72 | + { | ||
| 73 | + Cleanup(); | ||
| 74 | + } | ||
| 75 | + | ||
| 76 | + private void Cleanup() | ||
| 77 | + { | ||
| 78 | + SherpaOnnxDestroyOfflineSpeakerDiarization(_handle.Handle); | ||
| 79 | + | ||
| 80 | + // Don't permit the handle to be used again. | ||
| 81 | + _handle = new HandleRef(this, IntPtr.Zero); | ||
| 82 | + } | ||
| 83 | + | ||
| 84 | + private HandleRef _handle; | ||
| 85 | + | ||
| 86 | + public int SampleRate | ||
| 87 | + { | ||
| 88 | + get | ||
| 89 | + { | ||
| 90 | + return SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(_handle.Handle); | ||
| 91 | + } | ||
| 92 | + } | ||
| 93 | + | ||
| 94 | + [DllImport(Dll.Filename)] | ||
| 95 | + private static extern IntPtr SherpaOnnxCreateOfflineSpeakerDiarization(ref OfflineSpeakerDiarizationConfig config); | ||
| 96 | + | ||
| 97 | + [DllImport(Dll.Filename)] | ||
| 98 | + private static extern void SherpaOnnxDestroyOfflineSpeakerDiarization(IntPtr handle); | ||
| 99 | + | ||
| 100 | + [DllImport(Dll.Filename)] | ||
| 101 | + private static extern int SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(IntPtr handle); | ||
| 102 | + | ||
| 103 | + [DllImport(Dll.Filename)] | ||
| 104 | + private static extern int SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(IntPtr handle); | ||
| 105 | + | ||
| 106 | + [DllImport(Dll.Filename)] | ||
| 107 | + private static extern IntPtr SherpaOnnxOfflineSpeakerDiarizationProcess(IntPtr handle, float[] samples, int n); | ||
| 108 | + | ||
| 109 | + [DllImport(Dll.Filename, CallingConvention = CallingConvention.Cdecl)] | ||
| 110 | + private static extern IntPtr SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(IntPtr handle, float[] samples, int n, OfflineSpeakerDiarizationProgressCallback callback, IntPtr arg); | ||
| 111 | + | ||
| 112 | + [DllImport(Dll.Filename)] | ||
| 113 | + private static extern void SherpaOnnxOfflineSpeakerDiarizationDestroyResult(IntPtr handle); | ||
| 114 | + | ||
| 115 | + [DllImport(Dll.Filename)] | ||
| 116 | + private static extern IntPtr SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(IntPtr handle); | ||
| 117 | + | ||
| 118 | + [DllImport(Dll.Filename)] | ||
| 119 | + private static extern void SherpaOnnxOfflineSpeakerDiarizationDestroySegment(IntPtr handle); | ||
| 120 | + } | ||
| 121 | +} | ||
| 122 | + |
| 1 | +/// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +using System.Runtime.InteropServices; | ||
| 4 | + | ||
| 5 | +namespace SherpaOnnx | ||
| 6 | +{ | ||
| 7 | + | ||
| 8 | + [StructLayout(LayoutKind.Sequential)] | ||
| 9 | + public struct OfflineSpeakerDiarizationConfig | ||
| 10 | + { | ||
| 11 | + public OfflineSpeakerDiarizationConfig() | ||
| 12 | + { | ||
| 13 | + Segmentation = new OfflineSpeakerSegmentationModelConfig(); | ||
| 14 | + Embedding = new SpeakerEmbeddingExtractorConfig(); | ||
| 15 | + Clustering = new FastClusteringConfig(); | ||
| 16 | + | ||
| 17 | + MinDurationOn = 0.3F; | ||
| 18 | + MinDurationOff = 0.5F; | ||
| 19 | + } | ||
| 20 | + | ||
| 21 | + public OfflineSpeakerSegmentationModelConfig Segmentation; | ||
| 22 | + public SpeakerEmbeddingExtractorConfig Embedding; | ||
| 23 | + public FastClusteringConfig Clustering; | ||
| 24 | + | ||
| 25 | + public float MinDurationOn; | ||
| 26 | + public float MinDurationOff; | ||
| 27 | + } | ||
| 28 | +} | ||
| 29 | + | ||
| 30 | + | ||
| 31 | + |
| 1 | +/// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | +using System; | ||
| 3 | +using System.Runtime.InteropServices; | ||
| 4 | +using System.Text; | ||
| 5 | + | ||
| 6 | +namespace SherpaOnnx | ||
| 7 | +{ | ||
| 8 | + | ||
| 9 | + public class OfflineSpeakerDiarizationSegment | ||
| 10 | + { | ||
| 11 | + public OfflineSpeakerDiarizationSegment(IntPtr handle) | ||
| 12 | + { | ||
| 13 | + Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl)); | ||
| 14 | + | ||
| 15 | + Start = impl.Start; | ||
| 16 | + End = impl.End; | ||
| 17 | + Speaker = impl.Speaker; | ||
| 18 | + } | ||
| 19 | + | ||
| 20 | + [StructLayout(LayoutKind.Sequential)] | ||
| 21 | + struct Impl | ||
| 22 | + { | ||
| 23 | + public float Start; | ||
| 24 | + public float End; | ||
| 25 | + public int Speaker; | ||
| 26 | + } | ||
| 27 | + | ||
| 28 | + public float Start; | ||
| 29 | + public float End; | ||
| 30 | + public int Speaker; | ||
| 31 | + } | ||
| 32 | +} | ||
| 33 | + |
| 1 | +/// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +using System.Runtime.InteropServices; | ||
| 4 | + | ||
| 5 | +namespace SherpaOnnx | ||
| 6 | +{ | ||
| 7 | + | ||
| 8 | + [StructLayout(LayoutKind.Sequential)] | ||
| 9 | + public struct OfflineSpeakerSegmentationModelConfig | ||
| 10 | + { | ||
| 11 | + public OfflineSpeakerSegmentationModelConfig() | ||
| 12 | + { | ||
| 13 | + Pyannote = new OfflineSpeakerSegmentationPyannoteModelConfig(); | ||
| 14 | + NumThreads = 1; | ||
| 15 | + Debug = 0; | ||
| 16 | + Provider = "cpu"; | ||
| 17 | + } | ||
| 18 | + | ||
| 19 | + public OfflineSpeakerSegmentationPyannoteModelConfig Pyannote; | ||
| 20 | + | ||
| 21 | + /// Number of threads used to run the neural network model | ||
| 22 | + public int NumThreads; | ||
| 23 | + | ||
| 24 | + /// Set to 1 to print debug information of the model; 0 (the default) disables it | ||
| 25 | + public int Debug; | ||
| 26 | + | ||
| 27 | + [MarshalAs(UnmanagedType.LPStr)] | ||
| 28 | + public string Provider; | ||
| 29 | + } | ||
| 30 | +} | ||
| 31 | + | ||
| 32 | + |
| 1 | +/// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +using System.Runtime.InteropServices; | ||
| 4 | + | ||
| 5 | +namespace SherpaOnnx | ||
| 6 | +{ | ||
| 7 | + | ||
| 8 | + [StructLayout(LayoutKind.Sequential)] | ||
| 9 | + public struct OfflineSpeakerSegmentationPyannoteModelConfig | ||
| 10 | + { | ||
| 11 | + public OfflineSpeakerSegmentationPyannoteModelConfig() | ||
| 12 | + { | ||
| 13 | + Model = ""; | ||
| 14 | + } | ||
| 15 | + | ||
| 16 | + [MarshalAs(UnmanagedType.LPStr)] | ||
| 17 | + public string Model; | ||
| 18 | + } | ||
| 19 | +} | ||
| 20 | + |
-
请 注册 或 登录 后发表评论