正在显示
10 个修改的文件
包含
511 行增加
和
2 行删除
| @@ -179,6 +179,7 @@ jobs: | @@ -179,6 +179,7 @@ jobs: | ||
| 179 | cp -v scripts/dotnet/examples/speech-recognition-from-microphone.csproj dotnet-examples/speech-recognition-from-microphone/ | 179 | cp -v scripts/dotnet/examples/speech-recognition-from-microphone.csproj dotnet-examples/speech-recognition-from-microphone/ |
| 180 | cp -v scripts/dotnet/examples/spoken-language-identification.csproj dotnet-examples/spoken-language-identification/ | 180 | cp -v scripts/dotnet/examples/spoken-language-identification.csproj dotnet-examples/spoken-language-identification/ |
| 181 | cp -v scripts/dotnet/examples/streaming-hlg-decoding.csproj dotnet-examples/streaming-hlg-decoding | 181 | cp -v scripts/dotnet/examples/streaming-hlg-decoding.csproj dotnet-examples/streaming-hlg-decoding |
| 182 | + cp -v scripts/dotnet/examples/speaker-identification.csproj dotnet-examples/speaker-identification | ||
| 182 | 183 | ||
| 183 | ls -lh /tmp | 184 | ls -lh /tmp |
| 184 | 185 |
| @@ -17,6 +17,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "spoken-language-identificat | @@ -17,6 +17,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "spoken-language-identificat | ||
| 17 | EndProject | 17 | EndProject |
| 18 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "streaming-hlg-decoding", "streaming-hlg-decoding\streaming-hlg-decoding.csproj", "{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}" | 18 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "streaming-hlg-decoding", "streaming-hlg-decoding\streaming-hlg-decoding.csproj", "{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}" |
| 19 | EndProject | 19 | EndProject |
| 20 | +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speaker-identification", "speaker-identification\speaker-identification.csproj", "{2B1B140E-A92F-426B-B0DF-5D916B67304F}" | ||
| 21 | +EndProject | ||
| 20 | Global | 22 | Global |
| 21 | GlobalSection(SolutionConfigurationPlatforms) = preSolution | 23 | GlobalSection(SolutionConfigurationPlatforms) = preSolution |
| 22 | Debug|Any CPU = Debug|Any CPU | 24 | Debug|Any CPU = Debug|Any CPU |
| @@ -54,5 +56,9 @@ Global | @@ -54,5 +56,9 @@ Global | ||
| 54 | {C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Debug|Any CPU.Build.0 = Debug|Any CPU | 56 | {C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Debug|Any CPU.Build.0 = Debug|Any CPU |
| 55 | {C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.ActiveCfg = Release|Any CPU | 57 | {C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.ActiveCfg = Release|Any CPU |
| 56 | {C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.Build.0 = Release|Any CPU | 58 | {C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.Build.0 = Release|Any CPU |
| 59 | + {2B1B140E-A92F-426B-B0DF-5D916B67304F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||
| 60 | + {2B1B140E-A92F-426B-B0DF-5D916B67304F}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||
| 61 | + {2B1B140E-A92F-426B-B0DF-5D916B67304F}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||
| 62 | + {2B1B140E-A92F-426B-B0DF-5D916B67304F}.Release|Any CPU.Build.0 = Release|Any CPU | ||
| 57 | EndGlobalSection | 63 | EndGlobalSection |
| 58 | EndGlobal | 64 | EndGlobal |
| 1 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | +// | ||
| 3 | +// This file shows how to do speaker identification with sherpa-onnx. | ||
| 4 | +// | ||
| 5 | +// 1. Download a model from | ||
| 6 | +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models | ||
| 7 | +// | ||
| 8 | +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx | ||
| 9 | +// | ||
| 10 | +// 2. Download test data from | ||
| 11 | +// | ||
| 12 | +// git clone https://github.com/csukuangfj/sr-data | ||
| 13 | +// | ||
| 14 | +// 3. Now run it | ||
| 15 | +// | ||
| 16 | +// dotnet run | ||
| 17 | + | ||
| 18 | +using SherpaOnnx; | ||
| 19 | +using System.Collections.Generic; | ||
| 20 | +using System; | ||
| 21 | + | ||
/// <summary>
/// Demonstrates speaker identification: enrolls two speakers from wave files,
/// then searches and verifies test recordings against the enrolled speakers.
/// </summary>
class SpeakerIdentificationDemo
{
    /// <summary>
    /// Reads a wave file, feeds all of its samples to a fresh stream created by
    /// <paramref name="extractor"/> and returns the embedding the extractor
    /// computes for that stream.
    /// </summary>
    /// <param name="extractor">Extractor used to create the stream and compute the embedding.</param>
    /// <param name="filename">Path of the wave file to read.</param>
    /// <returns>The embedding returned by <c>extractor.Compute</c>.</returns>
    public static float[] ComputeEmbedding(SpeakerEmbeddingExtractor extractor, String filename)
    {
        WaveReader reader = new WaveReader(filename);

        // NOTE(review): if OnlineStream is IDisposable it should be disposed here - confirm.
        OnlineStream stream = extractor.CreateStream();
        stream.AcceptWaveform(reader.SampleRate, reader.Samples);
        stream.InputFinished();

        return extractor.Compute(stream);
    }

    // Computes one embedding per file, in the order the files are listed.
    private static float[][] ComputeEmbeddings(SpeakerEmbeddingExtractor extractor, string[] files)
    {
        float[][] embeddings = new float[files.Length][];
        for (int i = 0; i < files.Length; ++i)
        {
            embeddings[i] = ComputeEmbedding(extractor, files[i]);
        }

        return embeddings;
    }

    static void Main(string[] args)
    {
        var config = new SpeakerEmbeddingExtractorConfig();
        config.Model = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";
        config.Debug = 1;

        // Both objects wrap native handles; "using" releases them on every
        // exit path, including the early returns below.
        using var extractor = new SpeakerEmbeddingExtractor(config);
        using var manager = new SpeakerEmbeddingManager(extractor.Dim);

        string[] spk1Files =
            new string[] {
                "./sr-data/enroll/fangjun-sr-1.wav",
                "./sr-data/enroll/fangjun-sr-2.wav",
                "./sr-data/enroll/fangjun-sr-3.wav",
            };
        float[][] spk1Vec = ComputeEmbeddings(extractor, spk1Files);

        string[] spk2Files =
            new string[] {
                "./sr-data/enroll/leijun-sr-1.wav", "./sr-data/enroll/leijun-sr-2.wav",
            };
        float[][] spk2Vec = ComputeEmbeddings(extractor, spk2Files);

        if (!manager.Add("fangjun", spk1Vec))
        {
            Console.WriteLine("Failed to register fangjun");
            return;
        }

        if (!manager.Add("leijun", spk2Vec))
        {
            Console.WriteLine("Failed to register leijun");
            return;
        }

        if (manager.NumSpeakers != 2)
        {
            Console.WriteLine("There should be two speakers");
            return;
        }

        if (!manager.Contains("fangjun"))
        {
            Console.WriteLine("It should contain the speaker fangjun");
            return;
        }

        if (!manager.Contains("leijun"))
        {
            Console.WriteLine("It should contain the speaker leijun");
            return;
        }

        Console.WriteLine("---All speakers---");

        string[] allSpeakers = manager.GetAllSpeakers();
        foreach (var s in allSpeakers)
        {
            Console.WriteLine(s);
        }
        Console.WriteLine("------------");

        string[] testFiles =
            new string[] {
                "./sr-data/test/fangjun-test-sr-1.wav",
                "./sr-data/test/leijun-test-sr-1.wav",
                "./sr-data/test/liudehua-test-sr-1.wav"
            };

        float threshold = 0.6f;
        foreach (var file in testFiles)
        {
            float[] embedding = ComputeEmbedding(extractor, file);

            // Search returns an empty string when no enrolled speaker matches.
            String name = manager.Search(embedding, threshold);
            if (name == "")
            {
                name = "<Unknown>";
            }
            Console.WriteLine("{0}: {1}", file, name);
        }

        // Exercise Verify before and after removing the speaker. The same
        // embedding is used for both checks, so it is computed only once.
        float[] probe = ComputeEmbedding(extractor, testFiles[0]);

        if (!manager.Verify("fangjun", probe, threshold))
        {
            Console.WriteLine("testFiles[0] should match fangjun!");
            return;
        }

        if (!manager.Remove("fangjun"))
        {
            Console.WriteLine("Failed to remove fangjun");
            return;
        }

        if (manager.Verify("fangjun", probe, threshold))
        {
            Console.WriteLine("{0} should match no one!", testFiles[0]);
            return;
        }

        if (manager.NumSpeakers != 1)
        {
            Console.WriteLine("There should only 1 speaker left.");
            return;
        }
    }
}
| 1 | +../offline-decode-files/WaveReader.cs |
#!/usr/bin/env bash
#
# Downloads the speaker-embedding model and the test recordings if they are
# not already present, then runs the speaker-identification example.

set -ex

model=3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

if [ ! -e "./$model" ]; then
  # -f makes curl fail on an HTTP error instead of saving the error page as
  # the model. The file is downloaded under a temporary name and renamed only
  # on success, so an interrupted transfer is not mistaken for a complete
  # model on the next run.
  curl -fSL -o "./$model.tmp" "https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/$model"
  mv "./$model.tmp" "./$model"
fi

if [ ! -d ./sr-data ]; then
  git clone https://github.com/csukuangfj/sr-data
fi

dotnet run
<Project Sdk="Microsoft.NET.Sdk">

  <!-- Console executable for the speaker-identification example. -->
  <PropertyGroup>
    <TargetFramework>net6.0</TargetFramework>
    <OutputType>Exe</OutputType>
    <RootNamespace>speaker_identification</RootNamespace>
    <Nullable>enable</Nullable>
    <ImplicitUsings>enable</ImplicitUsings>
  </PropertyGroup>

  <!-- The sherpa-onnx package is referenced with a floating version ("*"). -->
  <ItemGroup>
    <PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
  </ItemGroup>

</Project>
<Project Sdk="Microsoft.NET.Sdk">

  <!-- Console executable for the speaker-identification example. -->
  <PropertyGroup>
    <TargetFramework>net6.0</TargetFramework>
    <OutputType>Exe</OutputType>
    <RootNamespace>speaker_identification</RootNamespace>
    <Nullable>enable</Nullable>
    <ImplicitUsings>enable</ImplicitUsings>
  </PropertyGroup>

  <!-- /tmp/packages is listed first, ahead of any inherited sources and nuget.org. -->
  <PropertyGroup>
    <RestoreSources>/tmp/packages;$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources>
  </PropertyGroup>

  <!-- The sherpa-onnx package is referenced with a floating version ("*"). -->
  <ItemGroup>
    <PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
  </ItemGroup>

</Project>
| @@ -222,6 +222,14 @@ namespace SherpaOnnx | @@ -222,6 +222,14 @@ namespace SherpaOnnx | ||
| 222 | } | 222 | } |
| 223 | } | 223 | } |
| 224 | 224 | ||
| 225 | + public int NumSpeakers | ||
| 226 | + { | ||
| 227 | + get | ||
| 228 | + { | ||
| 229 | + return SherpaOnnxOfflineTtsNumSpeakers(_handle.Handle); | ||
| 230 | + } | ||
| 231 | + } | ||
| 232 | + | ||
| 225 | [DllImport(Dll.Filename)] | 233 | [DllImport(Dll.Filename)] |
| 226 | private static extern IntPtr SherpaOnnxCreateOfflineTts(ref OfflineTtsConfig config); | 234 | private static extern IntPtr SherpaOnnxCreateOfflineTts(ref OfflineTtsConfig config); |
| 227 | 235 | ||
| @@ -232,6 +240,9 @@ namespace SherpaOnnx | @@ -232,6 +240,9 @@ namespace SherpaOnnx | ||
| 232 | private static extern int SherpaOnnxOfflineTtsSampleRate(IntPtr handle); | 240 | private static extern int SherpaOnnxOfflineTtsSampleRate(IntPtr handle); |
| 233 | 241 | ||
| 234 | [DllImport(Dll.Filename)] | 242 | [DllImport(Dll.Filename)] |
| 243 | + private static extern int SherpaOnnxOfflineTtsNumSpeakers(IntPtr handle); | ||
| 244 | + | ||
| 245 | + [DllImport(Dll.Filename)] | ||
| 235 | private static extern IntPtr SherpaOnnxOfflineTtsGenerate(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string text, int sid, float speed); | 246 | private static extern IntPtr SherpaOnnxOfflineTtsGenerate(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string text, int sid, float speed); |
| 236 | 247 | ||
| 237 | [DllImport(Dll.Filename, CallingConvention = CallingConvention.Cdecl)] | 248 | [DllImport(Dll.Filename, CallingConvention = CallingConvention.Cdecl)] |
| @@ -557,6 +568,112 @@ namespace SherpaOnnx | @@ -557,6 +568,112 @@ namespace SherpaOnnx | ||
| 557 | } | 568 | } |
| 558 | 569 | ||
| 559 | [StructLayout(LayoutKind.Sequential)] | 570 | [StructLayout(LayoutKind.Sequential)] |
| 571 | + public struct SpeakerEmbeddingExtractorConfig | ||
| 572 | + { | ||
| 573 | + public SpeakerEmbeddingExtractorConfig() | ||
| 574 | + { | ||
| 575 | + Model = ""; | ||
| 576 | + NumThreads = 1; | ||
| 577 | + Debug = 0; | ||
| 578 | + Provider = "cpu"; | ||
| 579 | + } | ||
| 580 | + | ||
| 581 | + [MarshalAs(UnmanagedType.LPStr)] | ||
| 582 | + public string Model; | ||
| 583 | + | ||
| 584 | + public int NumThreads; | ||
| 585 | + public int Debug; | ||
| 586 | + | ||
| 587 | + [MarshalAs(UnmanagedType.LPStr)] | ||
| 588 | + public string Provider; | ||
| 589 | + } | ||
| 590 | + | ||
| 591 | + public class SpeakerEmbeddingExtractor : IDisposable | ||
| 592 | + { | ||
| 593 | + public SpeakerEmbeddingExtractor(SpeakerEmbeddingExtractorConfig config) | ||
| 594 | + { | ||
| 595 | + IntPtr h = SherpaOnnxCreateSpeakerEmbeddingExtractor(ref config); | ||
| 596 | + _handle = new HandleRef(this, h); | ||
| 597 | + } | ||
| 598 | + | ||
| 599 | + public OnlineStream CreateStream() | ||
| 600 | + { | ||
| 601 | + IntPtr p = SherpaOnnxSpeakerEmbeddingExtractorCreateStream(_handle.Handle); | ||
| 602 | + return new OnlineStream(p); | ||
| 603 | + } | ||
| 604 | + | ||
| 605 | + public bool IsReady(OnlineStream stream) | ||
| 606 | + { | ||
| 607 | + return SherpaOnnxSpeakerEmbeddingExtractorIsReady(_handle.Handle, stream.Handle) != 0; | ||
| 608 | + } | ||
| 609 | + | ||
| 610 | + public float[] Compute(OnlineStream stream) | ||
| 611 | + { | ||
| 612 | + IntPtr p = SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(_handle.Handle, stream.Handle); | ||
| 613 | + | ||
| 614 | + int dim = Dim; | ||
| 615 | + float[] ans = new float[dim]; | ||
| 616 | + Marshal.Copy(p, ans, 0, dim); | ||
| 617 | + | ||
| 618 | + SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(p); | ||
| 619 | + | ||
| 620 | + return ans; | ||
| 621 | + } | ||
| 622 | + | ||
| 623 | + public int Dim | ||
| 624 | + { | ||
| 625 | + get | ||
| 626 | + { | ||
| 627 | + return SherpaOnnxSpeakerEmbeddingExtractorDim(_handle.Handle); | ||
| 628 | + } | ||
| 629 | + } | ||
| 630 | + | ||
| 631 | + public void Dispose() | ||
| 632 | + { | ||
| 633 | + Cleanup(); | ||
| 634 | + // Prevent the object from being placed on the | ||
| 635 | + // finalization queue | ||
| 636 | + System.GC.SuppressFinalize(this); | ||
| 637 | + } | ||
| 638 | + | ||
| 639 | + ~SpeakerEmbeddingExtractor() | ||
| 640 | + { | ||
| 641 | + Cleanup(); | ||
| 642 | + } | ||
| 643 | + | ||
| 644 | + private void Cleanup() | ||
| 645 | + { | ||
| 646 | + SherpaOnnxDestroySpeakerEmbeddingExtractor(_handle.Handle); | ||
| 647 | + | ||
| 648 | + // Don't permit the handle to be used again. | ||
| 649 | + _handle = new HandleRef(this, IntPtr.Zero); | ||
| 650 | + } | ||
| 651 | + | ||
| 652 | + private HandleRef _handle; | ||
| 653 | + | ||
| 654 | + [DllImport(Dll.Filename)] | ||
| 655 | + private static extern IntPtr SherpaOnnxCreateSpeakerEmbeddingExtractor(ref SpeakerEmbeddingExtractorConfig config); | ||
| 656 | + | ||
| 657 | + [DllImport(Dll.Filename)] | ||
| 658 | + private static extern void SherpaOnnxDestroySpeakerEmbeddingExtractor(IntPtr handle); | ||
| 659 | + | ||
| 660 | + [DllImport(Dll.Filename)] | ||
| 661 | + private static extern int SherpaOnnxSpeakerEmbeddingExtractorDim(IntPtr handle); | ||
| 662 | + | ||
| 663 | + [DllImport(Dll.Filename)] | ||
| 664 | + private static extern IntPtr SherpaOnnxSpeakerEmbeddingExtractorCreateStream(IntPtr handle); | ||
| 665 | + | ||
| 666 | + [DllImport(Dll.Filename)] | ||
| 667 | + private static extern int SherpaOnnxSpeakerEmbeddingExtractorIsReady(IntPtr handle, IntPtr stream); | ||
| 668 | + | ||
| 669 | + [DllImport(Dll.Filename)] | ||
| 670 | + private static extern IntPtr SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(IntPtr handle, IntPtr stream); | ||
| 671 | + | ||
| 672 | + [DllImport(Dll.Filename)] | ||
| 673 | + private static extern void SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(IntPtr p); | ||
| 674 | + } | ||
| 675 | + | ||
| 676 | + [StructLayout(LayoutKind.Sequential)] | ||
| 560 | public struct SpokenLanguageIdentificationWhisperConfig | 677 | public struct SpokenLanguageIdentificationWhisperConfig |
| 561 | { | 678 | { |
| 562 | public SpokenLanguageIdentificationWhisperConfig() | 679 | public SpokenLanguageIdentificationWhisperConfig() |
| @@ -593,6 +710,185 @@ namespace SherpaOnnx | @@ -593,6 +710,185 @@ namespace SherpaOnnx | ||
| 593 | public string Provider; | 710 | public string Provider; |
| 594 | } | 711 | } |
| 595 | 712 | ||
| 713 | + public class SpeakerEmbeddingManager : IDisposable | ||
| 714 | + { | ||
| 715 | + public SpeakerEmbeddingManager(int dim) | ||
| 716 | + { | ||
| 717 | + IntPtr h = SherpaOnnxCreateSpeakerEmbeddingManager(dim); | ||
| 718 | + _handle = new HandleRef(this, h); | ||
| 719 | + this._dim = dim; | ||
| 720 | + } | ||
| 721 | + | ||
| 722 | + public bool Add(string name, float[] v) | ||
| 723 | + { | ||
| 724 | + return SherpaOnnxSpeakerEmbeddingManagerAdd(_handle.Handle, name, v) == 1; | ||
| 725 | + } | ||
| 726 | + | ||
| 727 | + public bool Add(string name, ICollection<float[]> v_list) | ||
| 728 | + { | ||
| 729 | + int n = v_list.Count; | ||
| 730 | + float[] v = new float[n * _dim]; | ||
| 731 | + int i = 0; | ||
| 732 | + foreach (var item in v_list) | ||
| 733 | + { | ||
| 734 | + item.CopyTo(v, i); | ||
| 735 | + i += _dim; | ||
| 736 | + } | ||
| 737 | + | ||
| 738 | + return SherpaOnnxSpeakerEmbeddingManagerAddListFlattened(_handle.Handle, name, v, n) == 1; | ||
| 739 | + } | ||
| 740 | + | ||
| 741 | + public bool Remove(string name) | ||
| 742 | + { | ||
| 743 | + return SherpaOnnxSpeakerEmbeddingManagerRemove(_handle.Handle, name) == 1; | ||
| 744 | + } | ||
| 745 | + | ||
| 746 | + public string Search(float[] v, float threshold) | ||
| 747 | + { | ||
| 748 | + IntPtr p = SherpaOnnxSpeakerEmbeddingManagerSearch(_handle.Handle, v, threshold); | ||
| 749 | + | ||
| 750 | + string s = ""; | ||
| 751 | + int length = 0; | ||
| 752 | + | ||
| 753 | + unsafe | ||
| 754 | + { | ||
| 755 | + byte* b = (byte*)p; | ||
| 756 | + if (b != null) | ||
| 757 | + { | ||
| 758 | + while (*b != 0) | ||
| 759 | + { | ||
| 760 | + ++b; | ||
| 761 | + length += 1; | ||
| 762 | + } | ||
| 763 | + } | ||
| 764 | + } | ||
| 765 | + | ||
| 766 | + if (length > 0) | ||
| 767 | + { | ||
| 768 | + byte[] stringBuffer = new byte[length]; | ||
| 769 | + Marshal.Copy(p, stringBuffer, 0, length); | ||
| 770 | + s = Encoding.UTF8.GetString(stringBuffer); | ||
| 771 | + } | ||
| 772 | + | ||
| 773 | + SherpaOnnxSpeakerEmbeddingManagerFreeSearch(p); | ||
| 774 | + | ||
| 775 | + return s; | ||
| 776 | + } | ||
| 777 | + | ||
| 778 | + public bool Verify(string name, float[] v, float threshold) | ||
| 779 | + { | ||
| 780 | + return SherpaOnnxSpeakerEmbeddingManagerVerify(_handle.Handle, name, v, threshold) == 1; | ||
| 781 | + } | ||
| 782 | + | ||
| 783 | + public bool Contains(string name) | ||
| 784 | + { | ||
| 785 | + return SherpaOnnxSpeakerEmbeddingManagerContains(_handle.Handle, name) == 1; | ||
| 786 | + } | ||
| 787 | + | ||
| 788 | + public string[] GetAllSpeakers() | ||
| 789 | + { | ||
| 790 | + if (NumSpeakers == 0) | ||
| 791 | + { | ||
| 792 | + return new string[] { }; | ||
| 793 | + } | ||
| 794 | + | ||
| 795 | + IntPtr names = SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(_handle.Handle); | ||
| 796 | + | ||
| 797 | + string[] ans = new string[NumSpeakers]; | ||
| 798 | + | ||
| 799 | + unsafe | ||
| 800 | + { | ||
| 801 | + byte** p = (byte**)names; | ||
| 802 | + for (int i = 0; i != NumSpeakers; i++) | ||
| 803 | + { | ||
| 804 | + int length = 0; | ||
| 805 | + byte* s = p[i]; | ||
| 806 | + while (*s != 0) | ||
| 807 | + { | ||
| 808 | + ++s; | ||
| 809 | + length += 1; | ||
| 810 | + } | ||
| 811 | + byte[] stringBuffer = new byte[length]; | ||
| 812 | + Marshal.Copy((IntPtr)p[i], stringBuffer, 0, length); | ||
| 813 | + ans[i] = Encoding.UTF8.GetString(stringBuffer); | ||
| 814 | + } | ||
| 815 | + } | ||
| 816 | + | ||
| 817 | + SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(names); | ||
| 818 | + | ||
| 819 | + return ans; | ||
| 820 | + } | ||
| 821 | + | ||
| 822 | + public void Dispose() | ||
| 823 | + { | ||
| 824 | + Cleanup(); | ||
| 825 | + // Prevent the object from being placed on the | ||
| 826 | + // finalization queue | ||
| 827 | + System.GC.SuppressFinalize(this); | ||
| 828 | + } | ||
| 829 | + | ||
| 830 | + ~SpeakerEmbeddingManager() | ||
| 831 | + { | ||
| 832 | + Cleanup(); | ||
| 833 | + } | ||
| 834 | + | ||
| 835 | + private void Cleanup() | ||
| 836 | + { | ||
| 837 | + SherpaOnnxDestroySpeakerEmbeddingManager(_handle.Handle); | ||
| 838 | + | ||
| 839 | + // Don't permit the handle to be used again. | ||
| 840 | + _handle = new HandleRef(this, IntPtr.Zero); | ||
| 841 | + } | ||
| 842 | + | ||
| 843 | + public int NumSpeakers | ||
| 844 | + { | ||
| 845 | + get | ||
| 846 | + { | ||
| 847 | + return SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(_handle.Handle); | ||
| 848 | + } | ||
| 849 | + } | ||
| 850 | + | ||
| 851 | + private HandleRef _handle; | ||
| 852 | + private int _dim; | ||
| 853 | + | ||
| 854 | + | ||
| 855 | + [DllImport(Dll.Filename)] | ||
| 856 | + private static extern IntPtr SherpaOnnxCreateSpeakerEmbeddingManager(int dim); | ||
| 857 | + | ||
| 858 | + [DllImport(Dll.Filename)] | ||
| 859 | + private static extern void SherpaOnnxDestroySpeakerEmbeddingManager(IntPtr handle); | ||
| 860 | + | ||
| 861 | + [DllImport(Dll.Filename)] | ||
| 862 | + private static extern int SherpaOnnxSpeakerEmbeddingManagerAdd(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string name, float[] v); | ||
| 863 | + | ||
| 864 | + [DllImport(Dll.Filename)] | ||
| 865 | + private static extern int SherpaOnnxSpeakerEmbeddingManagerAddListFlattened(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string name, float[] v, int n); | ||
| 866 | + | ||
| 867 | + [DllImport(Dll.Filename)] | ||
| 868 | + private static extern int SherpaOnnxSpeakerEmbeddingManagerRemove(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string name); | ||
| 869 | + | ||
| 870 | + [DllImport(Dll.Filename)] | ||
| 871 | + private static extern IntPtr SherpaOnnxSpeakerEmbeddingManagerSearch(IntPtr handle, float[] v, float threshold); | ||
| 872 | + | ||
| 873 | + [DllImport(Dll.Filename)] | ||
| 874 | + private static extern void SherpaOnnxSpeakerEmbeddingManagerFreeSearch(IntPtr p); | ||
| 875 | + | ||
| 876 | + [DllImport(Dll.Filename)] | ||
| 877 | + private static extern int SherpaOnnxSpeakerEmbeddingManagerVerify(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string name, float[] v, float threshold); | ||
| 878 | + | ||
| 879 | + [DllImport(Dll.Filename)] | ||
| 880 | + private static extern int SherpaOnnxSpeakerEmbeddingManagerContains(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string name); | ||
| 881 | + | ||
| 882 | + [DllImport(Dll.Filename)] | ||
| 883 | + private static extern int SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(IntPtr handle); | ||
| 884 | + | ||
| 885 | + [DllImport(Dll.Filename)] | ||
| 886 | + private static extern IntPtr SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(IntPtr handle); | ||
| 887 | + | ||
| 888 | + [DllImport(Dll.Filename)] | ||
| 889 | + private static extern void SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(IntPtr names); | ||
| 890 | + } | ||
| 891 | + | ||
| 596 | public class SpokenLanguageIdentificationResult | 892 | public class SpokenLanguageIdentificationResult |
| 597 | { | 893 | { |
| 598 | public SpokenLanguageIdentificationResult(IntPtr handle) | 894 | public SpokenLanguageIdentificationResult(IntPtr handle) |
-
请 注册 或 登录 后发表评论