Fangjun Kuang
Committed by GitHub

Wrap VAD APIs to C# (#946)

... ... @@ -2,7 +2,10 @@
cd dotnet-examples/
cd offline-punctuation
cd vad-non-streaming-asr-paraformer
./run.sh
cd ../offline-punctuation
./run.sh
cd ../speaker-identification
... ...
... ... @@ -67,7 +67,7 @@ jobs:
-DCMAKE_BUILD_TYPE=Release \
-DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
-DBUILD_ESPEAK_NG_EXE=OFF \
-DSHERPA_ONNX_ENABLE_BINARY=OFF \
-DSHERPA_ONNX_ENABLE_BINARY=ON \
..
cmake --build . --target install --config Release
... ... @@ -197,6 +197,7 @@ jobs:
cp -v scripts/dotnet/examples/streaming-hlg-decoding.csproj dotnet-examples/streaming-hlg-decoding
cp -v scripts/dotnet/examples/speaker-identification.csproj dotnet-examples/speaker-identification
cp -v scripts/dotnet/examples/offline-punctuation.csproj dotnet-examples/offline-punctuation
cp -v scripts/dotnet/examples/vad-non-streaming-asr-paraformer.csproj dotnet-examples/vad-non-streaming-asr-paraformer
ls -lh /tmp
... ...
... ... @@ -17,7 +17,6 @@ using System;
class OfflinePunctuationDemo
{
static void Main(string[] args)
{
var config = new OfflinePunctuationConfig();
... ... @@ -42,4 +41,3 @@ class OfflinePunctuationDemo
}
}
}
... ...
... ... @@ -21,6 +21,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speaker-identification", "s
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-punctuation", "offline-punctuation\offline-punctuation.csproj", "{42D85582-BB63-4259-A4EA-837D66AC078B}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "vad-non-streaming-asr-paraformer", "vad-non-streaming-asr-paraformer\vad-non-streaming-asr-paraformer.csproj", "{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
... ... @@ -66,5 +68,9 @@ Global
{42D85582-BB63-4259-A4EA-837D66AC078B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{42D85582-BB63-4259-A4EA-837D66AC078B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{42D85582-BB63-4259-A4EA-837D66AC078B}.Release|Any CPU.Build.0 = Release|Any CPU
{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Debug|Any CPU.Build.0 = Debug|Any CPU
{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Release|Any CPU.ActiveCfg = Release|Any CPU
{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal
... ...
// Copyright (c) 2024 Xiaomi Corporation
//
// This file shows how to use a silero_vad model with a non-streaming Paraformer
// for speech recognition.
using SherpaOnnx;
using System.Collections.Generic;
using System;
/// <summary>
/// Example program: cuts a wave file into speech segments with a silero VAD
/// model and decodes every segment with a non-streaming Paraformer recognizer.
/// </summary>
class VadNonStreamingAsrParaformer
{
    /// <summary>
    /// Loads the models and the test wave from the current directory, feeds the
    /// audio to the detector one window at a time and prints
    /// "start--end: text" for each segment that decodes to non-empty text.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        // please download model files from
        // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
        OfflineRecognizerConfig config = new OfflineRecognizerConfig();
        config.ModelConfig.Paraformer.Model = "./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx";
        config.ModelConfig.Tokens = "./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt";
        config.ModelConfig.Debug = 0;
        OfflineRecognizer recognizer = new OfflineRecognizer(config);

        VadModelConfig vadModelConfig = new VadModelConfig();
        vadModelConfig.SileroVad.Model = "./silero_vad.onnx";
        vadModelConfig.Debug = 0;

        string testWaveFilename = "./lei-jun-test.wav";
        WaveReader reader = new WaveReader(testWaveFilename);

        int numSamples = reader.Samples.Length;
        int windowSize = vadModelConfig.SileroVad.WindowSize;
        int sampleRate = vadModelConfig.SampleRate;

        // Only whole windows are fed to the detector; trailing samples that do
        // not fill a complete window are ignored, as in the original loop.
        int numIter = numSamples / windowSize;

        // The detector owns a native handle, so release it deterministically.
        using (VoiceActivityDetector vad = new VoiceActivityDetector(vadModelConfig, 60))
        {
            for (int i = 0; i != numIter; ++i)
            {
                float[] samples = new float[windowSize];
                Array.Copy(reader.Samples, i * windowSize, samples, 0, windowSize);
                vad.AcceptWaveform(samples);

                // Drain the queue after every window, regardless of
                // IsSpeechDetected(). Gating on it delays finished segments
                // until speech is detected again, and the last segments of the
                // file may then never be printed.
                DecodeQueuedSegments(vad, recognizer, sampleRate);
            }
        }
    }

    /// <summary>
    /// Decodes and prints every segment currently queued in the detector,
    /// popping each one once it has been handled.
    /// </summary>
    /// <param name="vad">Detector whose queued segments are consumed.</param>
    /// <param name="recognizer">Recognizer used to decode each segment.</param>
    /// <param name="sampleRate">Sample rate used to convert sample offsets to seconds.</param>
    private static void DecodeQueuedSegments(VoiceActivityDetector vad, OfflineRecognizer recognizer, int sampleRate)
    {
        while (!vad.IsEmpty())
        {
            SpeechSegment segment = vad.Front();
            float startTime = segment.Start / (float)sampleRate;
            float duration = segment.Samples.Length / (float)sampleRate;

            OfflineStream stream = recognizer.CreateStream();
            stream.AcceptWaveform(sampleRate, segment.Samples);
            recognizer.Decode(stream);

            string text = stream.Result.Text;
            if (!string.IsNullOrEmpty(text))
            {
                Console.WriteLine($"{startTime:0.00}--{startTime + duration:0.00}: {text}");
            }

            vad.Pop();
        }
    }
}
... ...
../online-decode-files/WaveReader.cs
\ No newline at end of file
... ...
#!/usr/bin/env bash
#
# Downloads the VAD model, the test wave and the Paraformer model into the
# current directory when they are missing, then runs the example.

set -ex

# Release that hosts every asset downloaded below.
base_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models

# curl is invoked with -f so that an HTTP error makes the script stop instead
# of saving the error page under the asset's name; such a bogus file would
# satisfy the existence checks below on every later run.

if [ ! -f ./silero_vad.onnx ]; then
  curl -fSL -O "$base_url/silero_vad.onnx"
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -fSL -O "$base_url/lei-jun-test.wav"
fi

if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then
  curl -fSL -O "$base_url/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2"
  tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
fi

dotnet run
... ...
<Project Sdk="Microsoft.NET.Sdk">
<!-- Console project for the VAD + non-streaming Paraformer example. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<RootNamespace>vad_non_streaming_asr_paraformer</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<!-- Version="*" is a floating version: the package version is chosen at
     restore time rather than pinned here. -->
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
</ItemGroup>
</Project>
... ...
... ... @@ -39,10 +39,6 @@ public class VadNonStreamingParaformer {
String model = "./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx";
String tokens = "./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt";
String waveFilename = "./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/3-sichuan.wav";
WaveReader reader = new WaveReader(waveFilename);
OfflineParaformerModelConfig paraformer =
OfflineParaformerModelConfig.builder().setModel(model).build();
... ...
// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
using System.Linq;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;
using System;
namespace SherpaOnnx
{
/// <summary>
/// Managed wrapper around the native sherpa-onnx circular buffer of float
/// samples. Every member forwards to the matching native function.
/// </summary>
public class CircularBuffer : IDisposable
{
    /// <summary>Creates the native buffer.</summary>
    /// <param name="capacity">Capacity passed unchanged to the native constructor.</param>
    public CircularBuffer(int capacity)
    {
        IntPtr h = SherpaOnnxCreateCircularBuffer(capacity);
        _handle = new HandleRef(this, h);
    }

    /// <summary>Pushes all elements of <paramref name="data"/> into the buffer.</summary>
    /// <param name="data">Samples to push; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    public void Push(float[] data)
    {
        if (data == null)
        {
            throw new ArgumentNullException(nameof(data));
        }

        SherpaOnnxCircularBufferPush(_handle.Handle, data, data.Length);
    }

    /// <summary>
    /// Returns a managed copy of <paramref name="n"/> samples obtained from the
    /// native buffer starting at <paramref name="startIndex"/>.
    /// </summary>
    /// <param name="startIndex">Start index passed unchanged to the native call.</param>
    /// <param name="n">Number of samples to fetch; must not be negative.</param>
    /// <returns>A new array holding the <paramref name="n"/> samples.</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="n"/> is negative.</exception>
    public float[] Get(int startIndex, int n)
    {
        // Reject a negative count before the native call: allocating the result
        // array would throw after the native array had already been handed out.
        if (n < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(n));
        }

        IntPtr p = SherpaOnnxCircularBufferGet(_handle.Handle, startIndex, n);
        try
        {
            float[] ans = new float[n];
            Marshal.Copy(p, ans, 0, n);
            return ans;
        }
        finally
        {
            // Always give the native array back, even when the copy fails.
            if (p != IntPtr.Zero)
            {
                SherpaOnnxCircularBufferFree(p);
            }
        }
    }

    /// <summary>Forwards a pop of <paramref name="n"/> elements to the native buffer.</summary>
    /// <param name="n">Element count passed unchanged to the native call.</param>
    public void Pop(int n)
    {
        SherpaOnnxCircularBufferPop(_handle.Handle, n);
    }

    /// <summary>Value reported by the native size function.</summary>
    public int Size
    {
        get
        {
            return SherpaOnnxCircularBufferSize(_handle.Handle);
        }
    }

    /// <summary>Value reported by the native head function.</summary>
    public int Head
    {
        get
        {
            return SherpaOnnxCircularBufferHead(_handle.Handle);
        }
    }

    /// <summary>Forwards a reset request to the native buffer.</summary>
    public void Reset()
    {
        SherpaOnnxCircularBufferReset(_handle.Handle);
    }

    /// <summary>Releases the native buffer. Safe to call more than once.</summary>
    public void Dispose()
    {
        Cleanup();
        // Prevent the object from being placed on the
        // finalization queue
        System.GC.SuppressFinalize(this);
    }

    ~CircularBuffer()
    {
        Cleanup();
    }

    // Destroys the native buffer at most once; later calls are no-ops.
    private void Cleanup()
    {
        IntPtr h = _handle.Handle;
        if (h == IntPtr.Zero)
        {
            return;
        }

        // Clear the handle first so it can never be destroyed twice.
        _handle = new HandleRef(this, IntPtr.Zero);
        SherpaOnnxDestroyCircularBuffer(h);
    }

    private HandleRef _handle;

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxCreateCircularBuffer(int capacity);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxDestroyCircularBuffer(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxCircularBufferPush(IntPtr handle, float[] p, int n);

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxCircularBufferGet(IntPtr handle, int startIndex, int n);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxCircularBufferFree(IntPtr p);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxCircularBufferPop(IntPtr handle, int n);

    [DllImport(Dll.Filename)]
    private static extern int SherpaOnnxCircularBufferSize(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern int SherpaOnnxCircularBufferHead(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxCircularBufferReset(IntPtr handle);
}
}
... ...
// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
using System.Linq;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;
using System;
namespace SherpaOnnx
{
/// <summary>
/// Settings for the silero VAD model. The struct uses sequential layout, so
/// the declaration order of the fields below must not be changed.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct SileroVadModelConfig
{
/// <summary>
/// Initializes every field to its default: empty model path, threshold 0.5,
/// minimum silence 0.5, minimum speech 0.25 and a window of 512.
/// </summary>
public SileroVadModelConfig()
{
Model = "";
Threshold = 0.5F;
MinSilenceDuration = 0.5F;
MinSpeechDuration = 0.25F;
WindowSize = 512;
}
/// <summary>Path of the model file; marshalled as an LPStr string.</summary>
[MarshalAs(UnmanagedType.LPStr)]
public string Model;
// NOTE(review): presumably the speech-probability threshold; confirm against the native library.
public float Threshold;
// NOTE(review): presumably expressed in seconds; confirm against the native library.
public float MinSilenceDuration;
// NOTE(review): presumably expressed in seconds; confirm against the native library.
public float MinSpeechDuration;
// NOTE(review): presumably the number of samples per chunk handed to the detector; confirm.
public int WindowSize;
}
}
... ...
// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
using System.Linq;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;
using System;
namespace SherpaOnnx
{
/// <summary>
/// Managed snapshot of a native speech segment: its start value and a copy of
/// its samples. The native memory is not referenced after construction.
/// </summary>
public class SpeechSegment
{
    /// <summary>
    /// Reads the native segment structure at <paramref name="handle"/> and
    /// copies its samples into a managed array.
    /// </summary>
    /// <param name="handle">Pointer to the native segment structure; must not be zero.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="handle"/> is zero, or the segment reports a positive
    /// sample count together with a null sample pointer.
    /// </exception>
    public SpeechSegment(IntPtr handle)
    {
        Impl impl = Marshal.PtrToStructure<Impl>(handle);

        _start = impl.Start;
        _samples = new float[impl.Count];

        // Marshal.Copy rejects a null source even for a zero-length copy, so
        // an empty segment is left as an empty array without touching it.
        if (impl.Count > 0)
        {
            Marshal.Copy(impl.Samples, _samples, 0, impl.Count);
        }
    }

    // Kept public only for backward compatibility; read Start instead.
    public int _start;

    /// <summary>Start value of the segment as reported by the native structure.</summary>
    public int Start => _start;

    private float[] _samples;

    /// <summary>Managed copy of the segment's samples.</summary>
    public float[] Samples => _samples;

    // Mirrors the native segment structure; field order defines the layout.
    [StructLayout(LayoutKind.Sequential)]
    struct Impl
    {
        public int Start;
        public IntPtr Samples;
        public int Count;
    }
}
}
... ...
// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
using System.Linq;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;
using System;
namespace SherpaOnnx
{
/// <summary>
/// Top-level configuration for the voice activity detector. The struct uses
/// sequential layout, so the declaration order of the fields below must not
/// be changed.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct VadModelConfig
{
/// <summary>
/// Initializes every field to its default: a default silero configuration,
/// sample rate 16000, one thread, provider "cpu" and debug 0.
/// </summary>
public VadModelConfig()
{
SileroVad = new SileroVadModelConfig();
SampleRate = 16000;
NumThreads = 1;
Provider = "cpu";
Debug = 0;
}
/// <summary>Nested silero VAD settings.</summary>
public SileroVadModelConfig SileroVad;
// NOTE(review): presumably the sample rate in Hz of the audio fed to the detector; confirm.
public int SampleRate;
// NOTE(review): presumably the thread count used by the native runtime; confirm.
public int NumThreads;
// Marshalled as an LPStr string. NOTE(review): presumably names the execution provider; confirm.
[MarshalAs(UnmanagedType.LPStr)]
public string Provider;
// NOTE(review): presumably a non-zero value enables native debug output; confirm.
public int Debug;
}
}
... ...
// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
using System.Linq;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;
using System;
namespace SherpaOnnx
{
/// <summary>
/// Managed wrapper around the native sherpa-onnx voice activity detector.
/// Audio is supplied through <see cref="AcceptWaveform"/>; queued segments are
/// read with <see cref="Front"/> and removed with <see cref="Pop"/>.
/// </summary>
public class VoiceActivityDetector : IDisposable
{
    /// <summary>Creates the native detector.</summary>
    /// <param name="config">Model configuration passed by reference to the native constructor.</param>
    /// <param name="bufferSizeInSeconds">Buffer size passed unchanged to the native constructor.</param>
    public VoiceActivityDetector(VadModelConfig config, float bufferSizeInSeconds)
    {
        IntPtr h = SherpaOnnxCreateVoiceActivityDetector(ref config, bufferSizeInSeconds);
        _handle = new HandleRef(this, h);
    }

    /// <summary>Feeds all elements of <paramref name="samples"/> to the detector.</summary>
    /// <param name="samples">Audio samples; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="samples"/> is null.</exception>
    public void AcceptWaveform(float[] samples)
    {
        if (samples == null)
        {
            throw new ArgumentNullException(nameof(samples));
        }

        SherpaOnnxVoiceActivityDetectorAcceptWaveform(_handle.Handle, samples, samples.Length);
    }

    /// <summary>True when the native "empty" query returns 1.</summary>
    public bool IsEmpty()
    {
        return SherpaOnnxVoiceActivityDetectorEmpty(_handle.Handle) == 1;
    }

    /// <summary>True when the native "detected" query returns 1.</summary>
    public bool IsSpeechDetected()
    {
        return SherpaOnnxVoiceActivityDetectorDetected(_handle.Handle) == 1;
    }

    /// <summary>Forwards a pop request to the native detector.</summary>
    public void Pop()
    {
        SherpaOnnxVoiceActivityDetectorPop(_handle.Handle);
    }

    /// <summary>
    /// Returns a managed copy of the segment handed out by the native "front"
    /// call. The native segment is destroyed before this method returns.
    /// </summary>
    /// <returns>A <see cref="SpeechSegment"/> that owns its own sample array.</returns>
    public SpeechSegment Front()
    {
        IntPtr p = SherpaOnnxVoiceActivityDetectorFront(_handle.Handle);
        try
        {
            return new SpeechSegment(p);
        }
        finally
        {
            // Release the native segment even when the managed copy fails.
            if (p != IntPtr.Zero)
            {
                SherpaOnnxDestroySpeechSegment(p);
            }
        }
    }

    /// <summary>Forwards a clear request to the native detector.</summary>
    public void Clear()
    {
        SherpaOnnxVoiceActivityDetectorClear(_handle.Handle);
    }

    /// <summary>Forwards a reset request to the native detector.</summary>
    public void Reset()
    {
        SherpaOnnxVoiceActivityDetectorReset(_handle.Handle);
    }

    /// <summary>Releases the native detector. Safe to call more than once.</summary>
    public void Dispose()
    {
        Cleanup();
        // Prevent the object from being placed on the
        // finalization queue
        System.GC.SuppressFinalize(this);
    }

    ~VoiceActivityDetector()
    {
        Cleanup();
    }

    // Destroys the native detector at most once; later calls are no-ops.
    private void Cleanup()
    {
        IntPtr h = _handle.Handle;
        if (h == IntPtr.Zero)
        {
            return;
        }

        // Clear the handle first so it can never be destroyed twice.
        _handle = new HandleRef(this, IntPtr.Zero);
        SherpaOnnxDestroyVoiceActivityDetector(h);
    }

    private HandleRef _handle;

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxCreateVoiceActivityDetector(ref VadModelConfig config, float bufferSizeInSeconds);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxDestroyVoiceActivityDetector(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxVoiceActivityDetectorAcceptWaveform(IntPtr handle, float[] samples, int n);

    [DllImport(Dll.Filename)]
    private static extern int SherpaOnnxVoiceActivityDetectorEmpty(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern int SherpaOnnxVoiceActivityDetectorDetected(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxVoiceActivityDetectorPop(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxVoiceActivityDetectorClear(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxVoiceActivityDetectorFront(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxDestroySpeechSegment(IntPtr segment);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxVoiceActivityDetectorReset(IntPtr handle);
}
}
... ...
<Project Sdk="Microsoft.NET.Sdk">
<!-- Variant of the example project that also restores packages from a local directory. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<RootNamespace>vad_non_streaming_asr_paraformer</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<PropertyGroup>
<!-- /tmp/packages is listed ahead of any inherited sources and nuget.org. -->
<RestoreSources>/tmp/packages;$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources>
</PropertyGroup>
<ItemGroup>
<!-- Version="*" is a floating version: the package version is chosen at
     restore time rather than pinned here. -->
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
</ItemGroup>
</Project>
... ...
... ... @@ -4,7 +4,7 @@
<PackageReadmeFile>README.md</PackageReadmeFile>
<OutputType>Library</OutputType>
<LangVersion>10.0</LangVersion>
<TargetFrameworks>netstandard2.0;netcoreapp3.1;net6.0;net7.0</TargetFrameworks>
<TargetFrameworks>netstandard2.0</TargetFrameworks>
<RuntimeIdentifiers>linux-x64;osx-x64;win-x64</RuntimeIdentifiers>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<AssemblyName>sherpa-onnx</AssemblyName>
... ...
... ... @@ -3,7 +3,7 @@
<PackageLicenseExpression>Apache-2.0</PackageLicenseExpression>
<PackageReadmeFile>README.md</PackageReadmeFile>
<OutputType>Library</OutputType>
<TargetFrameworks>netstandard2.0;netcoreapp3.1;net6.0;net7.0</TargetFrameworks>
<TargetFrameworks>netstandard2.0</TargetFrameworks>
<RuntimeIdentifier>{{ dotnet_rid }}</RuntimeIdentifier>
<AssemblyName>sherpa-onnx</AssemblyName>
<Version>{{ version }}</Version>
... ...