Fangjun Kuang
Committed by GitHub

Add C# API for speech enhancement GTCRN models (#1990)

... ... @@ -2,7 +2,11 @@
cd dotnet-examples/
cd ./kokoro-tts
cd ./speech-enhancement-gtcrn
./run.sh
ls -lh
cd ../kokoro-tts
./run-kokoro.sh
ls -lh
... ...
... ... @@ -35,6 +35,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts", "kokoro-tts\ko
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts-play", "kokoro-tts-play\kokoro-tts-play.csproj", "{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speech-enhancement-gtcrn", "speech-enhancement-gtcrn\speech-enhancement-gtcrn.csproj", "{DF2569C6-6011-4716-9538-F9E9069E00EB}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
... ... @@ -105,6 +107,10 @@ Global
{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.Build.0 = Release|Any CPU
{DF2569C6-6011-4716-9538-F9E9069E00EB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{DF2569C6-6011-4716-9538-F9E9069E00EB}.Debug|Any CPU.Build.0 = Debug|Any CPU
{DF2569C6-6011-4716-9538-F9E9069E00EB}.Release|Any CPU.ActiveCfg = Release|Any CPU
{DF2569C6-6011-4716-9538-F9E9069E00EB}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
... ...
// Copyright (c) 2025 Xiaomi Corporation
//
// This file shows how to use speech enhancement API with GTCRN models.
//
// 1. Download a model from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
//
// 2. Download a test file
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
//
// 3. Now run it
//
// dotnet run
using SherpaOnnx;
/// <summary>
/// Minimal example: denoise <c>./inp_16k.wav</c> with a GTCRN model and write
/// the result to <c>./enhanced-16k.wav</c>.
/// </summary>
class OfflineSpeechEnhancementDemo
{
    /// <summary>
    /// Entry point. Expects <c>./gtcrn_simple.onnx</c> and <c>./inp_16k.wav</c>
    /// in the current directory (see the download instructions at the top of this file).
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        var config = new OfflineSpeechDenoiserConfig();
        config.Model.Gtcrn.Model = "./gtcrn_simple.onnx";
        config.Model.Debug = 1;
        config.Model.NumThreads = 1;

        // The denoiser wraps a native handle; release it deterministically
        // instead of waiting for the finalizer.
        using var sd = new OfflineSpeechDenoiser(config);

        WaveReader waveReader = new WaveReader("./inp_16k.wav");
        var denoisedAudio = sd.Run(waveReader.Samples, waveReader.SampleRate);
        try
        {
            var outputFilename = "./enhanced-16k.wav";
            var ok = denoisedAudio.SaveToWaveFile(outputFilename);
            if (ok)
            {
                Console.WriteLine($"Wrote to {outputFilename} succeeded!");
            }
            else
            {
                Console.WriteLine($"Failed to write {outputFilename}");
            }
        }
        finally
        {
            // The result also owns native memory; free it before the denoiser
            // itself is disposed at the end of Main.
            denoisedAudio.Dispose();
        }
    }
}
... ...
#!/usr/bin/env bash
# Downloads the GTCRN model and the test wave file if they are not already
# present in the current directory, then runs the example with `dotnet run`.

# -e: abort on the first failing command; -x: echo each command as it runs.
set -ex

# -f makes curl exit non-zero on an HTTP error, so that `set -e` aborts the
# script instead of an error page being saved under the model's file name
# (which the existence check below would then accept on every later run).
if [ ! -f ./gtcrn_simple.onnx ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi

dotnet run
... ...
<Project Sdk="Microsoft.NET.Sdk">
<!-- Build settings for the speech-enhancement-gtcrn example: a console executable targeting net8.0. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<RootNamespace>speech_enhancement_gtcrn</RootNamespace>
<!-- Implicit usings are enabled; the example program calls Console without an explicit `using System;`. -->
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<!-- NOTE(review): presumably Common.csproj supplies the SherpaOnnx types used by the example; confirm. -->
<ProjectReference Include="..\Common\Common.csproj" />
</ItemGroup>
</Project>
... ...
/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
using System;
using System.Runtime.InteropServices;
using System.Text;
namespace SherpaOnnx
{
/// <summary>
/// Managed wrapper around a native denoised-audio result. The wrapped pointer
/// refers to a native struct holding a sample buffer, the number of samples
/// and the sample rate (see <see cref="Impl"/>).
/// </summary>
/// <remarks>
/// The native memory is released by <see cref="Dispose"/> or, failing that, by
/// the finalizer. After disposal every accessor throws
/// <see cref="ObjectDisposedException"/>.
/// </remarks>
public class DenoisedAudio : IDisposable
{
    /// <summary>Wraps a native denoised-audio pointer and takes ownership of it.</summary>
    /// <param name="p">Pointer returned by the native library.</param>
    public DenoisedAudio(IntPtr p)
    {
        _handle = new HandleRef(this, p);
    }

    /// <summary>Writes the samples to a wave file.</summary>
    /// <param name="filename">Destination path; passed to native code as a null-terminated UTF-8 string.</param>
    /// <returns><c>true</c> if the native writer reported success (status 1); otherwise <c>false</c>.</returns>
    /// <exception cref="ObjectDisposedException">The native handle is null.</exception>
    public bool SaveToWaveFile(String filename)
    {
        Impl impl = ReadImpl();

        byte[] utf8Filename = Encoding.UTF8.GetBytes(filename);
        byte[] utf8FilenameWithNull = new byte[utf8Filename.Length + 1]; // +1 for null terminator
        Array.Copy(utf8Filename, utf8FilenameWithNull, utf8Filename.Length);
        utf8FilenameWithNull[utf8Filename.Length] = 0; // Null terminator

        int status = SherpaOnnxWriteWave(impl.Samples, impl.NumSamples, impl.SampleRate, utf8FilenameWithNull);

        // impl.Samples is a raw pointer into memory owned by this object; keep
        // the object reachable so the finalizer cannot free it mid-call.
        GC.KeepAlive(this);
        return status == 1;
    }

    ~DenoisedAudio()
    {
        Cleanup();
    }

    /// <summary>Releases the native audio. Safe to call more than once.</summary>
    public void Dispose()
    {
        Cleanup();
        // Prevent the object from being placed on the
        // finalization queue
        System.GC.SuppressFinalize(this);
    }

    private void Cleanup()
    {
        IntPtr h = Handle;
        if (h == IntPtr.Zero)
        {
            // Already released (or never valid): never hand a null pointer
            // to the native destroy function.
            return;
        }

        SherpaOnnxDestroyDenoisedAudio(h);
        // Don't permit the handle to be used again.
        _handle = new HandleRef(this, IntPtr.Zero);
    }

    /// <summary>Managed mirror of the native struct the handle points to.</summary>
    [StructLayout(LayoutKind.Sequential)]
    struct Impl
    {
        public IntPtr Samples;
        public int NumSamples;
        public int SampleRate;
    }

    private HandleRef _handle;

    /// <summary>The raw native pointer, or <see cref="IntPtr.Zero"/> after disposal.</summary>
    public IntPtr Handle => _handle.Handle;

    /// <summary>Number of samples in the native buffer.</summary>
    /// <exception cref="ObjectDisposedException">The native handle is null.</exception>
    public int NumSamples
    {
        get
        {
            int n = ReadImpl().NumSamples;
            GC.KeepAlive(this);
            return n;
        }
    }

    /// <summary>Sample rate stored in the native struct.</summary>
    /// <exception cref="ObjectDisposedException">The native handle is null.</exception>
    public int SampleRate
    {
        get
        {
            int rate = ReadImpl().SampleRate;
            GC.KeepAlive(this);
            return rate;
        }
    }

    /// <summary>A fresh managed copy of the native sample buffer (allocated on every call).</summary>
    /// <exception cref="ObjectDisposedException">The native handle is null.</exception>
    public float[] Samples
    {
        get
        {
            Impl impl = ReadImpl();
            if (impl.NumSamples <= 0)
            {
                // Nothing to copy; avoids Marshal.Copy rejecting a null source pointer.
                return new float[0];
            }

            float[] samples = new float[impl.NumSamples];
            Marshal.Copy(impl.Samples, samples, 0, impl.NumSamples);
            GC.KeepAlive(this);
            return samples;
        }
    }

    // Reads the native struct behind the handle, failing with a clear managed
    // exception when the handle is null.
    private Impl ReadImpl()
    {
        IntPtr h = Handle;
        if (h == IntPtr.Zero)
        {
            throw new ObjectDisposedException(
                nameof(DenoisedAudio),
                "The native audio handle is null: the object was disposed or was created from a null pointer.");
        }

        return (Impl)Marshal.PtrToStructure(h, typeof(Impl));
    }

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxDestroyDenoisedAudio(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern int SherpaOnnxWriteWave(IntPtr samples, int n, int sample_rate, [MarshalAs(UnmanagedType.LPArray, ArraySubType = UnmanagedType.I1)] byte[] utf8Filename);
}
}
... ...
/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
using System.Runtime.InteropServices;
namespace SherpaOnnx
{
/// <summary>
/// Managed wrapper around a native offline speech denoiser. Owns a native
/// handle that is released by <see cref="Dispose"/> or by the finalizer.
/// </summary>
public class OfflineSpeechDenoiser: IDisposable
{
    /// <summary>Creates the native denoiser from <paramref name="config"/>.</summary>
    /// <param name="config">Configuration marshaled by reference to the native create function.</param>
    public OfflineSpeechDenoiser(OfflineSpeechDenoiserConfig config)
    {
        IntPtr h = SherpaOnnxCreateOfflineSpeechDenoiser(ref config);
        _handle = new HandleRef(this, h);
    }

    /// <summary>Runs the denoiser over a buffer of samples.</summary>
    /// <param name="samples">Input samples; the whole array is passed to native code.</param>
    /// <param name="sampleRate">Sample rate of <paramref name="samples"/>.</param>
    /// <returns>A <see cref="DenoisedAudio"/> wrapping the pointer returned by the native call.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="samples"/> is null.</exception>
    /// <exception cref="System.ObjectDisposedException">The native handle is null.</exception>
    public DenoisedAudio Run(float[] samples, int sampleRate)
    {
        if (samples == null)
        {
            throw new System.ArgumentNullException(nameof(samples));
        }

        IntPtr h = GetValidHandle();
        IntPtr p = SherpaOnnxOfflineSpeechDenoiserRun(h, samples, samples.Length, sampleRate);

        // Keep this object reachable until the native call has returned so the
        // finalizer cannot destroy the handle while it is in use.
        System.GC.KeepAlive(this);
        return new DenoisedAudio(p);
    }

    /// <summary>Releases the native denoiser. Safe to call more than once.</summary>
    public void Dispose()
    {
        Cleanup();
        // Prevent the object from being placed on the
        // finalization queue
        System.GC.SuppressFinalize(this);
    }

    ~OfflineSpeechDenoiser()
    {
        Cleanup();
    }

    private void Cleanup()
    {
        IntPtr h = _handle.Handle;
        if (h == IntPtr.Zero)
        {
            // Already released (or creation yielded a null handle): never pass
            // a null pointer to the native destroy function.
            return;
        }

        SherpaOnnxDestroyOfflineSpeechDenoiser(h);
        // Don't permit the handle to be used again.
        _handle = new HandleRef(this, IntPtr.Zero);
    }

    private HandleRef _handle;

    /// <summary>Sample rate reported by the native denoiser.</summary>
    /// <exception cref="System.ObjectDisposedException">The native handle is null.</exception>
    public int SampleRate
    {
        get
        {
            int rate = SherpaOnnxOfflineSpeechDenoiserGetSampleRate(GetValidHandle());
            System.GC.KeepAlive(this);
            return rate;
        }
    }

    // Returns the native handle, or throws a managed exception instead of
    // letting a null pointer reach native code.
    private IntPtr GetValidHandle()
    {
        IntPtr h = _handle.Handle;
        if (h == IntPtr.Zero)
        {
            throw new System.ObjectDisposedException(
                nameof(OfflineSpeechDenoiser),
                "The native denoiser handle is null: the object was disposed or the native create call returned null.");
        }

        return h;
    }

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxCreateOfflineSpeechDenoiser(ref OfflineSpeechDenoiserConfig config);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxDestroyOfflineSpeechDenoiser(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern int SherpaOnnxOfflineSpeechDenoiserGetSampleRate(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxOfflineSpeechDenoiserRun(IntPtr handle, float[] samples, int n, int sampleRate);
}
}
... ...
/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
using System.Runtime.InteropServices;
namespace SherpaOnnx
{
/// <summary>
/// Top-level configuration for <c>OfflineSpeechDenoiser</c>. It is passed by
/// reference to the native create function, so the sequential layout and the
/// field order are part of the interop contract and must not be changed.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct OfflineSpeechDenoiserConfig
{
/// <summary>Initializes <see cref="Model"/> with its own default values.</summary>
public OfflineSpeechDenoiserConfig()
{
Model = new OfflineSpeechDenoiserModelConfig();
}
/// <summary>Model settings (model file, thread count, debug flag, provider).</summary>
public OfflineSpeechDenoiserModelConfig Model;
}
}
... ...
/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
using System.Runtime.InteropServices;
namespace SherpaOnnx
{
/// <summary>
/// GTCRN-specific model settings. Laid out sequentially for interop; the
/// single string field is marshaled as <c>LPStr</c>.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct OfflineSpeechDenoiserGtcrnModelConfig
{
/// <summary>Initializes <see cref="Model"/> to the empty string rather than null.</summary>
public OfflineSpeechDenoiserGtcrnModelConfig()
{
Model = "";
}
/// <summary>Path to the GTCRN model file (the example uses <c>./gtcrn_simple.onnx</c>).</summary>
[MarshalAs(UnmanagedType.LPStr)]
public string Model;
}
}
... ...
/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
using System.Runtime.InteropServices;
namespace SherpaOnnx
{
/// <summary>
/// Model-level settings for the offline speech denoiser. Laid out sequentially
/// for interop, so the field order below is part of the native contract and
/// must not be changed.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct OfflineSpeechDenoiserModelConfig
{
/// <summary>
/// Sets the defaults: default GTCRN settings, one thread, debug off (0) and
/// the <c>"cpu"</c> provider.
/// </summary>
public OfflineSpeechDenoiserModelConfig()
{
Gtcrn = new OfflineSpeechDenoiserGtcrnModelConfig();
NumThreads = 1;
Debug = 0;
Provider = "cpu";
}
/// <summary>GTCRN model settings.</summary>
public OfflineSpeechDenoiserGtcrnModelConfig Gtcrn;
/// <summary>Thread count; defaults to 1. NOTE(review): presumably the number of inference threads used by the native library; confirm.</summary>
public int NumThreads;
/// <summary>Debug flag; defaults to 0. NOTE(review): presumably a non-zero value enables debug output in the native library; confirm.</summary>
public int Debug;
/// <summary>Provider name, marshaled as <c>LPStr</c>; defaults to <c>"cpu"</c>.</summary>
[MarshalAs(UnmanagedType.LPStr)]
public string Provider;
}
}
... ...