Fangjun Kuang
Committed by GitHub

Add C# API for speech enhancement GTCRN models (#1990)

@@ -2,7 +2,11 @@ @@ -2,7 +2,11 @@
2 2
3 cd dotnet-examples/ 3 cd dotnet-examples/
4 4
5 -cd ./kokoro-tts 5 +cd ./speech-enhancement-gtcrn
  6 +./run.sh
  7 +ls -lh
  8 +
  9 +cd ../kokoro-tts
6 ./run-kokoro.sh 10 ./run-kokoro.sh
7 ls -lh 11 ls -lh
8 12
@@ -35,6 +35,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts", "kokoro-tts\ko @@ -35,6 +35,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts", "kokoro-tts\ko
35 EndProject 35 EndProject
36 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts-play", "kokoro-tts-play\kokoro-tts-play.csproj", "{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}" 36 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts-play", "kokoro-tts-play\kokoro-tts-play.csproj", "{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}"
37 EndProject 37 EndProject
  38 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speech-enhancement-gtcrn", "speech-enhancement-gtcrn\speech-enhancement-gtcrn.csproj", "{DF2569C6-6011-4716-9538-F9E9069E00EB}"
  39 +EndProject
38 Global 40 Global
39 GlobalSection(SolutionConfigurationPlatforms) = preSolution 41 GlobalSection(SolutionConfigurationPlatforms) = preSolution
40 Debug|Any CPU = Debug|Any CPU 42 Debug|Any CPU = Debug|Any CPU
@@ -105,6 +107,10 @@ Global @@ -105,6 +107,10 @@ Global
105 {EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Debug|Any CPU.Build.0 = Debug|Any CPU 107 {EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Debug|Any CPU.Build.0 = Debug|Any CPU
106 {EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.ActiveCfg = Release|Any CPU 108 {EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.ActiveCfg = Release|Any CPU
107 {EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.Build.0 = Release|Any CPU 109 {EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.Build.0 = Release|Any CPU
  110 + {DF2569C6-6011-4716-9538-F9E9069E00EB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
  111 + {DF2569C6-6011-4716-9538-F9E9069E00EB}.Debug|Any CPU.Build.0 = Debug|Any CPU
  112 + {DF2569C6-6011-4716-9538-F9E9069E00EB}.Release|Any CPU.ActiveCfg = Release|Any CPU
  113 + {DF2569C6-6011-4716-9538-F9E9069E00EB}.Release|Any CPU.Build.0 = Release|Any CPU
108 EndGlobalSection 114 EndGlobalSection
109 GlobalSection(SolutionProperties) = preSolution 115 GlobalSection(SolutionProperties) = preSolution
110 HideSolutionNode = FALSE 116 HideSolutionNode = FALSE
  1 +// Copyright (c) 2025 Xiaomi Corporation
  2 +//
  3 +// This file shows how to use speech enhancement API with GTCRN models.
  4 +//
  5 +// 1. Download a model from
  6 +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
  7 +//
  8 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
  9 +//
  10 +// 2. Download a test file
  11 +//
  12 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
  13 +//
  14 +// 3. Now run it
  15 +//
  16 +// dotnet run
  17 +
  18 +using SherpaOnnx;
  19 +
  20 +class OfflineSpeechEnhancementDemo
  21 +{
  22 + static void Main(string[] args)
  23 + {
  24 + var config = new OfflineSpeechDenoiserConfig();
  25 + config.Model.Gtcrn.Model = "./gtcrn_simple.onnx";
  26 + config.Model.Debug = 1;
  27 + config.Model.NumThreads = 1;
  28 + var sd = new OfflineSpeechDenoiser(config);
  29 +
  30 + WaveReader waveReader = new WaveReader("./inp_16k.wav");
  31 + var denoisedAudio = sd.Run(waveReader.Samples, waveReader.SampleRate);
  32 +
  33 + var outputFilename = "./enhanced-16k.wav";
  34 + var ok = denoisedAudio.SaveToWaveFile(outputFilename);
  35 +
  36 + if (ok)
  37 + {
  38 + Console.WriteLine($"Wrote to {outputFilename} succeeded!");
  39 + }
  40 + else
  41 + {
  42 + Console.WriteLine($"Failed to write {outputFilename}");
  43 + }
  44 + }
  45 +}
  1 +#!/usr/bin/env bash
  2 +set -ex
  3 +
  4 +if [ ! -f ./gtcrn_simple.onnx ]; then
  5 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
  6 +fi
  7 +
  8 +if [ ! -f ./inp_16k.wav ]; then
  9 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
  10 +fi
  11 +
  12 +dotnet run
  1 +<Project Sdk="Microsoft.NET.Sdk">
  2 +
  3 + <PropertyGroup>
  4 + <OutputType>Exe</OutputType>
  5 + <TargetFramework>net8.0</TargetFramework>
  6 + <RootNamespace>speech_enhancement_gtcrn</RootNamespace>
  7 + <ImplicitUsings>enable</ImplicitUsings>
  8 + <Nullable>enable</Nullable>
  9 + </PropertyGroup>
  10 +
  11 + <ItemGroup>
  12 + <ProjectReference Include="..\Common\Common.csproj" />
  13 + </ItemGroup>
  14 +
  15 +</Project>
  1 +/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +using System;
  3 +using System.Runtime.InteropServices;
  4 +using System.Text;
  5 +
  6 +namespace SherpaOnnx
  7 +{
  8 + public class DenoisedAudio
  9 + {
  10 + public DenoisedAudio(IntPtr p)
  11 + {
  12 + _handle = new HandleRef(this, p);
  13 + }
  14 +
  15 + public bool SaveToWaveFile(String filename)
  16 + {
  17 + Impl impl = (Impl)Marshal.PtrToStructure(Handle, typeof(Impl));
  18 + byte[] utf8Filename = Encoding.UTF8.GetBytes(filename);
  19 + byte[] utf8FilenameWithNull = new byte[utf8Filename.Length + 1]; // +1 for null terminator
  20 + Array.Copy(utf8Filename, utf8FilenameWithNull, utf8Filename.Length);
  21 + utf8FilenameWithNull[utf8Filename.Length] = 0; // Null terminator
  22 + int status = SherpaOnnxWriteWave(impl.Samples, impl.NumSamples, impl.SampleRate, utf8FilenameWithNull);
  23 + return status == 1;
  24 + }
  25 +
  26 + ~DenoisedAudio()
  27 + {
  28 + Cleanup();
  29 + }
  30 +
  31 + public void Dispose()
  32 + {
  33 + Cleanup();
  34 + // Prevent the object from being placed on the
  35 + // finalization queue
  36 + System.GC.SuppressFinalize(this);
  37 + }
  38 +
  39 + private void Cleanup()
  40 + {
  41 + SherpaOnnxDestroyDenoisedAudio(Handle);
  42 +
  43 + // Don't permit the handle to be used again.
  44 + _handle = new HandleRef(this, IntPtr.Zero);
  45 + }
  46 +
  47 + [StructLayout(LayoutKind.Sequential)]
  48 + struct Impl
  49 + {
  50 + public IntPtr Samples;
  51 + public int NumSamples;
  52 + public int SampleRate;
  53 + }
  54 +
  55 + private HandleRef _handle;
  56 + public IntPtr Handle => _handle.Handle;
  57 +
  58 + public int NumSamples
  59 + {
  60 + get
  61 + {
  62 + Impl impl = (Impl)Marshal.PtrToStructure(Handle, typeof(Impl));
  63 + return impl.NumSamples;
  64 + }
  65 + }
  66 +
  67 + public int SampleRate
  68 + {
  69 + get
  70 + {
  71 + Impl impl = (Impl)Marshal.PtrToStructure(Handle, typeof(Impl));
  72 + return impl.SampleRate;
  73 + }
  74 + }
  75 +
  76 + public float[] Samples
  77 + {
  78 + get
  79 + {
  80 + Impl impl = (Impl)Marshal.PtrToStructure(Handle, typeof(Impl));
  81 +
  82 + float[] samples = new float[impl.NumSamples];
  83 + Marshal.Copy(impl.Samples, samples, 0, impl.NumSamples);
  84 + return samples;
  85 + }
  86 + }
  87 +
  88 + [DllImport(Dll.Filename)]
  89 + private static extern void SherpaOnnxDestroyDenoisedAudio(IntPtr handle);
  90 +
  91 + [DllImport(Dll.Filename)]
  92 + private static extern int SherpaOnnxWriteWave(IntPtr samples, int n, int sample_rate, [MarshalAs(UnmanagedType.LPArray, ArraySubType = UnmanagedType.I1)] byte[] utf8Filename);
  93 + }
  94 +}
  1 +/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +
  using System;
  using System.Runtime.InteropServices;
  4 +
  5 +namespace SherpaOnnx
  6 +{
  7 + public class OfflineSpeechDenoiser: IDisposable
  8 + {
  9 + public OfflineSpeechDenoiser(OfflineSpeechDenoiserConfig config)
  10 + {
  11 + IntPtr h = SherpaOnnxCreateOfflineSpeechDenoiser(ref config);
  12 + _handle = new HandleRef(this, h);
  13 + }
  14 +
  15 + public DenoisedAudio Run(float[] samples, int sampleRate)
  16 + {
  17 + IntPtr p = SherpaOnnxOfflineSpeechDenoiserRun(_handle.Handle, samples, samples.Length, sampleRate);
  18 + return new DenoisedAudio(p);
  19 + }
  20 +
  21 + public void Dispose()
  22 + {
  23 + Cleanup();
  24 + // Prevent the object from being placed on the
  25 + // finalization queue
  26 + System.GC.SuppressFinalize(this);
  27 + }
  28 +
  29 + ~OfflineSpeechDenoiser()
  30 + {
  31 + Cleanup();
  32 + }
  33 +
  34 + private void Cleanup()
  35 + {
  36 + SherpaOnnxDestroyOfflineSpeechDenoiser(_handle.Handle);
  37 +
  38 + // Don't permit the handle to be used again.
  39 + _handle = new HandleRef(this, IntPtr.Zero);
  40 + }
  41 +
  42 + private HandleRef _handle;
  43 +
  44 + public int SampleRate
  45 + {
  46 + get
  47 + {
  48 + return SherpaOnnxOfflineSpeechDenoiserGetSampleRate(_handle.Handle);
  49 + }
  50 + }
  51 +
  52 + [DllImport(Dll.Filename)]
  53 + private static extern IntPtr SherpaOnnxCreateOfflineSpeechDenoiser(ref OfflineSpeechDenoiserConfig config);
  54 +
  55 + [DllImport(Dll.Filename)]
  56 + private static extern void SherpaOnnxDestroyOfflineSpeechDenoiser(IntPtr handle);
  57 +
  58 + [DllImport(Dll.Filename)]
  59 + private static extern int SherpaOnnxOfflineSpeechDenoiserGetSampleRate(IntPtr handle);
  60 +
  61 + [DllImport(Dll.Filename)]
  62 + private static extern IntPtr SherpaOnnxOfflineSpeechDenoiserRun(IntPtr handle, float[] samples, int n, int sampleRate);
  63 + }
  64 +}
  1 +/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +
  3 +using System.Runtime.InteropServices;
  4 +
  5 +namespace SherpaOnnx
  6 +{
  7 + [StructLayout(LayoutKind.Sequential)]
  8 + public struct OfflineSpeechDenoiserConfig
  9 + {
  10 + public OfflineSpeechDenoiserConfig()
  11 + {
  12 + Model = new OfflineSpeechDenoiserModelConfig();
  13 + }
  14 + public OfflineSpeechDenoiserModelConfig Model;
  15 + }
  16 +}
  1 +/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +
  3 +using System.Runtime.InteropServices;
  4 +
  5 +namespace SherpaOnnx
  6 +{
  7 + [StructLayout(LayoutKind.Sequential)]
  8 + public struct OfflineSpeechDenoiserGtcrnModelConfig
  9 + {
  10 + public OfflineSpeechDenoiserGtcrnModelConfig()
  11 + {
  12 + Model = "";
  13 + }
  14 + [MarshalAs(UnmanagedType.LPStr)]
  15 + public string Model;
  16 + }
  17 +}
  1 +/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +
  3 +using System.Runtime.InteropServices;
  4 +
  5 +namespace SherpaOnnx
  6 +{
  7 + [StructLayout(LayoutKind.Sequential)]
  8 + public struct OfflineSpeechDenoiserModelConfig
  9 + {
  10 + public OfflineSpeechDenoiserModelConfig()
  11 + {
  12 + Gtcrn = new OfflineSpeechDenoiserGtcrnModelConfig();
  13 + NumThreads = 1;
  14 + Debug = 0;
  15 + Provider = "cpu";
  16 + }
  17 +
  18 + public OfflineSpeechDenoiserGtcrnModelConfig Gtcrn;
  19 +
  20 + public int NumThreads;
  21 +
  22 + public int Debug;
  23 +
  24 + [MarshalAs(UnmanagedType.LPStr)]
  25 + public string Provider;
  26 + }
  27 +}