Committed by
GitHub
Add C# API for speech enhancement GTCRN models (#1990)
正在显示
10 个修改的文件
包含
301 行增加
和
1 行删除
| @@ -35,6 +35,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts", "kokoro-tts\ko | @@ -35,6 +35,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts", "kokoro-tts\ko | ||
| 35 | EndProject | 35 | EndProject |
| 36 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts-play", "kokoro-tts-play\kokoro-tts-play.csproj", "{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}" | 36 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts-play", "kokoro-tts-play\kokoro-tts-play.csproj", "{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}" |
| 37 | EndProject | 37 | EndProject |
| 38 | +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speech-enhancement-gtcrn", "speech-enhancement-gtcrn\speech-enhancement-gtcrn.csproj", "{DF2569C6-6011-4716-9538-F9E9069E00EB}" | ||
| 39 | +EndProject | ||
| 38 | Global | 40 | Global |
| 39 | GlobalSection(SolutionConfigurationPlatforms) = preSolution | 41 | GlobalSection(SolutionConfigurationPlatforms) = preSolution |
| 40 | Debug|Any CPU = Debug|Any CPU | 42 | Debug|Any CPU = Debug|Any CPU |
| @@ -105,6 +107,10 @@ Global | @@ -105,6 +107,10 @@ Global | ||
| 105 | {EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Debug|Any CPU.Build.0 = Debug|Any CPU | 107 | {EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Debug|Any CPU.Build.0 = Debug|Any CPU |
| 106 | {EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.ActiveCfg = Release|Any CPU | 108 | {EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.ActiveCfg = Release|Any CPU |
| 107 | {EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.Build.0 = Release|Any CPU | 109 | {EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.Build.0 = Release|Any CPU |
| 110 | + {DF2569C6-6011-4716-9538-F9E9069E00EB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||
| 111 | + {DF2569C6-6011-4716-9538-F9E9069E00EB}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||
| 112 | + {DF2569C6-6011-4716-9538-F9E9069E00EB}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||
| 113 | + {DF2569C6-6011-4716-9538-F9E9069E00EB}.Release|Any CPU.Build.0 = Release|Any CPU | ||
| 108 | EndGlobalSection | 114 | EndGlobalSection |
| 109 | GlobalSection(SolutionProperties) = preSolution | 115 | GlobalSection(SolutionProperties) = preSolution |
| 110 | HideSolutionNode = FALSE | 116 | HideSolutionNode = FALSE |
| 1 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 2 | +// | ||
| 3 | +// This file shows how to use speech enhancement API with GTCRN models. | ||
| 4 | +// | ||
| 5 | +// 1. Download a model from | ||
| 6 | +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models | ||
| 7 | +// | ||
| 8 | +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx | ||
| 9 | +// | ||
| 10 | +// 2. Download a test file | ||
| 11 | +// | ||
| 12 | +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav | ||
| 13 | +// | ||
| 14 | +// 3. Now run it | ||
| 15 | +// | ||
| 16 | +// dotnet run | ||
| 17 | + | ||
| 18 | +using SherpaOnnx; | ||
| 19 | + | ||
/// <summary>
/// Minimal console demo: loads a GTCRN speech-enhancement model, denoises
/// <c>./inp_16k.wav</c> and writes the result to <c>./enhanced-16k.wav</c>.
/// </summary>
class OfflineSpeechEnhancementDemo
{
    /// <summary>
    /// Entry point. Expects <c>gtcrn_simple.onnx</c> and <c>inp_16k.wav</c>
    /// in the current working directory.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        var config = new OfflineSpeechDenoiserConfig();
        config.Model.Gtcrn.Model = "./gtcrn_simple.onnx";
        config.Model.Debug = 1; // presumably turns on verbose native logging - confirm against the C API
        config.Model.NumThreads = 1;

        // The denoiser wraps a native handle, so release it deterministically
        // instead of waiting for the finalizer.
        using var sd = new OfflineSpeechDenoiser(config);

        WaveReader waveReader = new WaveReader("./inp_16k.wav");
        var denoisedAudio = sd.Run(waveReader.Samples, waveReader.SampleRate);

        try
        {
            var outputFilename = "./enhanced-16k.wav";
            var ok = denoisedAudio.SaveToWaveFile(outputFilename);

            if (ok)
            {
                Console.WriteLine($"Wrote to {outputFilename} succeeded!");
            }
            else
            {
                Console.WriteLine($"Failed to write {outputFilename}");
            }
        }
        finally
        {
            // DenoisedAudio owns native sample memory; free it even if saving throws.
            denoisedAudio.Dispose();
        }
    }
}
#!/usr/bin/env bash
# Downloads the GTCRN model and a 16 kHz test wave (if missing), then runs the demo.
set -ex

base_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models

for f in gtcrn_simple.onnx inp_16k.wav; do
  if [ ! -f "./$f" ]; then
    # -f: fail on HTTP errors instead of saving the error page under the
    # target name, which would make the existence check above skip every
    # later download attempt.
    curl -fSL -O "$base_url/$f"
  fi
done

dotnet run
| 1 | +<Project Sdk="Microsoft.NET.Sdk"> | ||
| 2 | + | ||
| 3 | + <PropertyGroup> | ||
| 4 | + <OutputType>Exe</OutputType> | ||
| 5 | + <TargetFramework>net8.0</TargetFramework> | ||
| 6 | + <RootNamespace>speech_enhancement_gtcrn</RootNamespace> | ||
| 7 | + <ImplicitUsings>enable</ImplicitUsings> | ||
| 8 | + <Nullable>enable</Nullable> | ||
| 9 | + </PropertyGroup> | ||
| 10 | + | ||
| 11 | + <ItemGroup> | ||
| 12 | + <ProjectReference Include="..\Common\Common.csproj" /> | ||
| 13 | + </ItemGroup> | ||
| 14 | + | ||
| 15 | +</Project> |
scripts/dotnet/DenoisedAudio.cs
0 → 100644
| 1 | +// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | +using System; | ||
| 3 | +using System.Runtime.InteropServices; | ||
| 4 | +using System.Text; | ||
| 5 | + | ||
namespace SherpaOnnx
{
    /// <summary>
    /// Managed wrapper around a native denoised-audio object. The wrapper
    /// owns the native pointer it is given and releases it through
    /// <c>SherpaOnnxDestroyDenoisedAudio</c> on <see cref="Dispose"/> or
    /// finalization.
    /// </summary>
    public class DenoisedAudio : IDisposable
    {
        /// <summary>
        /// Wraps an existing native pointer.
        /// </summary>
        /// <param name="p">Pointer to the native audio struct; ownership moves to this instance.</param>
        public DenoisedAudio(IntPtr p)
        {
            _handle = new HandleRef(this, p);
        }

        /// <summary>
        /// Writes the samples to a wave file through the native
        /// <c>SherpaOnnxWriteWave</c> function.
        /// </summary>
        /// <param name="filename">Destination path; passed to native code as a NUL-terminated UTF-8 byte array.</param>
        /// <returns><c>true</c> when the native call returns 1, otherwise <c>false</c>.</returns>
        public bool SaveToWaveFile(String filename)
        {
            Impl impl = ReadImpl();
            byte[] utf8Filename = Encoding.UTF8.GetBytes(filename);
            byte[] utf8FilenameWithNull = new byte[utf8Filename.Length + 1]; // +1 for null terminator
            Array.Copy(utf8Filename, utf8FilenameWithNull, utf8Filename.Length);
            utf8FilenameWithNull[utf8Filename.Length] = 0; // Null terminator
            int status = SherpaOnnxWriteWave(impl.Samples, impl.NumSamples, impl.SampleRate, utf8FilenameWithNull);
            return status == 1;
        }

        ~DenoisedAudio()
        {
            Cleanup();
        }

        /// <summary>
        /// Releases the native object. Calling it more than once is harmless.
        /// </summary>
        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Frees the native object once; later calls see a zero handle and return.
        /// </summary>
        private void Cleanup()
        {
            IntPtr h = Handle;
            if (h == IntPtr.Zero)
            {
                // Already released, or never held a native object: do not
                // hand a null pointer to the native destroy function.
                return;
            }

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);

            SherpaOnnxDestroyDenoisedAudio(h);
        }

        /// <summary>
        /// Copies the native struct behind <see cref="Handle"/> into managed memory.
        /// </summary>
        private Impl ReadImpl()
        {
            return (Impl)Marshal.PtrToStructure(Handle, typeof(Impl));
        }

        // Managed view of the native struct. Presumably mirrors the C
        // definition field for field, so keep the order - verify against the C API header.
        [StructLayout(LayoutKind.Sequential)]
        struct Impl
        {
            public IntPtr Samples;
            public int NumSamples;
            public int SampleRate;
        }

        private HandleRef _handle;

        /// <summary>Raw native pointer; <see cref="IntPtr.Zero"/> after disposal.</summary>
        public IntPtr Handle => _handle.Handle;

        /// <summary>Number of samples reported by the native struct.</summary>
        public int NumSamples
        {
            get
            {
                return ReadImpl().NumSamples;
            }
        }

        /// <summary>Sample rate reported by the native struct.</summary>
        public int SampleRate
        {
            get
            {
                return ReadImpl().SampleRate;
            }
        }

        /// <summary>
        /// A fresh managed copy of the native sample buffer. An empty array
        /// is returned when the native buffer is null or holds no samples.
        /// </summary>
        public float[] Samples
        {
            get
            {
                Impl impl = ReadImpl();

                // Marshal.Copy rejects a null source pointer even for a
                // zero-length copy, so handle the empty case up front.
                if (impl.Samples == IntPtr.Zero || impl.NumSamples <= 0)
                {
                    return new float[0];
                }

                float[] samples = new float[impl.NumSamples];
                Marshal.Copy(impl.Samples, samples, 0, impl.NumSamples);
                return samples;
            }
        }

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxDestroyDenoisedAudio(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxWriteWave(IntPtr samples, int n, int sample_rate, [MarshalAs(UnmanagedType.LPArray, ArraySubType = UnmanagedType.I1)] byte[] utf8Filename);
    }
}
scripts/dotnet/OfflineSpeechDenoiser.cs
0 → 100644
| 1 | +// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | + | ||
| 3 | +using System.Runtime.InteropServices; | ||
| 4 | + | ||
namespace SherpaOnnx
{
    /// <summary>
    /// Managed wrapper around the native offline speech denoiser. It holds a
    /// native handle created by <c>SherpaOnnxCreateOfflineSpeechDenoiser</c>
    /// and destroys it on <see cref="Dispose"/> or finalization.
    /// </summary>
    public class OfflineSpeechDenoiser: IDisposable
    {
        /// <summary>
        /// Creates the native denoiser from <paramref name="config"/>.
        /// </summary>
        /// <param name="config">Configuration passed by reference to the native create function.</param>
        public OfflineSpeechDenoiser(OfflineSpeechDenoiserConfig config)
        {
            IntPtr h = SherpaOnnxCreateOfflineSpeechDenoiser(ref config);
            _handle = new HandleRef(this, h);
        }

        /// <summary>
        /// Runs the native denoiser over <paramref name="samples"/>.
        /// </summary>
        /// <param name="samples">Input samples; must not be null.</param>
        /// <param name="sampleRate">Sample rate of <paramref name="samples"/>, forwarded unchanged to native code.</param>
        /// <returns>A <see cref="DenoisedAudio"/> wrapping the pointer returned by the native call.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="samples"/> is null.</exception>
        public DenoisedAudio Run(float[] samples, int sampleRate)
        {
            if (samples == null)
            {
                // Fail with a descriptive exception instead of a
                // NullReferenceException on samples.Length.
                throw new System.ArgumentNullException(nameof(samples));
            }

            IntPtr p = SherpaOnnxOfflineSpeechDenoiserRun(_handle.Handle, samples, samples.Length, sampleRate);
            return new DenoisedAudio(p);
        }

        /// <summary>
        /// Releases the native denoiser. Calling it more than once is harmless.
        /// </summary>
        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        ~OfflineSpeechDenoiser()
        {
            Cleanup();
        }

        /// <summary>
        /// Destroys the native object once; later calls see a zero handle and return.
        /// </summary>
        private void Cleanup()
        {
            IntPtr h = _handle.Handle;
            if (h == IntPtr.Zero)
            {
                // Already destroyed, or creation returned null: do not pass
                // a null pointer to the native destroy function.
                return;
            }

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);

            SherpaOnnxDestroyOfflineSpeechDenoiser(h);
        }

        private HandleRef _handle;

        /// <summary>Sample rate reported by the native denoiser.</summary>
        public int SampleRate
        {
            get
            {
                return SherpaOnnxOfflineSpeechDenoiserGetSampleRate(_handle.Handle);
            }
        }

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxCreateOfflineSpeechDenoiser(ref OfflineSpeechDenoiserConfig config);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxDestroyOfflineSpeechDenoiser(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxOfflineSpeechDenoiserGetSampleRate(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxOfflineSpeechDenoiserRun(IntPtr handle, float[] samples, int n, int sampleRate);
    }
}
| 1 | +// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | + | ||
| 3 | +using System.Runtime.InteropServices; | ||
| 4 | + | ||
namespace SherpaOnnx
{
    /// <summary>
    /// Top-level configuration for the offline speech denoiser. The struct
    /// uses sequential layout; presumably it must match the native config
    /// struct - verify against the C API header before adding fields.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineSpeechDenoiserConfig
    {
        /// <summary>Model-related settings.</summary>
        public OfflineSpeechDenoiserModelConfig Model;

        /// <summary>
        /// Initializes <see cref="Model"/> with its own defaults.
        /// </summary>
        public OfflineSpeechDenoiserConfig()
        {
            Model = new();
        }
    }
}
| 1 | +// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | + | ||
| 3 | +using System.Runtime.InteropServices; | ||
| 4 | + | ||
namespace SherpaOnnx
{
    /// <summary>
    /// Settings for a GTCRN speech-enhancement model. Sequential layout
    /// with a single string field marshaled as an ANSI C string pointer.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineSpeechDenoiserGtcrnModelConfig
    {
        /// <summary>Model file path; empty by default.</summary>
        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;

        /// <summary>
        /// Initializes <see cref="Model"/> to the empty string.
        /// </summary>
        public OfflineSpeechDenoiserGtcrnModelConfig()
        {
            Model = string.Empty;
        }
    }
}
| 1 | +// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | + | ||
| 3 | +using System.Runtime.InteropServices; | ||
| 4 | + | ||
namespace SherpaOnnx
{
    /// <summary>
    /// Model settings for the offline speech denoiser. Sequential layout:
    /// the field declaration order below is significant and presumably
    /// mirrors the native struct - verify against the C API header.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineSpeechDenoiserModelConfig
    {
        /// <summary>GTCRN model settings.</summary>
        public OfflineSpeechDenoiserGtcrnModelConfig Gtcrn;

        /// <summary>Thread count passed to the native side; defaults to 1.</summary>
        public int NumThreads;

        /// <summary>Debug flag passed to the native side; defaults to 0.</summary>
        public int Debug;

        /// <summary>Provider name, marshaled as an ANSI C string; defaults to "cpu".</summary>
        [MarshalAs(UnmanagedType.LPStr)]
        public string Provider;

        /// <summary>
        /// Fills every field with its default value.
        /// </summary>
        public OfflineSpeechDenoiserModelConfig()
        {
            Provider = "cpu";
            Debug = 0;
            NumThreads = 1;
            Gtcrn = new();
        }
    }
}
-
请 注册 或 登录 后发表评论