木子李
Committed by GitHub

Add C# TTS API (#399)

  1 +using NAudio.Wave;
  2 +
  3 +namespace TTS.Struct
  4 +{
  5 + public sealed partial class SherpaOnnxGeneratedAudioResult
  6 + {
  7 + private WaveOutEvent waveOut;
  8 + private WaveFormat waveFormat;
  9 + private BufferedWaveProvider bufferedWaveProvider;
  10 +
  11 + private int bufferLength = 1;
  12 +
  13 + public TimeSpan? AudioDuration => bufferedWaveProvider?.BufferedDuration;
  14 +
  15 + public float PlayProgress => (waveOut?.GetPosition() * 1.0f / bufferLength).Value;
  16 +
  17 + public void Play()
  18 + {
  19 + waveOut ??= new WaveOutEvent();
  20 +
  21 + waveFormat ??= new WaveFormat(sample_rate, AudioDataBit, Channels); // 32-bit 浮点,单声道
  22 +
  23 + if (bufferedWaveProvider == null)
  24 + {
  25 + bufferedWaveProvider ??= new BufferedWaveProvider(waveFormat);
  26 +
  27 + var buffer = AudioByteData;
  28 +
  29 + bufferLength = buffer.Length;
  30 +
  31 + bufferedWaveProvider.AddSamples(buffer, 0, bufferLength);
  32 + bufferedWaveProvider.BufferLength = bufferLength;
  33 + waveOut.Init(bufferedWaveProvider);
  34 + }
  35 + waveOut.Play();
  36 + }
  37 +
  38 + public void Stop()
  39 + {
  40 + waveOut?.Stop();
  41 + }
  42 +
  43 + }
  44 +}
  1 +using System.Text;
  2 +using TTS;
  3 +using TTS.Struct;
  4 +
  5 +internal class Program
  6 +{
  7 + private static void Main(string[] args)
  8 + {
  9 + SherpaOnnxOfflineTtsConfig sherpaOnnxOfflineTtsConfig = new SherpaOnnxOfflineTtsConfig();
  10 + sherpaOnnxOfflineTtsConfig.model = new SherpaOnnxOfflineTtsModelConfig
  11 + {
  12 + debug = 0,
  13 + num_threads = 4,
  14 + provider = "cpu",
  15 + vits = new SherpaOnnxOfflineTtsVitsModelConfig
  16 + {
  17 + lexicon = "vits-zh-aishell3/lexicon.txt",
  18 + model = "vits-zh-aishell3/vits-aishell3.onnx",
  19 + tokens = "vits-zh-aishell3/tokens.txt",
  20 +
  21 + noise_scale = 0.667f,
  22 + noise_scale_w = 0.8f,
  23 + length_scale = 1,
  24 + },
  25 +
  26 + };
  27 +
  28 + TTSCore i = new TTSCore(sherpaOnnxOfflineTtsConfig);
  29 +
  30 + Console.InputEncoding = Encoding.Unicode;
  31 + Console.OutputEncoding = Encoding.UTF8;
  32 +
  33 + while (true)
  34 + {
  35 + var str = Console.ReadLine();
  36 + var audioResult = i.ToSpeech(str, 40, 1f);
  37 +
  38 + // audioResult.WriteWAVFile("123.wav");保存本地
  39 +
  40 + audioResult.Play();
  41 +
  42 + int lastIndex = -1;
  43 + while (audioResult.PlayProgress <= 1f)
  44 + {
  45 + int index = (int)(audioResult.PlayProgress * (str.Length - 1));
  46 + if (lastIndex != index)
  47 + {
  48 + Console.Write(str[index]);
  49 + lastIndex = index;
  50 + }
  51 + Thread.Sleep(100);
  52 + }
  53 +
  54 + if (++lastIndex < str.Length)
  55 + Console.Write(str[lastIndex]);
  56 +
  57 + Console.WriteLine();
  58 +
  59 + }
  60 +
  61 + }
  62 +}
  1 +using System;
  2 +using System.Collections.Generic;
  3 +using System.Linq;
  4 +using System.Runtime.InteropServices;
  5 +using System.Text;
  6 +using System.Threading.Tasks;
  7 +
  8 +namespace TTS.Struct
  9 +{
  10 + /// <summary>
  11 + /// 生成语音结果
  12 + /// </summary>
  13 + public sealed partial class SherpaOnnxGeneratedAudioResult : IDisposable
  14 + {
  15 + public const string Filename = "sherpa-onnx-c-api";
  16 +
  17 + /// <summary>
  18 + /// 销毁非托管内存
  19 + /// </summary>
  20 + /// <param name="ttsGenerateIntptr"></param>
  21 + [DllImport(Filename)]
  22 + private static extern void SherpaOnnxDestroyOfflineTtsGeneratedAudio(IntPtr ttsGenerateIntptr);
  23 +
  24 + [DllImport(Filename)]
  25 + private static extern int SherpaOnnxWriteWave(IntPtr q, int n, int sample_rate, string filename);
  26 +
  27 + /// <summary>
  28 + /// 音频数据比特
  29 + /// </summary>
  30 + public const int AudioDataBit = 16;
  31 + /// <summary>
  32 + /// 单通道
  33 + /// </summary>
  34 + public const int Channels = 1;
  35 +
  36 + /// <summary>
  37 + /// 原生句柄
  38 + /// </summary>
  39 + internal IntPtr thisHandle;
  40 +
  41 + internal readonly IntPtr audioData;
  42 + internal readonly int dataSize;
  43 +
  44 + /// <summary>
  45 + /// 采样率
  46 + /// </summary>
  47 + public readonly int sample_rate;
  48 +
  49 + /// <summary>
  50 + /// 音频数据指针
  51 + /// </summary>
  52 + public IntPtr AudioDataIntPtr => audioData;
  53 +
  54 + /// <summary>
  55 + /// 数据的大小
  56 + /// </summary>
  57 + public unsafe int AudioDataLength
  58 + {
  59 + get
  60 + {
  61 + return dataSize;
  62 +
  63 + //float* buffer = (float*)audioData;
  64 + //while (*buffer != 0)
  65 + // ++buffer;
  66 + //return (int)(buffer - (float*)audioData);
  67 + }
  68 + }
  69 +
  70 + /// <summary>
  71 + /// 获得音频数据 float[]
  72 + /// 这个内部创建一个数组
  73 + /// </summary>
  74 + public unsafe float[] AudioFloatData
  75 + {
  76 + get
  77 + {
  78 + int length = AudioDataLength;
  79 +
  80 + float[] floatAudioData = new float[length];
  81 + Marshal.Copy(audioData, floatAudioData, 0, floatAudioData.Length);
  82 + return floatAudioData;
  83 + }
  84 + }
  85 +
  86 +
  87 + /// <summary>
  88 + /// 获得音频数据 byte[]
  89 + /// 这个内部创建一个数组
  90 + /// </summary>
  91 + public byte[] AudioByteData
  92 + {
  93 + get
  94 + {
  95 + byte[] bytes = new byte[AudioDataLength * 2];
  96 + ReadData(bytes, 0);
  97 + return bytes;
  98 + }
  99 + }
  100 +
  101 + internal SherpaOnnxGeneratedAudioResult(IntPtr intPtr, SherpaOnnxGeneratedAudio sherpaOnnx)
  102 + {
  103 + this.thisHandle = intPtr;
  104 + this.audioData = sherpaOnnx.audioData;
  105 + this.dataSize = sherpaOnnx.dataSize;
  106 + this.sample_rate = sherpaOnnx.sample_rate;
  107 + }
  108 +
  109 + ~SherpaOnnxGeneratedAudioResult()
  110 + {
  111 + Dispose();
  112 + }
  113 +
  114 + /// <summary>
  115 + /// 读取数据
  116 + /// 没有垃圾产生,自己传递数组进来
  117 + /// </summary>
  118 + /// <param name="audioFloats">数组</param>
  119 + /// <param name="offset">数组那个位置写入</param>
  120 + /// <returns>写入了多少个</returns>
  121 + public int ReadData(float[] audioFloats, int offset)
  122 + {
  123 + int length = AudioDataLength;
  124 +
  125 + int c = audioFloats.Length - offset;
  126 + length = c >= length ? length : c;
  127 +
  128 + Marshal.Copy(audioData, audioFloats, offset, length);
  129 + return length;
  130 + }
  131 +
  132 + /// <summary>
  133 + /// 读取数据
  134 + /// 这个内部转换成byte[] 音频数组
  135 + /// 没有垃圾产生,自己传递数组进来
  136 + /// </summary>
  137 + /// <param name="audioFloats">数组,这个长度需要是AudioDataLength*2大小</param>
  138 + /// <param name="offset">数组那个位置写入</param>
  139 + /// <returns>写入了多少个</returns>
  140 + public int ReadData(byte[] audioFloats, int offset)
  141 + {
  142 + //因为是16bit存储音频数据,所以float会转换成两个字节存储
  143 + var audiodata = AudioFloatData;
  144 +
  145 + int length = audiodata.Length * 2;
  146 +
  147 + int c = audioFloats.Length - offset;
  148 + c = c % 2 == 0 ? c : c - 1;
  149 +
  150 + length = c >= length ? length : c;
  151 +
  152 + int p = length / 2;
  153 +
  154 + for (int i = 0; i < p; i++)
  155 + {
  156 + short value = (short)(audiodata[i] * short.MaxValue);
  157 +
  158 + audioFloats[offset++] = (byte)value;
  159 + audioFloats[offset++] = (byte)(value >> 8);
  160 + }
  161 +
  162 + return length;
  163 +
  164 + }
  165 +
  166 + /// <summary>
  167 + /// 写入WAV音频数据
  168 + /// </summary>
  169 + /// <param name="filename"></param>
  170 + /// <returns></returns>
  171 + public bool WriteWAVFile(string filename)
  172 + {
  173 + return 1 == SherpaOnnxWriteWave(audioData, this.dataSize, this.sample_rate, filename);
  174 + }
  175 +
  176 + public void Dispose()
  177 + {
  178 + if (this.thisHandle != IntPtr.Zero)
  179 + {
  180 + SherpaOnnxDestroyOfflineTtsGeneratedAudio(this.thisHandle);
  181 + GC.SuppressFinalize(this);
  182 + this.thisHandle = IntPtr.Zero;
  183 + }
  184 + }
  185 + }
  186 +
  187 + [StructLayout(LayoutKind.Sequential)]
  188 + internal struct SherpaOnnxGeneratedAudio
  189 + {
  190 + internal readonly IntPtr audioData;
  191 + internal readonly int dataSize;
  192 +
  193 + /// <summary>
  194 + /// 采样率
  195 + /// </summary>
  196 + public readonly int sample_rate;
  197 + }
  198 +}
  1 +using System.Runtime.InteropServices;
  2 +
  3 +namespace TTS.Struct
  4 +{
  5 + [StructLayout(LayoutKind.Sequential)]
  6 + public struct SherpaOnnxOfflineTtsConfig
  7 + {
  8 + public SherpaOnnxOfflineTtsModelConfig model;
  9 + }
  10 +}
  1 +using System.Runtime.InteropServices;
  2 +
  3 +namespace TTS.Struct
  4 +{
  5 + [StructLayout(LayoutKind.Sequential)]
  6 + public struct SherpaOnnxOfflineTtsModelConfig
  7 + {
  8 + /// <summary>
  9 + /// 模型配置
  10 + /// </summary>
  11 + public SherpaOnnxOfflineTtsVitsModelConfig vits;
  12 + /// <summary>
  13 + /// 线程数
  14 + /// </summary>
  15 + public int num_threads;
  16 + public int debug;
  17 + /// <summary>
  18 + /// 使用cpu
  19 + /// </summary>
  20 + [MarshalAs(UnmanagedType.LPStr)]
  21 + public string provider;
  22 + }
  23 +}
  1 +using System.Runtime.InteropServices;
  2 +
  3 +namespace TTS.Struct
  4 +{
  5 + [StructLayout(LayoutKind.Sequential)]
  6 + public struct SherpaOnnxOfflineTtsVitsModelConfig
  7 + {
  8 + /// <summary>
  9 + /// 模型
  10 + /// "vits-zh-aishell3/vits-aishell3.onnx"
  11 + /// </summary>
  12 + [MarshalAs(UnmanagedType.LPStr)]
  13 + public string model;
  14 + /// <summary>
  15 + /// 词典文件
  16 + /// "vits-zh-aishell3/lexicon.txt"
  17 + /// </summary>
  18 + [MarshalAs(UnmanagedType.LPStr)]
  19 + public string lexicon;
  20 +
  21 + [MarshalAs(UnmanagedType.LPStr)]
  22 + public string tokens;
  23 +
  24 + /// <summary>
  25 + /// VITS模型的noise_scale (float,默认值= 0.667)
  26 + /// </summary>
  27 + public float noise_scale = 0.667f;
  28 + /// <summary>
  29 + /// VITS模型的noise_scale_w (float,默认值= 0.8)
  30 + /// </summary>
  31 + public float noise_scale_w = 0.8f;
  32 + /// <summary>
  33 + /// 演讲的速度。大→慢;小→更快。(float, default = 1)
  34 + /// </summary>
  35 + public float length_scale = 1f;
  36 +
  37 + public SherpaOnnxOfflineTtsVitsModelConfig()
  38 + {
  39 + noise_scale = 0.667f;
  40 + noise_scale_w = 0.8f;
  41 + length_scale = 1f;
  42 +
  43 + model = "vits-zh-aishell3/vits-aishell3.onnx";
  44 + lexicon = "vits-zh-aishell3/lexicon.txt";
  45 + tokens = "vits-zh-aishell3/tokens.txt";
  46 + }
  47 + }
  48 +}
  1 +using System.Runtime.InteropServices;
  2 +using TTS.Struct;
  3 +
  4 +namespace TTS
  5 +{
  6 + internal sealed class TTSCore : IDisposable
  7 + {
  8 + public const string Filename = "sherpa-onnx-c-api";
  9 +
  10 + [DllImport(Filename)]
  11 + private static extern IntPtr SherpaOnnxCreateOfflineTts(SherpaOnnxOfflineTtsConfig handle);
  12 +
  13 + [DllImport(Filename)]
  14 + private static extern IntPtr SherpaOnnxOfflineTtsGenerate(IntPtr createOfflineTtsIntptr, IntPtr text, int sid, float speed);
  15 +
  16 + [DllImport(Filename)]
  17 + private static extern void SherpaOnnxDestroyOfflineTts(IntPtr intPtr);
  18 +
  19 + /// <summary>
  20 + /// 原生句柄
  21 + /// </summary>
  22 + private IntPtr thisHandle;
  23 +
  24 + public TTSCore(SherpaOnnxOfflineTtsConfig modelConfig)
  25 + {
  26 + thisHandle = SherpaOnnxCreateOfflineTts(modelConfig);
  27 + }
  28 +
  29 + /// <summary>
  30 + /// 文字转语音
  31 + /// </summary>
  32 + /// <param name="text">文字</param>
  33 + /// <param name="sid">音色</param>
  34 + /// <param name="speed">速度</param>
  35 + /// <returns></returns>
  36 + public SherpaOnnxGeneratedAudioResult ToSpeech(string text, int sid, float speed = 1f)
  37 + {
  38 + var result = SherpaOnnxOfflineTtsGenerate(thisHandle, Marshal.StringToCoTaskMemUTF8(text), sid, speed);
  39 + SherpaOnnxGeneratedAudio impl = (SherpaOnnxGeneratedAudio)Marshal.PtrToStructure(result, typeof(SherpaOnnxGeneratedAudio));
  40 + return new SherpaOnnxGeneratedAudioResult(result, impl);
  41 + }
  42 +
  43 + /// <summary>
  44 + /// 文字转语音
  45 + /// </summary>
  46 + /// <param name="text">文字</param>
  47 + /// <param name="sid">音色</param>
  48 + /// <param name="speed">速度</param>
  49 + /// <returns></returns>
  50 + public Task<SherpaOnnxGeneratedAudioResult> ToSpeechAsync(string text, int sid, float speed = 1f)
  51 + {
  52 + return Task.Run(() => ToSpeech(text, sid, speed));
  53 + }
  54 +
  55 + ~TTSCore()
  56 + {
  57 + Dispose();
  58 + }
  59 +
  60 + public void Dispose()
  61 + {
  62 + if (this.thisHandle != IntPtr.Zero)
  63 + {
  64 + SherpaOnnxDestroyOfflineTts(this.thisHandle);
  65 + GC.SuppressFinalize(this);
  66 + this.thisHandle = IntPtr.Zero;
  67 + }
  68 + }
  69 + }
  70 +}