Fangjun Kuang
Committed by GitHub

Wrap punctuation APIs to C#. (#945)

... ... @@ -2,7 +2,10 @@
cd dotnet-examples/
cd speaker-identification
cd offline-punctuation
./run.sh
cd ../speaker-identification
./run.sh
cd ../streaming-hlg-decoding/
... ...
... ... @@ -196,6 +196,7 @@ jobs:
cp -v scripts/dotnet/examples/spoken-language-identification.csproj dotnet-examples/spoken-language-identification/
cp -v scripts/dotnet/examples/streaming-hlg-decoding.csproj dotnet-examples/streaming-hlg-decoding
cp -v scripts/dotnet/examples/speaker-identification.csproj dotnet-examples/speaker-identification
cp -v scripts/dotnet/examples/offline-punctuation.csproj dotnet-examples/offline-punctuation
ls -lh /tmp
... ...
// Copyright (c) 2024 Xiaomi Corporation
//
// This file shows how to add punctuation to text.
//
// 1. Download a model from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
//
// 2. Extract it
//
// tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
//
// 3. Now run it
//
// dotnet run
using SherpaOnnx;
using System.Collections.Generic;
using System;
/// <summary>
/// Console demo: builds an <c>OfflinePunctuation</c> from a CT-transformer
/// model on disk and prints each sample sentence before and after
/// punctuation has been added.
/// </summary>
class OfflinePunctuationDemo
{
    /// <summary>Program entry point.</summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        var config = new OfflinePunctuationConfig();
        config.Model.CtTransformer = "./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx";
        config.Model.Debug = 1;
        config.Model.NumThreads = 1;

        string[] textList = new string[] {
            "这是一个测试你好吗How are you我很好thank you are you ok谢谢你",
            "我们都是木头人不会说话不会动",
            "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
        };

        // OfflinePunctuation is IDisposable and owns a native handle, so
        // release it deterministically instead of waiting for the finalizer.
        using (var punct = new OfflinePunctuation(config))
        {
            Console.WriteLine("---------");
            foreach (string text in textList)
            {
                string textWithPunct = punct.AddPunct(text);
                Console.WriteLine("Input text: {0}", text);
                Console.WriteLine("Output text: {0}", textWithPunct);
                Console.WriteLine("---------");
            }
        }
    }
}
... ...
<!-- MSBuild project for the offline-punctuation example: a console executable. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<RootNamespace>offline_punctuation</RootNamespace>
<!-- Implicit global usings and nullable reference type analysis are both on. -->
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<!-- Floating version: no specific package version is pinned here. -->
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
</ItemGroup>
</Project>
... ...
#!/usr/bin/env bash
# Runs the offline-punctuation example, fetching the model on first use.

# -e: stop at the first failing command; -x: echo each command as it runs.
set -ex
# Download and unpack the model only when model.onnx is not already present,
# so repeated runs reuse the extracted directory.
if [ ! -e ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
# The archive is no longer needed once it has been extracted.
rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
fi
# Build and run the project in the current directory.
dotnet run
... ...
... ... @@ -19,6 +19,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "streaming-hlg-decoding", "s
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speaker-identification", "speaker-identification\speaker-identification.csproj", "{2B1B140E-A92F-426B-B0DF-5D916B67304F}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-punctuation", "offline-punctuation\offline-punctuation.csproj", "{42D85582-BB63-4259-A4EA-837D66AC078B}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
... ... @@ -60,5 +62,9 @@ Global
{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Debug|Any CPU.Build.0 = Debug|Any CPU
{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Release|Any CPU.ActiveCfg = Release|Any CPU
{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Release|Any CPU.Build.0 = Release|Any CPU
{42D85582-BB63-4259-A4EA-837D66AC078B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{42D85582-BB63-4259-A4EA-837D66AC078B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{42D85582-BB63-4259-A4EA-837D66AC078B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{42D85582-BB63-4259-A4EA-837D66AC078B}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal
... ...
... ... @@ -8,7 +8,6 @@ using System;
namespace SherpaOnnx
{
[StructLayout(LayoutKind.Sequential)]
public struct OfflineLMConfig
{
... ... @@ -22,5 +21,4 @@ namespace SherpaOnnx
public float Scale;
}
}
... ...
... ... @@ -8,7 +8,6 @@ using System;
namespace SherpaOnnx
{
[StructLayout(LayoutKind.Sequential)]
public struct OfflineModelConfig
{
... ... @@ -44,6 +43,4 @@ namespace SherpaOnnx
[MarshalAs(UnmanagedType.LPStr)]
public string ModelType;
}
}
... ...
... ... @@ -8,7 +8,6 @@ using System;
namespace SherpaOnnx
{
[StructLayout(LayoutKind.Sequential)]
public struct OfflineNemoEncDecCtcModelConfig
{
... ...
... ... @@ -18,5 +18,4 @@ namespace SherpaOnnx
[MarshalAs(UnmanagedType.LPStr)]
public string Model;
}
}
... ...
/// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
using System.Linq;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;
using System;
namespace SherpaOnnx
{
/// <summary>
/// Managed wrapper around the native offline punctuation object exposed by
/// the sherpa-onnx C API. The instance owns a native handle; call
/// <see cref="Dispose"/> (or use a <c>using</c> statement) to release it.
/// </summary>
public class OfflinePunctuation : IDisposable
{
    /// <summary>
    /// Creates the native punctuation object from <paramref name="config"/>.
    /// </summary>
    /// <param name="config">Model configuration passed by reference to the native library.</param>
    public OfflinePunctuation(OfflinePunctuationConfig config)
    {
        IntPtr h = SherpaOnnxCreateOfflinePunctuation(ref config);
        _handle = new HandleRef(this, h);
    }

    /// <summary>
    /// Runs the native punctuation function on <paramref name="text"/> and
    /// returns its result.
    /// </summary>
    /// <param name="text">Text to process.</param>
    /// <returns>
    /// The string returned by the native library, decoded as UTF-8, or an
    /// empty string when the native call returns a null pointer or an empty
    /// string.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    public String AddPunct(String text)
    {
        if (text == null)
        {
            throw new ArgumentNullException(nameof(text));
        }

        // The result below is decoded as UTF-8, so send the input as UTF-8 too.
        // LPStr marshalling would use the ANSI code page on Windows and corrupt
        // non-ASCII input. A fresh byte[] is zero-filled, so the extra trailing
        // byte is the NUL terminator.
        byte[] utf8 = Encoding.UTF8.GetBytes(text);
        byte[] utf8WithNul = new byte[utf8.Length + 1];
        Array.Copy(utf8, utf8WithNul, utf8.Length);

        IntPtr p = SherpaOfflinePunctuationAddPunct(_handle.Handle, utf8WithNul);
        if (p == IntPtr.Zero)
        {
            return "";
        }

        try
        {
            // Length of the NUL-terminated native string, in bytes.
            int length = 0;
            while (Marshal.ReadByte(p, length) != 0)
            {
                ++length;
            }

            if (length == 0)
            {
                return "";
            }

            byte[] stringBuffer = new byte[length];
            Marshal.Copy(p, stringBuffer, 0, length);
            return Encoding.UTF8.GetString(stringBuffer);
        }
        finally
        {
            // Hand the buffer back to the native library even if decoding throws.
            SherpaOfflinePunctuationFreeText(p);
        }
    }

    /// <summary>
    /// Releases the native object. Calling this more than once is a no-op.
    /// </summary>
    public void Dispose()
    {
        Cleanup();
        // Prevent the object from being placed on the
        // finalization queue
        System.GC.SuppressFinalize(this);
    }

    /// <summary>Releases the native object if <see cref="Dispose"/> was never called.</summary>
    ~OfflinePunctuation()
    {
        Cleanup();
    }

    /// <summary>Destroys the native object once and clears the stored handle.</summary>
    private void Cleanup()
    {
        IntPtr h = _handle.Handle;
        if (h == IntPtr.Zero)
        {
            // Already released (or never created); nothing to destroy.
            return;
        }

        // Don't permit the handle to be used again.
        _handle = new HandleRef(this, IntPtr.Zero);
        SherpaOnnxDestroyOfflinePunctuation(h);
    }

    private HandleRef _handle;

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxCreateOfflinePunctuation(ref OfflinePunctuationConfig config);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxDestroyOfflinePunctuation(IntPtr handle);

    // text is a NUL-terminated UTF-8 byte sequence (see AddPunct).
    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOfflinePunctuationAddPunct(IntPtr handle, byte[] text);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOfflinePunctuationFreeText(IntPtr p);
}
}
... ...
/// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
using System.Linq;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;
using System;
namespace SherpaOnnx
{
/// <summary>
/// Top-level configuration handed to the native library when an
/// <c>OfflinePunctuation</c> is created. Laid out sequentially for interop.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct OfflinePunctuationConfig
{
    /// <summary>Model settings.</summary>
    public OfflinePunctuationModelConfig Model;

    /// <summary>Initializes <see cref="Model"/> with its own defaults.</summary>
    public OfflinePunctuationConfig() => Model = new OfflinePunctuationModelConfig();
}
}
... ...
/// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
using System.Linq;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;
using System;
namespace SherpaOnnx
{
/// <summary>
/// Model settings for offline punctuation. The struct is laid out
/// sequentially for interop, so the field declaration order must not change.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct OfflinePunctuationModelConfig
{
    /// <summary>Path to the CT-transformer model file; empty by default.</summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string CtTransformer;

    /// <summary>Thread count; defaults to 1.</summary>
    public int NumThreads;

    /// <summary>Debug flag; defaults to 0. NOTE(review): presumably non-zero enables native debug output; confirm.</summary>
    public int Debug;

    /// <summary>Execution provider name; defaults to "cpu".</summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string Provider;

    /// <summary>Fills every field with its default value.</summary>
    public OfflinePunctuationModelConfig()
    {
        Provider = "cpu";
        Debug = 0;
        NumThreads = 1;
        CtTransformer = string.Empty;
    }
}
}
... ...
... ... @@ -72,5 +72,4 @@ namespace SherpaOnnx
[DllImport(Dll.Filename, EntryPoint = "DecodeMultipleOfflineStreams")]
private static extern void Decode(IntPtr handle, IntPtr[] streams, int n);
}
}
... ...
... ... @@ -8,7 +8,6 @@ using System;
namespace SherpaOnnx
{
[StructLayout(LayoutKind.Sequential)]
public struct OfflineRecognizerConfig
{
... ... @@ -38,6 +37,4 @@ namespace SherpaOnnx
public float HotwordsScore;
}
}
... ...
... ... @@ -8,7 +8,6 @@ using System;
namespace SherpaOnnx
{
public class OfflineRecognizerResult
{
public OfflineRecognizerResult(IntPtr handle)
... ... @@ -44,6 +43,4 @@ namespace SherpaOnnx
private String _text;
public String Text => _text;
}
}
... ...
... ... @@ -8,7 +8,6 @@ using System;
namespace SherpaOnnx
{
public class OfflineStream : IDisposable
{
public OfflineStream(IntPtr p)
... ... @@ -68,5 +67,4 @@ namespace SherpaOnnx
[DllImport(Dll.Filename, EntryPoint = "DestroyOfflineRecognizerResult")]
private static extern void DestroyResult(IntPtr handle);
}
}
... ...
... ... @@ -18,5 +18,4 @@ namespace SherpaOnnx
[MarshalAs(UnmanagedType.LPStr)]
public string Model;
}
}
... ...
... ... @@ -26,5 +26,4 @@ namespace SherpaOnnx
[MarshalAs(UnmanagedType.LPStr)]
public string Joiner;
}
}
... ...
... ... @@ -28,5 +28,4 @@ namespace SherpaOnnx
[MarshalAs(UnmanagedType.LPStr)]
public string RuleFars;
}
}
... ...
... ... @@ -8,7 +8,6 @@ using System;
namespace SherpaOnnx
{
[StructLayout(LayoutKind.Sequential)]
public struct OfflineTtsModelConfig
{
... ...
... ... @@ -33,5 +33,4 @@ namespace SherpaOnnx
public int TailPaddings;
}
}
... ...
... ... @@ -24,5 +24,4 @@ namespace SherpaOnnx
public int MaxActive;
}
}
... ...
... ... @@ -10,7 +10,6 @@ using System;
namespace SherpaOnnx
{
[StructLayout(LayoutKind.Sequential)]
public struct OnlineModelConfig
{
... ... @@ -45,5 +44,4 @@ namespace SherpaOnnx
[MarshalAs(UnmanagedType.LPStr)]
public string ModelType;
}
}
... ...
... ... @@ -10,7 +10,6 @@ using System;
namespace SherpaOnnx
{
[StructLayout(LayoutKind.Sequential)]
public struct OnlineParaformerModelConfig
{
... ... @@ -26,5 +25,4 @@ namespace SherpaOnnx
[MarshalAs(UnmanagedType.LPStr)]
public string Decoder;
}
}
... ...
... ... @@ -10,7 +10,6 @@ using System;
namespace SherpaOnnx
{
[StructLayout(LayoutKind.Sequential)]
public struct OnlineRecognizerConfig
{
... ... @@ -66,5 +65,4 @@ namespace SherpaOnnx
public OnlineCtcFstDecoderConfig CtcFstDecoderConfig;
}
}
... ...
... ... @@ -10,7 +10,6 @@ using System;
namespace SherpaOnnx
{
public class OnlineRecognizerResult
{
public OnlineRecognizerResult(IntPtr handle)
... ...
... ... @@ -10,7 +10,6 @@ using System;
namespace SherpaOnnx
{
public class OnlineStream : IDisposable
{
public OnlineStream(IntPtr p)
... ... @@ -61,5 +60,4 @@ namespace SherpaOnnx
[DllImport(Dll.Filename)]
private static extern void InputFinished(IntPtr handle);
}
}
... ...
... ... @@ -10,7 +10,6 @@ using System;
namespace SherpaOnnx
{
[StructLayout(LayoutKind.Sequential)]
public struct OnlineTransducerModelConfig
{
... ... @@ -30,5 +29,4 @@ namespace SherpaOnnx
[MarshalAs(UnmanagedType.LPStr)]
public string Joiner;
}
}
... ...
<!-- Variant of the offline-punctuation example project that also restores packages from a local directory. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<RootNamespace>offline_punctuation</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<PropertyGroup>
<!-- /tmp/packages is listed first, followed by any inherited sources and nuget.org. -->
<!-- NOTE(review): presumably /tmp/packages holds a locally built package; confirm against the CI workflow. -->
<RestoreSources>/tmp/packages;$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources>
</PropertyGroup>
<ItemGroup>
<!-- Floating version: no specific package version is pinned here. -->
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
</ItemGroup>
</Project>
... ...