Add C# API for Moonshine models. (#1483)

* Also, return timestamps for non-streaming ASR.

Add C# API for Moonshine models. (#1483)
* Also, return timestamps for non-streaming ASR.
Fangjun Kuang · GitHub
Commit 3622104133701514f30d555d8c4171bf3bac85f6 (short: 36221041) · 1 parent: cdd8e1bb
.github/scripts/test-dot-net.sh
dotnet-examples/offline-decode-files/Program.cs
dotnet-examples/offline-decode-files/run-moonshine.sh
scripts/dotnet/OfflineModelConfig.cs
scripts/dotnet/OfflineMoonshineModelConfig.cs
scripts/dotnet/OfflineRecognizerResult.cs
--- a/.github/scripts/test-dot-net.sh
查看文件 @3622104
+++ b/.github/scripts/test-dot-net.sh
查看文件 @3622104
@@ -9,6 +9,9 @@ rm -fv *.wav
 rm -rfv sherpa-onnx-pyannote-*
 
 cd ../offline-decode-files
+ ./run-moonshine.sh
+ rm -rf sherpa-onnx-*
+ 
 ./run-sense-voice-ctc.sh
 rm -rf sherpa-onnx-*
 
--- a/dotnet-examples/offline-decode-files/Program.cs
查看文件 @3622104
+++ b/dotnet-examples/offline-decode-files/Program.cs
查看文件 @3622104
@@ -17,7 +17,7 @@ class OfflineDecodeFiles
   {
 
     [Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")]
-     public int SampleRate { get; set; }  = 16000;
+     public int SampleRate { get; set; } = 16000;
 
     [Option("feat-dim", Required = false, Default = 80, HelpText = "Dimension of the features used to train the model")]
     public int FeatureDim { get; set; } = 80;
@@ -31,7 +31,7 @@ class OfflineDecodeFiles
     [Option(Required = false, Default = "", HelpText = "Path to transducer decoder.onnx. Used only for transducer models")]
     public string Decoder { get; set; } = "";
 
-     [Option(Required = false,  Default = "",HelpText = "Path to transducer joiner.onnx. Used only for transducer models")]
+     [Option(Required = false, Default = "", HelpText = "Path to transducer joiner.onnx. Used only for transducer models")]
     public string Joiner { get; set; } = "";
 
     [Option("model-type", Required = false, Default = "", HelpText = "model type")]
@@ -44,10 +44,22 @@ class OfflineDecodeFiles
     public string WhisperDecoder { get; set; } = "";
 
     [Option("whisper-language", Required = false, Default = "", HelpText = "Language of the input file. Can be empty")]
-     public string WhisperLanguage{ get; set; } = "";
+     public string WhisperLanguage { get; set; } = "";
 
     [Option("whisper-task", Required = false, Default = "transcribe", HelpText = "transcribe or translate")]
-     public string WhisperTask{ get; set; } = "transcribe";
+     public string WhisperTask { get; set; } = "transcribe";
+ 
+     [Option("moonshine-preprocessor", Required = false, Default = "", HelpText = "Path to preprocess.onnx. Used only for Moonshine models")]
+     public string MoonshinePreprocessor { get; set; } = "";
+ 
+     [Option("moonshine-encoder", Required = false, Default = "", HelpText = "Path to encode.onnx. Used only for Moonshine models")]
+     public string MoonshineEncoder { get; set; } = "";
+ 
+     [Option("moonshine-uncached-decoder", Required = false, Default = "", HelpText = "Path to uncached_decode.onnx. Used only for Moonshine models")]
+     public string MoonshineUncachedDecoder { get; set; } = "";
+ 
+     [Option("moonshine-cached-decoder", Required = false, Default = "", HelpText = "Path to cached_decode.onnx. Used only for Moonshine models")]
+     public string MoonshineCachedDecoder { get; set; } = "";
 
     [Option("tdnn-model", Required = false, Default = "", HelpText = "Path to tdnn yesno model")]
     public string TdnnModel { get; set; } = "";
@@ -90,7 +102,7 @@ It specifies number of active paths to keep during the search")]
     public float HotwordsScore { get; set; } = 1.5F;
 
     [Option("files", Required = true, HelpText = "Audio files for decoding")]
-     public IEnumerable<string> Files { get; set; } = new string[] {};
+     public IEnumerable<string> Files { get; set; } = new string[] { };
   }
 
   static void Main(string[] args)
@@ -236,6 +248,13 @@ to download pre-trained Tdnn models.
       config.ModelConfig.SenseVoice.Model = options.SenseVoiceModel;
       config.ModelConfig.SenseVoice.UseInverseTextNormalization = options.SenseVoiceUseItn;
     }
+     else if (!String.IsNullOrEmpty(options.MoonshinePreprocessor))
+     {
+       config.ModelConfig.Moonshine.Preprocessor = options.MoonshinePreprocessor;
+       config.ModelConfig.Moonshine.Encoder = options.MoonshineEncoder;
+       config.ModelConfig.Moonshine.UncachedDecoder = options.MoonshineUncachedDecoder;
+       config.ModelConfig.Moonshine.CachedDecoder = options.MoonshineCachedDecoder;
+     }
     else
     {
       Console.WriteLine("Please provide a model");
@@ -273,10 +292,21 @@ to download pre-trained Tdnn models.
     // display results
     for (int i = 0; i != files.Length; ++i)
     {
-       var text = streams[i].Result.Text;
+       var r = streams[i].Result;
       Console.WriteLine("--------------------");
       Console.WriteLine(files[i]);
-       Console.WriteLine(text);
+       Console.WriteLine("Text: {0}", r.Text);
+       Console.WriteLine("Tokens: [{0}]", string.Join(", ", r.Tokens));
+       if (r.Timestamps != null && r.Timestamps.Length > 0) {
+         Console.Write("Timestamps: [");
+         var sep = "";
+         for (int k = 0; k != r.Timestamps.Length; ++k)
+         {
+           Console.Write("{0}{1}", sep, r.Timestamps[k].ToString("0.00"));
+           sep = ", ";
+         }
+         Console.WriteLine("]");
+       }
     }
     Console.WriteLine("--------------------");
   }
--- a/dotnet-examples/offline-decode-files/run-moonshine.sh 0 → 100755
查看文件 @3622104
+++ b/dotnet-examples/offline-decode-files/run-moonshine.sh 0 → 100755
查看文件 @3622104
+ #!/usr/bin/env bash
+ # Example run: decode one wave file with the Moonshine tiny English int8 model.
+ set -ex  # -e: stop at the first failing command; -x: echo each command before running it
+ 
+ if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then  # download only when tokens.txt is not already on disk
+   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+   tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+   rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2  # drop the archive once it has been unpacked
+ fi
+ 
+ dotnet run \
+   --num-threads=2 \
+   --moonshine-preprocessor=./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx \
+   --moonshine-encoder=./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx \
+   --moonshine-uncached-decoder=./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx \
+   --moonshine-cached-decoder=./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx \
+   --tokens=./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt \
+   --files ./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav  # test wave located inside the model directory
--- a/scripts/dotnet/OfflineModelConfig.cs
查看文件 @3622104
+++ b/scripts/dotnet/OfflineModelConfig.cs
查看文件 @3622104
@@ -24,6 +24,7 @@ namespace SherpaOnnx
             BpeVocab = "";
             TeleSpeechCtc = "";
             SenseVoice = new OfflineSenseVoiceModelConfig();
+             Moonshine = new OfflineMoonshineModelConfig();
         }
         public OfflineTransducerModelConfig Transducer;
         public OfflineParaformerModelConfig Paraformer;
@@ -54,5 +55,6 @@ namespace SherpaOnnx
         public string TeleSpeechCtc;
 
         public OfflineSenseVoiceModelConfig SenseVoice;
+         public OfflineMoonshineModelConfig Moonshine;
     }
 }
--- a/scripts/dotnet/OfflineMoonshineModelConfig.cs 0 → 100644
查看文件 @3622104
+++ b/scripts/dotnet/OfflineMoonshineModelConfig.cs 0 → 100644
查看文件 @3622104
+ /// Copyright (c)  2024  Xiaomi Corporation (authors: Fangjun Kuang)
+ 
+ using System.Runtime.InteropServices;
+ 
+ namespace SherpaOnnx
+ {
+     [StructLayout(LayoutKind.Sequential)] // fields are laid out in declaration order, so do not reorder them
+     public struct OfflineMoonshineModelConfig // file paths of the four ONNX parts of a Moonshine model
+     {
+         public OfflineMoonshineModelConfig() // gives every path an empty-string default instead of null
+         {
+             Preprocessor = "";
+             Encoder = "";
+             UncachedDecoder = "";
+             CachedDecoder = "";
+         }
+         [MarshalAs(UnmanagedType.LPStr)] // each path is marshalled as a NUL-terminated single-byte string
+         public string Preprocessor; // path to preprocess.onnx
+ 
+         [MarshalAs(UnmanagedType.LPStr)]
+         public string Encoder; // path to encode.onnx
+ 
+         [MarshalAs(UnmanagedType.LPStr)]
+         public string UncachedDecoder; // path to uncached_decode.onnx
+ 
+         [MarshalAs(UnmanagedType.LPStr)]
+         public string CachedDecoder; // path to cached_decode.onnx
+     }
+ }
--- a/scripts/dotnet/OfflineRecognizerResult.cs
查看文件 @3622104
+++ b/scripts/dotnet/OfflineRecognizerResult.cs
查看文件 @3622104
@@ -31,17 +31,70 @@ namespace SherpaOnnx
             byte[] stringBuffer = new byte[length];
             Marshal.Copy(impl.Text, stringBuffer, 0, length);
             _text = Encoding.UTF8.GetString(stringBuffer);
+ 
+             _tokens = new String[impl.Count];
+ 
+             unsafe
+             {
+                 byte* buf = (byte*)impl.Tokens;
+                 for (int i = 0; i < impl.Count; i++)
+                 {
+                     length = 0;
+                     byte* start = buf;
+                     while (*buf != 0)
+                     {
+                         ++buf;
+                         length += 1;
+                     }
+                     ++buf;
+ 
+                     stringBuffer = new byte[length];
+                     fixed (byte* pTarget = stringBuffer)
+                     {
+                         for (int k = 0; k < length; k++)
+                         {
+                             pTarget[k] = start[k];
+                         }
+                     }
+ 
+                     _tokens[i] = Encoding.UTF8.GetString(stringBuffer);
+                 }
+             }
+ 
+             unsafe
+             {
+               if (impl.Timestamps != IntPtr.Zero)
+               {
+                 float *t = (float*)impl.Timestamps;
+                 _timestamps = new float[impl.Count];
+                 fixed (float* f = _timestamps)
+                 {
+                   for (int k = 0; k < impl.Count; k++)
+                   {
+                     f[k] = t[k];
+                   }
+                 }
+               }
+             }
+ 
         }
 
         [StructLayout(LayoutKind.Sequential)]
         struct Impl
         {
             public IntPtr Text;
+             public IntPtr Timestamps; // read as Count floats; IntPtr.Zero means no timestamps were provided
+             public int Count; // number of tokens; also the number of timestamps when Timestamps is set
+             public IntPtr Tokens; // read as Count consecutive NUL-terminated strings, each decoded as UTF-8
         }
 
         private String _text;
         public String Text => _text;
-     }
 
+         private String[] _tokens; // one entry per token, filled from the native Tokens buffer
+         public String[] Tokens => _tokens; // exposes the backing array itself, not a copy
 
+         private float[] _timestamps; // only assigned when the native Timestamps pointer is non-zero, so it can stay null
+         public float[] Timestamps => _timestamps; // may be null; the example program checks for null before printing
+     }
 }