Fangjun Kuang
Committed by GitHub

Fix C# API to support streaming Paraformer (#266)

... ... @@ -63,8 +63,10 @@ jobs:
shell: bash
run: |
cd dotnet-examples/
cd online-decode-files
./run.sh
./run-transducer.sh
./run-paraformer.sh
cd ../offline-decode-files
./run-nemo-ctc.sh
... ...
... ... @@ -55,6 +55,7 @@ sherpa-onnx-zipformer-en-2023-03-30
sherpa-onnx-zipformer-en-2023-04-01
run-offline-decode-files.sh
sherpa-onnx-nemo-ctc-en-citrinet-512
sherpa-onnx-streaming-paraformer-bilingual-zh-en
run-offline-decode-files-nemo-ctc.sh
*.jar
sherpa-onnx-nemo-ctc-*
... ...
... ... @@ -23,15 +23,21 @@ class OnlineDecodeFiles
[Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")]
public string Provider { get; set; }
[Option(Required = true, HelpText = "Path to encoder.onnx")]
[Option(Required = false, HelpText = "Path to transducer encoder.onnx")]
public string Encoder { get; set; }
[Option(Required = true, HelpText = "Path to decoder.onnx")]
[Option(Required = false, HelpText = "Path to transducer decoder.onnx")]
public string Decoder { get; set; }
[Option(Required = true, HelpText = "Path to joiner.onnx")]
[Option(Required = false, HelpText = "Path to transducer joiner.onnx")]
public string Joiner { get; set; }
[Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")]
public string ParaformerEncoder { get; set; }
[Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")]
public string ParaformerDecoder { get; set; }
[Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
public int NumThreads { get; set; }
... ... @@ -88,6 +94,8 @@ larger than this value. Used only when --enable-endpoint is true.")]
private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
{
string usage = @"
(1) Streaming transducer models
dotnet run \
--tokens=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
--encoder=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx \
... ... @@ -99,8 +107,20 @@ dotnet run \
--files ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav \
./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav
(2) Streaming Paraformer models
dotnet run \
--tokens=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
--paraformer-encoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \
--paraformer-decoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx \
--num-threads=2 \
--decoding-method=greedy_search \
--debug=false \
--files ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav \
./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/1.wav
Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html
to download pre-trained streaming models.
";
... ... @@ -123,13 +143,17 @@ to download pre-trained streaming models.
// You can change it if your model has a different feature dim.
config.FeatConfig.FeatureDim = 80;
config.TransducerModelConfig.Encoder = options.Encoder;
config.TransducerModelConfig.Decoder = options.Decoder;
config.TransducerModelConfig.Joiner = options.Joiner;
config.TransducerModelConfig.Tokens = options.Tokens;
config.TransducerModelConfig.Provider = options.Provider;
config.TransducerModelConfig.NumThreads = options.NumThreads;
config.TransducerModelConfig.Debug = options.Debug ? 1 : 0;
config.ModelConfig.Transducer.Encoder = options.Encoder;
config.ModelConfig.Transducer.Decoder = options.Decoder;
config.ModelConfig.Transducer.Joiner = options.Joiner;
config.ModelConfig.Paraformer.Encoder = options.ParaformerEncoder;
config.ModelConfig.Paraformer.Decoder = options.ParaformerDecoder;
config.ModelConfig.Tokens = options.Tokens;
config.ModelConfig.Provider = options.Provider;
config.ModelConfig.NumThreads = options.NumThreads;
config.ModelConfig.Debug = options.Debug ? 1 : 0;
config.DecodingMethod = options.DecodingMethod;
config.MaxActivePaths = options.MaxActivePaths;
... ...
#!/usr/bin/env bash
#
# Decode two test wave files with the streaming Paraformer bilingual
# (Chinese + English) model using the C# online-decode-files example.
#
# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english
# to download the model files

# Stop at the first failing command. Without this, a failed clone or cd would
# let the remaining commands run from the wrong directory.
set -e

model_dir=./sherpa-onnx-streaming-paraformer-bilingual-zh-en

if [ ! -d "$model_dir" ]; then
  # Clone without smudging LFS files, then pull only the *.onnx files.
  GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-paraformer-bilingual-zh-en
  # Use a subshell so the working directory of this script never changes.
  (cd "$model_dir" && git lfs pull --include "*.onnx")
fi

dotnet run -c Release \
  --tokens "$model_dir/tokens.txt" \
  --paraformer-encoder "$model_dir/encoder.int8.onnx" \
  --paraformer-decoder "$model_dir/decoder.int8.onnx" \
  --decoding-method greedy_search \
  --files "$model_dir/test_wavs/1.wav" \
  "$model_dir/test_wavs/0.wav"
... ...
... ... @@ -26,15 +26,21 @@ class SpeechRecognitionFromMicrophone
[Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")]
public string Provider { get; set; }
[Option(Required = true, HelpText = "Path to encoder.onnx")]
[Option(Required = false, HelpText = "Path to transducer encoder.onnx")]
public string Encoder { get; set; }
[Option(Required = true, HelpText = "Path to decoder.onnx")]
[Option(Required = false, HelpText = "Path to transducer decoder.onnx")]
public string Decoder { get; set; }
[Option(Required = true, HelpText = "Path to joiner.onnx")]
[Option(Required = false, HelpText = "Path to transducer joiner.onnx")]
public string Joiner { get; set; }
[Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")]
public string ParaformerEncoder { get; set; }
[Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")]
public string ParaformerDecoder { get; set; }
[Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
public int NumThreads { get; set; }
... ... @@ -87,14 +93,24 @@ larger than this value. Used only when --enable-endpoint is true.")]
private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
{
string usage = @"
(1) Streaming transducer models
dotnet run -c Release \
--tokens ./icefall-asr-zipformer-streaming-wenetspeech-20230615/data/lang_char/tokens.txt \
--encoder ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx \
--decoder ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx \
--joiner ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx \
--joiner ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx
(2) Streaming Paraformer models
dotnet run \
--tokens=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
--paraformer-encoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \
--paraformer-decoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx
Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html
to download pre-trained streaming models.
";
... ... @@ -117,13 +133,17 @@ to download pre-trained streaming models.
// You can change it if your model has a different feature dim.
config.FeatConfig.FeatureDim = 80;
config.TransducerModelConfig.Encoder = options.Encoder;
config.TransducerModelConfig.Decoder = options.Decoder;
config.TransducerModelConfig.Joiner = options.Joiner;
config.TransducerModelConfig.Tokens = options.Tokens;
config.TransducerModelConfig.Provider = options.Provider;
config.TransducerModelConfig.NumThreads = options.NumThreads;
config.TransducerModelConfig.Debug = options.Debug ? 1 : 0;
config.ModelConfig.Transducer.Encoder = options.Encoder;
config.ModelConfig.Transducer.Decoder = options.Decoder;
config.ModelConfig.Transducer.Joiner = options.Joiner;
config.ModelConfig.Paraformer.Encoder = options.ParaformerEncoder;
config.ModelConfig.Paraformer.Decoder = options.ParaformerDecoder;
config.ModelConfig.Tokens = options.Tokens;
config.ModelConfig.Provider = options.Provider;
config.ModelConfig.NumThreads = options.NumThreads;
config.ModelConfig.Debug = options.Debug ? 1 : 0;
config.DecodingMethod = options.DecodingMethod;
config.MaxActivePaths = options.MaxActivePaths;
... ... @@ -135,7 +155,6 @@ to download pre-trained streaming models.
OnlineRecognizer recognizer = new OnlineRecognizer(config);
OnlineStream s = recognizer.CreateStream();
Console.WriteLine(PortAudio.VersionInfo.versionText);
... ... @@ -196,7 +215,6 @@ to download pre-trained streaming models.
stream.Start();
int segment_index = 0;
String lastText = "";
int segmentIndex = 0;
... ...
#!/usr/bin/env bash
# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english
# to download the model files
# Download the model on first use: clone without smudging LFS files, then
# pull only the *.onnx files.
if [ ! -d ./sherpa-onnx-streaming-paraformer-bilingual-zh-en ]; then
  GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-paraformer-bilingual-zh-en
  # Run the pull in a subshell guarded by `&&`: if the clone failed and the
  # directory does not exist, nothing is pulled in the wrong place and the
  # script's own working directory is never moved.
  (cd sherpa-onnx-streaming-paraformer-bilingual-zh-en && git lfs pull --include "*.onnx")
fi
dotnet run -c Release \
--tokens ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
--paraformer-encoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \
--paraformer-decoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx \
... ...
... ... @@ -22,12 +22,8 @@ namespace SherpaOnnx
Encoder = "";
Decoder = "";
Joiner = "";
Tokens = "";
NumThreads = 1;
Provider = "cpu";
Debug = 0;
ModelType = "";
}
[MarshalAs(UnmanagedType.LPStr)]
public string Encoder;
... ... @@ -36,6 +32,40 @@ namespace SherpaOnnx
[MarshalAs(UnmanagedType.LPStr)]
public string Joiner;
}
/// <summary>
/// Model file paths for a streaming (online) Paraformer model.
/// </summary>
/// <remarks>
/// The struct uses sequential layout and both strings are marshalled as LPStr.
/// NOTE(review): presumably this mirrors a native struct, so the field order
/// must not be changed — confirm against the native header.
/// </remarks>
[StructLayout(LayoutKind.Sequential)]
public struct OnlineParaformerModelConfig
{
    /// <summary>Path to the Paraformer encoder model file.</summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string Encoder;

    /// <summary>Path to the Paraformer decoder model file.</summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string Decoder;

    /// <summary>
    /// Creates a config whose encoder and decoder paths are both empty strings.
    /// </summary>
    public OnlineParaformerModelConfig()
    {
        Encoder = Decoder = string.Empty;
    }
}
[StructLayout(LayoutKind.Sequential)]
public struct OnlineModelConfig
{
public OnlineModelConfig()
{
Transducer = new OnlineTransducerModelConfig();
Paraformer = new OnlineParaformerModelConfig();
Tokens = "";
NumThreads = 1;
Provider = "cpu";
Debug = 0;
ModelType = "";
}
public OnlineTransducerModelConfig Transducer;
public OnlineParaformerModelConfig Paraformer;
[MarshalAs(UnmanagedType.LPStr)]
public string Tokens;
... ... @@ -78,7 +108,7 @@ namespace SherpaOnnx
public OnlineRecognizerConfig()
{
FeatConfig = new FeatureConfig();
TransducerModelConfig = new OnlineTransducerModelConfig();
ModelConfig = new OnlineModelConfig();
DecodingMethod = "greedy_search";
MaxActivePaths = 4;
EnableEndpoint = 0;
... ... @@ -87,7 +117,7 @@ namespace SherpaOnnx
Rule3MinUtteranceLength = 20.0F;
}
public FeatureConfig FeatConfig;
public OnlineTransducerModelConfig TransducerModelConfig;
public OnlineModelConfig ModelConfig;
[MarshalAs(UnmanagedType.LPStr)]
public string DecodingMethod;
... ...