Fangjun Kuang
Committed by GitHub

Fix C# API to support streaming Paraformer (#266)

@@ -63,8 +63,10 @@ jobs: @@ -63,8 +63,10 @@ jobs:
63 shell: bash 63 shell: bash
64 run: | 64 run: |
65 cd dotnet-examples/ 65 cd dotnet-examples/
  66 +
66 cd online-decode-files 67 cd online-decode-files
67 - ./run.sh 68 + ./run-transducer.sh
  69 + ./run-paraformer.sh
68 70
69 cd ../offline-decode-files 71 cd ../offline-decode-files
70 ./run-nemo-ctc.sh 72 ./run-nemo-ctc.sh
@@ -55,6 +55,7 @@ sherpa-onnx-zipformer-en-2023-03-30 @@ -55,6 +55,7 @@ sherpa-onnx-zipformer-en-2023-03-30
55 sherpa-onnx-zipformer-en-2023-04-01 55 sherpa-onnx-zipformer-en-2023-04-01
56 run-offline-decode-files.sh 56 run-offline-decode-files.sh
57 sherpa-onnx-nemo-ctc-en-citrinet-512 57 sherpa-onnx-nemo-ctc-en-citrinet-512
  58 +sherpa-onnx-streaming-paraformer-bilingual-zh-en
58 run-offline-decode-files-nemo-ctc.sh 59 run-offline-decode-files-nemo-ctc.sh
59 *.jar 60 *.jar
60 sherpa-onnx-nemo-ctc-* 61 sherpa-onnx-nemo-ctc-*
@@ -23,15 +23,21 @@ class OnlineDecodeFiles @@ -23,15 +23,21 @@ class OnlineDecodeFiles
23 [Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")] 23 [Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")]
24 public string Provider { get; set; } 24 public string Provider { get; set; }
25 25
26 - [Option(Required = true, HelpText = "Path to encoder.onnx")] 26 + [Option(Required = false, HelpText = "Path to transducer encoder.onnx")]
27 public string Encoder { get; set; } 27 public string Encoder { get; set; }
28 28
29 - [Option(Required = true, HelpText = "Path to decoder.onnx")] 29 + [Option(Required = false, HelpText = "Path to transducer decoder.onnx")]
30 public string Decoder { get; set; } 30 public string Decoder { get; set; }
31 31
32 - [Option(Required = true, HelpText = "Path to joiner.onnx")] 32 + [Option(Required = false, HelpText = "Path to transducer joiner.onnx")]
33 public string Joiner { get; set; } 33 public string Joiner { get; set; }
34 34
  35 + [Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")]
  36 + public string ParaformerEncoder { get; set; }
  37 +
  38 + [Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")]
  39 + public string ParaformerDecoder { get; set; }
  40 +
35 [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")] 41 [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
36 public int NumThreads { get; set; } 42 public int NumThreads { get; set; }
37 43
@@ -88,6 +94,8 @@ larger than this value. Used only when --enable-endpoint is true.")] @@ -88,6 +94,8 @@ larger than this value. Used only when --enable-endpoint is true.")]
88 private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs) 94 private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
89 { 95 {
90 string usage = @" 96 string usage = @"
  97 +(1) Streaming transducer models
  98 +
91 dotnet run \ 99 dotnet run \
92 --tokens=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \ 100 --tokens=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
93 --encoder=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx \ 101 --encoder=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx \
@@ -99,8 +107,20 @@ dotnet run \ @@ -99,8 +107,20 @@ dotnet run \
99 --files ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav \ 107 --files ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav \
100 ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav 108 ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav
101 109
  110 +(2) Streaming Paraformer models
  111 +dotnet run \
  112 + --tokens=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
  113 + --paraformer-encoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \
  114 + --paraformer-decoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx \
  115 + --num-threads=2 \
  116 + --decoding-method=greedy_search \
  117 + --debug=false \
  118 + --files ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav \
  119 + ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/1.wav
  120 +
102 Please refer to 121 Please refer to
103 https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html 122 https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
  123 +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html
104 to download pre-trained streaming models. 124 to download pre-trained streaming models.
105 "; 125 ";
106 126
@@ -123,13 +143,17 @@ to download pre-trained streaming models. @@ -123,13 +143,17 @@ to download pre-trained streaming models.
123 // You can change it if your model has a different feature dim. 143 // You can change it if your model has a different feature dim.
124 config.FeatConfig.FeatureDim = 80; 144 config.FeatConfig.FeatureDim = 80;
125 145
126 - config.TransducerModelConfig.Encoder = options.Encoder;  
127 - config.TransducerModelConfig.Decoder = options.Decoder;  
128 - config.TransducerModelConfig.Joiner = options.Joiner;  
129 - config.TransducerModelConfig.Tokens = options.Tokens;  
130 - config.TransducerModelConfig.Provider = options.Provider;  
131 - config.TransducerModelConfig.NumThreads = options.NumThreads;  
132 - config.TransducerModelConfig.Debug = options.Debug ? 1 : 0; 146 + config.ModelConfig.Transducer.Encoder = options.Encoder;
  147 + config.ModelConfig.Transducer.Decoder = options.Decoder;
  148 + config.ModelConfig.Transducer.Joiner = options.Joiner;
  149 +
  150 + config.ModelConfig.Paraformer.Encoder = options.ParaformerEncoder;
  151 + config.ModelConfig.Paraformer.Decoder = options.ParaformerDecoder;
  152 +
  153 + config.ModelConfig.Tokens = options.Tokens;
  154 + config.ModelConfig.Provider = options.Provider;
  155 + config.ModelConfig.NumThreads = options.NumThreads;
  156 + config.ModelConfig.Debug = options.Debug ? 1 : 0;
133 157
134 config.DecodingMethod = options.DecodingMethod; 158 config.DecodingMethod = options.DecodingMethod;
135 config.MaxActivePaths = options.MaxActivePaths; 159 config.MaxActivePaths = options.MaxActivePaths;
  1 +#!/usr/bin/env bash
  2 +
  3 +# Please refer to
  4 +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english
  5 +# to download the model files
  6 +
  7 +if [ ! -d ./sherpa-onnx-streaming-paraformer-bilingual-zh-en ]; then
  8 + GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-paraformer-bilingual-zh-en
  9 + cd sherpa-onnx-streaming-paraformer-bilingual-zh-en
  10 + git lfs pull --include "*.onnx"
  11 + cd ..
  12 +fi
  13 +
  14 +dotnet run -c Release \
  15 + --tokens ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
  16 + --paraformer-encoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \
  17 + --paraformer-decoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx \
  18 + --decoding-method greedy_search \
  19 + --files ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/1.wav \
  20 + ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav
@@ -26,15 +26,21 @@ class SpeechRecognitionFromMicrophone @@ -26,15 +26,21 @@ class SpeechRecognitionFromMicrophone
26 [Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")] 26 [Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")]
27 public string Provider { get; set; } 27 public string Provider { get; set; }
28 28
29 - [Option(Required = true, HelpText = "Path to encoder.onnx")] 29 + [Option(Required = false, HelpText = "Path to transducer encoder.onnx")]
30 public string Encoder { get; set; } 30 public string Encoder { get; set; }
31 31
32 - [Option(Required = true, HelpText = "Path to decoder.onnx")] 32 + [Option(Required = false, HelpText = "Path to transducer decoder.onnx")]
33 public string Decoder { get; set; } 33 public string Decoder { get; set; }
34 34
35 - [Option(Required = true, HelpText = "Path to joiner.onnx")] 35 + [Option(Required = false, HelpText = "Path to transducer joiner.onnx")]
36 public string Joiner { get; set; } 36 public string Joiner { get; set; }
37 37
  38 + [Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")]
  39 + public string ParaformerEncoder { get; set; }
  40 +
  41 + [Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")]
  42 + public string ParaformerDecoder { get; set; }
  43 +
38 [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")] 44 [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
39 public int NumThreads { get; set; } 45 public int NumThreads { get; set; }
40 46
@@ -87,14 +93,24 @@ larger than this value. Used only when --enable-endpoint is true.")] @@ -87,14 +93,24 @@ larger than this value. Used only when --enable-endpoint is true.")]
87 private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs) 93 private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
88 { 94 {
89 string usage = @" 95 string usage = @"
  96 +(1) Streaming transducer models
  97 +
90 dotnet run -c Release \ 98 dotnet run -c Release \
91 --tokens ./icefall-asr-zipformer-streaming-wenetspeech-20230615/data/lang_char/tokens.txt \ 99 --tokens ./icefall-asr-zipformer-streaming-wenetspeech-20230615/data/lang_char/tokens.txt \
92 --encoder ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx \ 100 --encoder ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx \
93 --decoder ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx \ 101 --decoder ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx \
94 - --joiner ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx \ 102 + --joiner ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx
  103 +
  104 +(2) Streaming Paraformer models
  105 +
  106 +dotnet run \
  107 + --tokens=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
  108 + --paraformer-encoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \
  109 + --paraformer-decoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx
95 110
96 Please refer to 111 Please refer to
97 https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html 112 https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
  113 +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html
98 to download pre-trained streaming models. 114 to download pre-trained streaming models.
99 "; 115 ";
100 116
@@ -117,13 +133,17 @@ to download pre-trained streaming models. @@ -117,13 +133,17 @@ to download pre-trained streaming models.
117 // You can change it if your model has a different feature dim. 133 // You can change it if your model has a different feature dim.
118 config.FeatConfig.FeatureDim = 80; 134 config.FeatConfig.FeatureDim = 80;
119 135
120 - config.TransducerModelConfig.Encoder = options.Encoder;  
121 - config.TransducerModelConfig.Decoder = options.Decoder;  
122 - config.TransducerModelConfig.Joiner = options.Joiner;  
123 - config.TransducerModelConfig.Tokens = options.Tokens;  
124 - config.TransducerModelConfig.Provider = options.Provider;  
125 - config.TransducerModelConfig.NumThreads = options.NumThreads;  
126 - config.TransducerModelConfig.Debug = options.Debug ? 1 : 0; 136 + config.ModelConfig.Transducer.Encoder = options.Encoder;
  137 + config.ModelConfig.Transducer.Decoder = options.Decoder;
  138 + config.ModelConfig.Transducer.Joiner = options.Joiner;
  139 +
  140 + config.ModelConfig.Paraformer.Encoder = options.ParaformerEncoder;
  141 + config.ModelConfig.Paraformer.Decoder = options.ParaformerDecoder;
  142 +
  143 + config.ModelConfig.Tokens = options.Tokens;
  144 + config.ModelConfig.Provider = options.Provider;
  145 + config.ModelConfig.NumThreads = options.NumThreads;
  146 + config.ModelConfig.Debug = options.Debug ? 1 : 0;
127 147
128 config.DecodingMethod = options.DecodingMethod; 148 config.DecodingMethod = options.DecodingMethod;
129 config.MaxActivePaths = options.MaxActivePaths; 149 config.MaxActivePaths = options.MaxActivePaths;
@@ -135,7 +155,6 @@ to download pre-trained streaming models. @@ -135,7 +155,6 @@ to download pre-trained streaming models.
135 155
136 OnlineRecognizer recognizer = new OnlineRecognizer(config); 156 OnlineRecognizer recognizer = new OnlineRecognizer(config);
137 157
138 -  
139 OnlineStream s = recognizer.CreateStream(); 158 OnlineStream s = recognizer.CreateStream();
140 159
141 Console.WriteLine(PortAudio.VersionInfo.versionText); 160 Console.WriteLine(PortAudio.VersionInfo.versionText);
@@ -196,7 +215,6 @@ to download pre-trained streaming models. @@ -196,7 +215,6 @@ to download pre-trained streaming models.
196 215
197 stream.Start(); 216 stream.Start();
198 217
199 - int segment_index = 0;  
200 String lastText = ""; 218 String lastText = "";
201 int segmentIndex = 0; 219 int segmentIndex = 0;
202 220
  1 +#!/usr/bin/env bash
  2 +
  3 +# Please refer to
  4 +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english
  5 +# to download the model files
  6 +
  7 +if [ ! -d ./sherpa-onnx-streaming-paraformer-bilingual-zh-en ]; then
  8 + GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-paraformer-bilingual-zh-en
  9 + cd sherpa-onnx-streaming-paraformer-bilingual-zh-en
  10 + git lfs pull --include "*.onnx"
  11 + cd ..
  12 +fi
  13 +
  14 +dotnet run -c Release \
  15 + --tokens ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
  16 + --paraformer-encoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \
  17 + --paraformer-decoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx \
@@ -22,12 +22,8 @@ namespace SherpaOnnx @@ -22,12 +22,8 @@ namespace SherpaOnnx
22 Encoder = ""; 22 Encoder = "";
23 Decoder = ""; 23 Decoder = "";
24 Joiner = ""; 24 Joiner = "";
25 - Tokens = "";  
26 - NumThreads = 1;  
27 - Provider = "cpu";  
28 - Debug = 0;  
29 - ModelType = "";  
30 } 25 }
  26 +
31 [MarshalAs(UnmanagedType.LPStr)] 27 [MarshalAs(UnmanagedType.LPStr)]
32 public string Encoder; 28 public string Encoder;
33 29
@@ -36,6 +32,40 @@ namespace SherpaOnnx @@ -36,6 +32,40 @@ namespace SherpaOnnx
36 32
37 [MarshalAs(UnmanagedType.LPStr)] 33 [MarshalAs(UnmanagedType.LPStr)]
38 public string Joiner; 34 public string Joiner;
  35 + }
  36 +
  37 + [StructLayout(LayoutKind.Sequential)]
  38 + public struct OnlineParaformerModelConfig
  39 + {
  40 + public OnlineParaformerModelConfig()
  41 + {
  42 + Encoder = "";
  43 + Decoder = "";
  44 + }
  45 +
  46 + [MarshalAs(UnmanagedType.LPStr)]
  47 + public string Encoder;
  48 +
  49 + [MarshalAs(UnmanagedType.LPStr)]
  50 + public string Decoder;
  51 + }
  52 +
  53 + [StructLayout(LayoutKind.Sequential)]
  54 + public struct OnlineModelConfig
  55 + {
  56 + public OnlineModelConfig()
  57 + {
  58 + Transducer = new OnlineTransducerModelConfig();
  59 + Paraformer = new OnlineParaformerModelConfig();
  60 + Tokens = "";
  61 + NumThreads = 1;
  62 + Provider = "cpu";
  63 + Debug = 0;
  64 + ModelType = "";
  65 + }
  66 +
  67 + public OnlineTransducerModelConfig Transducer;
  68 + public OnlineParaformerModelConfig Paraformer;
39 69
40 [MarshalAs(UnmanagedType.LPStr)] 70 [MarshalAs(UnmanagedType.LPStr)]
41 public string Tokens; 71 public string Tokens;
@@ -78,7 +108,7 @@ namespace SherpaOnnx @@ -78,7 +108,7 @@ namespace SherpaOnnx
78 public OnlineRecognizerConfig() 108 public OnlineRecognizerConfig()
79 { 109 {
80 FeatConfig = new FeatureConfig(); 110 FeatConfig = new FeatureConfig();
81 - TransducerModelConfig = new OnlineTransducerModelConfig(); 111 + ModelConfig = new OnlineModelConfig();
82 DecodingMethod = "greedy_search"; 112 DecodingMethod = "greedy_search";
83 MaxActivePaths = 4; 113 MaxActivePaths = 4;
84 EnableEndpoint = 0; 114 EnableEndpoint = 0;
@@ -87,7 +117,7 @@ namespace SherpaOnnx @@ -87,7 +117,7 @@ namespace SherpaOnnx
87 Rule3MinUtteranceLength = 20.0F; 117 Rule3MinUtteranceLength = 20.0F;
88 } 118 }
89 public FeatureConfig FeatConfig; 119 public FeatureConfig FeatConfig;
90 - public OnlineTransducerModelConfig TransducerModelConfig; 120 + public OnlineModelConfig ModelConfig;
91 121
92 [MarshalAs(UnmanagedType.LPStr)] 122 [MarshalAs(UnmanagedType.LPStr)]
93 public string DecodingMethod; 123 public string DecodingMethod;