Fangjun Kuang
Committed by GitHub

Support paraformer on iOS (#265)

* Fix C API to support streaming paraformer

* Fix Swift API

* Support paraformer in iOS
@@ -113,13 +113,13 @@ int32_t main(int32_t argc, char *argv[]) { @@ -113,13 +113,13 @@ int32_t main(int32_t argc, char *argv[]) {
113 config.model_config.tokens = value; 113 config.model_config.tokens = value;
114 break; 114 break;
115 case 'e': 115 case 'e':
116 - config.model_config.encoder = value; 116 + config.model_config.transducer.encoder = value;
117 break; 117 break;
118 case 'd': 118 case 'd':
119 - config.model_config.decoder = value; 119 + config.model_config.transducer.decoder = value;
120 break; 120 break;
121 case 'j': 121 case 'j':
122 - config.model_config.joiner = value; 122 + config.model_config.transducer.joiner = value;
123 break; 123 break;
124 case 'n': 124 case 'n':
125 config.model_config.num_threads = atoi(value); 125 config.model_config.num_threads = atoi(value);
@@ -7,6 +7,8 @@ @@ -7,6 +7,8 @@
7 objects = { 7 objects = {
8 8
9 /* Begin PBXBuildFile section */ 9 /* Begin PBXBuildFile section */
  10 + C93989AE2A89FE13009AB859 /* sherpa-onnx.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = C984A81B29AA11C500D74C52 /* sherpa-onnx.xcframework */; };
  11 + C93989B02A89FE33009AB859 /* onnxruntime.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = C93989AF2A89FE33009AB859 /* onnxruntime.xcframework */; };
10 C984A7E829A9EEB700D74C52 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A7E729A9EEB700D74C52 /* AppDelegate.swift */; }; 12 C984A7E829A9EEB700D74C52 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A7E729A9EEB700D74C52 /* AppDelegate.swift */; };
11 C984A7EA29A9EEB700D74C52 /* SceneDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A7E929A9EEB700D74C52 /* SceneDelegate.swift */; }; 13 C984A7EA29A9EEB700D74C52 /* SceneDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A7E929A9EEB700D74C52 /* SceneDelegate.swift */; };
12 C984A7F129A9EEB900D74C52 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = C984A7F029A9EEB900D74C52 /* Assets.xcassets */; }; 14 C984A7F129A9EEB900D74C52 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = C984A7F029A9EEB900D74C52 /* Assets.xcassets */; };
@@ -18,8 +20,6 @@ @@ -18,8 +20,6 @@
18 C984A82829AA196100D74C52 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = C984A82629AA196100D74C52 /* Main.storyboard */; }; 20 C984A82829AA196100D74C52 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = C984A82629AA196100D74C52 /* Main.storyboard */; };
19 C984A82A29AA19AC00D74C52 /* Model.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A82929AA19AC00D74C52 /* Model.swift */; }; 21 C984A82A29AA19AC00D74C52 /* Model.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A82929AA19AC00D74C52 /* Model.swift */; };
20 C984A83C29AA430B00D74C52 /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A83B29AA430B00D74C52 /* ViewController.swift */; }; 22 C984A83C29AA430B00D74C52 /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A83B29AA430B00D74C52 /* ViewController.swift */; };
21 - C984A83D29AA43D900D74C52 /* sherpa-onnx.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = C984A81B29AA11C500D74C52 /* sherpa-onnx.xcframework */; };  
22 - C984A83F29AA43EE00D74C52 /* onnxruntime.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = C984A83E29AA43EE00D74C52 /* onnxruntime.xcframework */; };  
23 /* End PBXBuildFile section */ 23 /* End PBXBuildFile section */
24 24
25 /* Begin PBXContainerItemProxy section */ 25 /* Begin PBXContainerItemProxy section */
@@ -40,6 +40,10 @@ @@ -40,6 +40,10 @@
40 /* End PBXContainerItemProxy section */ 40 /* End PBXContainerItemProxy section */
41 41
42 /* Begin PBXFileReference section */ 42 /* Begin PBXFileReference section */
  43 + C93989AF2A89FE33009AB859 /* onnxruntime.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = onnxruntime.xcframework; path = "../../build-ios/ios-onnxruntime/1.15.1/onnxruntime.xcframework"; sourceTree = "<group>"; };
  44 + C93989B12A89FF78009AB859 /* decoder.int8.onnx */ = {isa = PBXFileReference; lastKnownFileType = file; name = decoder.int8.onnx; path = "../../../icefall-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx"; sourceTree = "<group>"; };
  45 + C93989B22A89FF78009AB859 /* encoder.int8.onnx */ = {isa = PBXFileReference; lastKnownFileType = file; name = encoder.int8.onnx; path = "../../../icefall-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx"; sourceTree = "<group>"; };
  46 + C93989B32A89FF78009AB859 /* tokens.txt */ = {isa = PBXFileReference; lastKnownFileType = text; name = tokens.txt; path = "../../../icefall-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt"; sourceTree = "<group>"; };
43 C984A7E429A9EEB700D74C52 /* SherpaOnnx.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = SherpaOnnx.app; sourceTree = BUILT_PRODUCTS_DIR; }; 47 C984A7E429A9EEB700D74C52 /* SherpaOnnx.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = SherpaOnnx.app; sourceTree = BUILT_PRODUCTS_DIR; };
44 C984A7E729A9EEB700D74C52 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; }; 48 C984A7E729A9EEB700D74C52 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
45 C984A7E929A9EEB700D74C52 /* SceneDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SceneDelegate.swift; sourceTree = "<group>"; }; 49 C984A7E929A9EEB700D74C52 /* SceneDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SceneDelegate.swift; sourceTree = "<group>"; };
@@ -66,8 +70,8 @@ @@ -66,8 +70,8 @@
66 isa = PBXFrameworksBuildPhase; 70 isa = PBXFrameworksBuildPhase;
67 buildActionMask = 2147483647; 71 buildActionMask = 2147483647;
68 files = ( 72 files = (
69 - C984A83F29AA43EE00D74C52 /* onnxruntime.xcframework in Frameworks */,  
70 - C984A83D29AA43D900D74C52 /* sherpa-onnx.xcframework in Frameworks */, 73 + C93989B02A89FE33009AB859 /* onnxruntime.xcframework in Frameworks */,
  74 + C93989AE2A89FE13009AB859 /* sherpa-onnx.xcframework in Frameworks */,
71 ); 75 );
72 runOnlyForDeploymentPostprocessing = 0; 76 runOnlyForDeploymentPostprocessing = 0;
73 }; 77 };
@@ -146,8 +150,12 @@ @@ -146,8 +150,12 @@
146 C984A81A29AA11C500D74C52 /* Frameworks */ = { 150 C984A81A29AA11C500D74C52 /* Frameworks */ = {
147 isa = PBXGroup; 151 isa = PBXGroup;
148 children = ( 152 children = (
  153 + C93989B12A89FF78009AB859 /* decoder.int8.onnx */,
  154 + C93989B22A89FF78009AB859 /* encoder.int8.onnx */,
  155 + C93989B32A89FF78009AB859 /* tokens.txt */,
149 C984A82029AA139600D74C52 /* onnxruntime.xcframework */, 156 C984A82029AA139600D74C52 /* onnxruntime.xcframework */,
150 C984A83E29AA43EE00D74C52 /* onnxruntime.xcframework */, 157 C984A83E29AA43EE00D74C52 /* onnxruntime.xcframework */,
  158 + C93989AF2A89FE33009AB859 /* onnxruntime.xcframework */,
151 C984A81B29AA11C500D74C52 /* sherpa-onnx.xcframework */, 159 C984A81B29AA11C500D74C52 /* sherpa-onnx.xcframework */,
152 ); 160 );
153 name = Frameworks; 161 name = Frameworks;
@@ -15,70 +15,91 @@ func getResource(_ forResource: String, _ ofType: String) -> String { @@ -15,70 +15,91 @@ func getResource(_ forResource: String, _ ofType: String) -> String {
15 15
16 /// sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English) 16 /// sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English)
17 /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html 17 /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html
18 -func getBilingualStreamZhEnZipformer20230220() -> SherpaOnnxOnlineTransducerModelConfig { 18 +func getBilingualStreamZhEnZipformer20230220() -> SherpaOnnxOnlineModelConfig {
19 let encoder = getResource("encoder-epoch-99-avg-1", "onnx") 19 let encoder = getResource("encoder-epoch-99-avg-1", "onnx")
20 let decoder = getResource("decoder-epoch-99-avg-1", "onnx") 20 let decoder = getResource("decoder-epoch-99-avg-1", "onnx")
21 let joiner = getResource("joiner-epoch-99-avg-1", "onnx") 21 let joiner = getResource("joiner-epoch-99-avg-1", "onnx")
22 let tokens = getResource("tokens", "txt") 22 let tokens = getResource("tokens", "txt")
23 23
24 - return sherpaOnnxOnlineTransducerModelConfig( 24 + return sherpaOnnxOnlineModelConfig(
  25 + tokens: tokens,
  26 + transducer: sherpaOnnxOnlineTransducerModelConfig(
25 encoder: encoder, 27 encoder: encoder,
26 decoder: decoder, 28 decoder: decoder,
27 - joiner: joiner,  
28 - tokens: tokens,  
29 - numThreads: 2, 29 + joiner: joiner
  30 + ),
  31 + numThreads: 1,
30 modelType: "zipformer" 32 modelType: "zipformer"
31 ) 33 )
32 } 34 }
33 35
34 -func getZhZipformer20230615() -> SherpaOnnxOnlineTransducerModelConfig { 36 +func getZhZipformer20230615() -> SherpaOnnxOnlineModelConfig {
35 let encoder = getResource("encoder-epoch-12-avg-4-chunk-16-left-128", "onnx") 37 let encoder = getResource("encoder-epoch-12-avg-4-chunk-16-left-128", "onnx")
36 let decoder = getResource("decoder-epoch-12-avg-4-chunk-16-left-128", "onnx") 38 let decoder = getResource("decoder-epoch-12-avg-4-chunk-16-left-128", "onnx")
37 let joiner = getResource("joiner-epoch-12-avg-4-chunk-16-left-128", "onnx") 39 let joiner = getResource("joiner-epoch-12-avg-4-chunk-16-left-128", "onnx")
38 let tokens = getResource("tokens", "txt") 40 let tokens = getResource("tokens", "txt")
39 41
40 - return sherpaOnnxOnlineTransducerModelConfig( 42 + return sherpaOnnxOnlineModelConfig(
  43 + tokens: tokens,
  44 + transducer: sherpaOnnxOnlineTransducerModelConfig(
41 encoder: encoder, 45 encoder: encoder,
42 decoder: decoder, 46 decoder: decoder,
43 - joiner: joiner,  
44 - tokens: tokens,  
45 - numThreads: 2, 47 + joiner: joiner
  48 + ),
  49 + numThreads: 1,
46 modelType: "zipformer2" 50 modelType: "zipformer2"
47 ) 51 )
48 } 52 }
49 53
50 -func getZhZipformer20230615Int8() -> SherpaOnnxOnlineTransducerModelConfig { 54 +func getZhZipformer20230615Int8() -> SherpaOnnxOnlineModelConfig {
51 let encoder = getResource("encoder-epoch-12-avg-4-chunk-16-left-128.int8", "onnx") 55 let encoder = getResource("encoder-epoch-12-avg-4-chunk-16-left-128.int8", "onnx")
52 let decoder = getResource("decoder-epoch-12-avg-4-chunk-16-left-128", "onnx") 56 let decoder = getResource("decoder-epoch-12-avg-4-chunk-16-left-128", "onnx")
53 let joiner = getResource("joiner-epoch-12-avg-4-chunk-16-left-128", "onnx") 57 let joiner = getResource("joiner-epoch-12-avg-4-chunk-16-left-128", "onnx")
54 let tokens = getResource("tokens", "txt") 58 let tokens = getResource("tokens", "txt")
55 59
56 - return sherpaOnnxOnlineTransducerModelConfig( 60 + return sherpaOnnxOnlineModelConfig(
  61 + tokens: tokens,
  62 + transducer: sherpaOnnxOnlineTransducerModelConfig(
57 encoder: encoder, 63 encoder: encoder,
58 decoder: decoder, 64 decoder: decoder,
59 - joiner: joiner,  
60 - tokens: tokens,  
61 - numThreads: 2, 65 + joiner: joiner),
  66 + numThreads: 1,
62 modelType: "zipformer2" 67 modelType: "zipformer2"
63 ) 68 )
64 } 69 }
65 70
66 -func getEnZipformer20230626() -> SherpaOnnxOnlineTransducerModelConfig { 71 +func getEnZipformer20230626() -> SherpaOnnxOnlineModelConfig {
67 let encoder = getResource("encoder-epoch-99-avg-1-chunk-16-left-128", "onnx") 72 let encoder = getResource("encoder-epoch-99-avg-1-chunk-16-left-128", "onnx")
68 let decoder = getResource("decoder-epoch-99-avg-1-chunk-16-left-128", "onnx") 73 let decoder = getResource("decoder-epoch-99-avg-1-chunk-16-left-128", "onnx")
69 let joiner = getResource("joiner-epoch-99-avg-1-chunk-16-left-128", "onnx") 74 let joiner = getResource("joiner-epoch-99-avg-1-chunk-16-left-128", "onnx")
70 let tokens = getResource("tokens", "txt") 75 let tokens = getResource("tokens", "txt")
71 76
72 - return sherpaOnnxOnlineTransducerModelConfig( 77 + return sherpaOnnxOnlineModelConfig(
  78 + tokens: tokens,
  79 + transducer: sherpaOnnxOnlineTransducerModelConfig(
73 encoder: encoder, 80 encoder: encoder,
74 decoder: decoder, 81 decoder: decoder,
75 - joiner: joiner,  
76 - tokens: tokens,  
77 - numThreads: 2, 82 + joiner: joiner),
  83 + numThreads: 1,
78 modelType: "zipformer2" 84 modelType: "zipformer2"
79 ) 85 )
80 } 86 }
81 87
  88 +func getBilingualStreamingZhEnParaformer() -> SherpaOnnxOnlineModelConfig {
  89 + let encoder = getResource("encoder.int8", "onnx")
  90 + let decoder = getResource("decoder.int8", "onnx")
  91 + let tokens = getResource("tokens", "txt")
  92 +
  93 + return sherpaOnnxOnlineModelConfig(
  94 + tokens: tokens,
  95 + paraformer: sherpaOnnxOnlineParaformerModelConfig(
  96 + encoder: encoder,
  97 + decoder: decoder),
  98 + numThreads: 1,
  99 + modelType: "paraformer"
  100 + )
  101 +}
  102 +
82 /// Please refer to 103 /// Please refer to
83 /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html 104 /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
84 /// to add more models if you need 105 /// to add more models if you need
@@ -87,7 +87,8 @@ class ViewController: UIViewController { @@ -87,7 +87,8 @@ class ViewController: UIViewController {
87 87
88 // let modelConfig = getBilingualStreamZhEnZipformer20230220() 88 // let modelConfig = getBilingualStreamZhEnZipformer20230220()
89 // let modelConfig = getZhZipformer20230615() 89 // let modelConfig = getZhZipformer20230615()
90 - let modelConfig = getEnZipformer20230626() 90 + // let modelConfig = getEnZipformer20230626()
  91 + let modelConfig = getBilingualStreamingZhEnParaformer()
91 92
92 let featConfig = sherpaOnnxFeatureConfig( 93 let featConfig = sherpaOnnxFeatureConfig(
93 sampleRate: 16000, 94 sampleRate: 16000,
@@ -15,22 +15,39 @@ func getResource(_ forResource: String, _ ofType: String) -> String { @@ -15,22 +15,39 @@ func getResource(_ forResource: String, _ ofType: String) -> String {
15 15
16 /// sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English) 16 /// sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English)
17 /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html 17 /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html
18 -func getBilingualStreamZhEnZipformer20230220() -> SherpaOnnxOnlineTransducerModelConfig { 18 +func getBilingualStreamZhEnZipformer20230220() -> SherpaOnnxOnlineModelConfig {
19 let encoder = getResource("encoder-epoch-99-avg-1", "onnx") 19 let encoder = getResource("encoder-epoch-99-avg-1", "onnx")
20 let decoder = getResource("decoder-epoch-99-avg-1", "onnx") 20 let decoder = getResource("decoder-epoch-99-avg-1", "onnx")
21 let joiner = getResource("joiner-epoch-99-avg-1", "onnx") 21 let joiner = getResource("joiner-epoch-99-avg-1", "onnx")
22 let tokens = getResource("tokens", "txt") 22 let tokens = getResource("tokens", "txt")
23 23
24 - return sherpaOnnxOnlineTransducerModelConfig( 24 + return sherpaOnnxOnlineModelConfig(
  25 + tokens: tokens,
  26 + transducer: sherpaOnnxOnlineTransducerModelConfig(
25 encoder: encoder, 27 encoder: encoder,
26 decoder: decoder, 28 decoder: decoder,
27 - joiner: joiner,  
28 - tokens: tokens, 29 + joiner: joiner),
29 numThreads: 2, 30 numThreads: 2,
30 modelType: "zipformer" 31 modelType: "zipformer"
31 ) 32 )
32 } 33 }
33 34
  35 +// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html
  36 +func getBilingualStreamingZhEnParaformer() -> SherpaOnnxOnlineModelConfig {
  37 + let encoder = getResource("encoder.int8", "onnx")
  38 + let decoder = getResource("decoder.int8", "onnx")
  39 + let tokens = getResource("tokens", "txt")
  40 +
  41 + return sherpaOnnxOnlineModelConfig(
  42 + tokens: tokens,
  43 + paraformer: sherpaOnnxOnlineParaformerModelConfig(
  44 + encoder: encoder,
  45 + decoder: decoder),
  46 + numThreads: 1,
  47 + modelType: "paraformer"
  48 + )
  49 +}
  50 +
34 /// Please refer to 51 /// Please refer to
35 /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html 52 /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
36 /// to add more models if you need 53 /// to add more models if you need
@@ -59,7 +59,8 @@ class SherpaOnnxViewModel: ObservableObject { @@ -59,7 +59,8 @@ class SherpaOnnxViewModel: ObservableObject {
59 // 59 //
60 // You can also modify Model.swift to add new pre-trained models from 60 // You can also modify Model.swift to add new pre-trained models from
61 // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html 61 // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
62 - let modelConfig = getBilingualStreamZhEnZipformer20230220() 62 + // let modelConfig = getBilingualStreamZhEnZipformer20230220()
  63 + let modelConfig = getBilingualStreamingZhEnParaformer()
63 64
64 let featConfig = sherpaOnnxFeatureConfig( 65 let featConfig = sherpaOnnxFeatureConfig(
65 sampleRate: 16000, 66 sampleRate: 16000,
@@ -39,11 +39,17 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer( @@ -39,11 +39,17 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer(
39 SHERPA_ONNX_OR(config->feat_config.feature_dim, 80); 39 SHERPA_ONNX_OR(config->feat_config.feature_dim, 80);
40 40
41 recognizer_config.model_config.transducer.encoder = 41 recognizer_config.model_config.transducer.encoder =
42 - SHERPA_ONNX_OR(config->model_config.encoder, ""); 42 + SHERPA_ONNX_OR(config->model_config.transducer.encoder, "");
43 recognizer_config.model_config.transducer.decoder = 43 recognizer_config.model_config.transducer.decoder =
44 - SHERPA_ONNX_OR(config->model_config.decoder, ""); 44 + SHERPA_ONNX_OR(config->model_config.transducer.decoder, "");
45 recognizer_config.model_config.transducer.joiner = 45 recognizer_config.model_config.transducer.joiner =
46 - SHERPA_ONNX_OR(config->model_config.joiner, ""); 46 + SHERPA_ONNX_OR(config->model_config.transducer.joiner, "");
  47 +
  48 + recognizer_config.model_config.paraformer.encoder =
  49 + SHERPA_ONNX_OR(config->model_config.paraformer.encoder, "");
  50 + recognizer_config.model_config.paraformer.decoder =
  51 + SHERPA_ONNX_OR(config->model_config.paraformer.decoder, "");
  52 +
47 recognizer_config.model_config.tokens = 53 recognizer_config.model_config.tokens =
48 SHERPA_ONNX_OR(config->model_config.tokens, ""); 54 SHERPA_ONNX_OR(config->model_config.tokens, "");
49 recognizer_config.model_config.num_threads = 55 recognizer_config.model_config.num_threads =
@@ -128,6 +134,8 @@ SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult( @@ -128,6 +134,8 @@ SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult(
128 const auto &text = result.text; 134 const auto &text = result.text;
129 135
130 auto r = new SherpaOnnxOnlineRecognizerResult; 136 auto r = new SherpaOnnxOnlineRecognizerResult;
  137 + memset(r, 0, sizeof(SherpaOnnxOnlineRecognizerResult));
  138 +
131 // copy text 139 // copy text
132 r->text = new char[text.size() + 1]; 140 r->text = new char[text.size() + 1];
133 std::copy(text.begin(), text.end(), const_cast<char *>(r->text)); 141 std::copy(text.begin(), text.end(), const_cast<char *>(r->text));
@@ -153,7 +161,6 @@ SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult( @@ -153,7 +161,6 @@ SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult(
153 r->tokens = new char[total_length]; 161 r->tokens = new char[total_length];
154 memset(reinterpret_cast<void *>(const_cast<char *>(r->tokens)), 0, 162 memset(reinterpret_cast<void *>(const_cast<char *>(r->tokens)), 0,
155 total_length); 163 total_length);
156 - r->timestamps = new float[r->count];  
157 char **tokens_temp = new char *[r->count]; 164 char **tokens_temp = new char *[r->count];
158 int32_t pos = 0; 165 int32_t pos = 0;
159 for (int32_t i = 0; i < r->count; ++i) { 166 for (int32_t i = 0; i < r->count; ++i) {
@@ -162,10 +169,17 @@ SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult( @@ -162,10 +169,17 @@ SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult(
162 result.tokens[i].c_str(), result.tokens[i].size()); 169 result.tokens[i].c_str(), result.tokens[i].size());
163 // +1 to move past the null character 170 // +1 to move past the null character
164 pos += result.tokens[i].size() + 1; 171 pos += result.tokens[i].size() + 1;
165 - r->timestamps[i] = result.timestamps[i];  
166 } 172 }
167 -  
168 r->tokens_arr = tokens_temp; 173 r->tokens_arr = tokens_temp;
  174 +
  175 + if (!result.timestamps.empty()) {
  176 + r->timestamps = new float[r->count];
  177 + std::copy(result.timestamps.begin(), result.timestamps.end(),
  178 + r->timestamps);
  179 + } else {
  180 + r->timestamps = nullptr;
  181 + }
  182 +
169 } else { 183 } else {
170 r->count = 0; 184 r->count = 0;
171 r->timestamps = nullptr; 185 r->timestamps = nullptr;
@@ -50,12 +50,25 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineTransducerModelConfig { @@ -50,12 +50,25 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineTransducerModelConfig {
50 const char *encoder; 50 const char *encoder;
51 const char *decoder; 51 const char *decoder;
52 const char *joiner; 52 const char *joiner;
  53 +} SherpaOnnxOnlineTransducerModelConfig;
  54 +
  55 +// please visit
  56 +// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html
  57 +// to download pre-trained streaming paraformer models
  58 +SHERPA_ONNX_API typedef struct SherpaOnnxOnlineParaformerModelConfig {
  59 + const char *encoder;
  60 + const char *decoder;
  61 +} SherpaOnnxOnlineParaformerModelConfig;
  62 +
  63 +SHERPA_ONNX_API typedef struct SherpaOnnxModelConfig {
  64 + SherpaOnnxOnlineTransducerModelConfig transducer;
  65 + SherpaOnnxOnlineParaformerModelConfig paraformer;
53 const char *tokens; 66 const char *tokens;
54 int32_t num_threads; 67 int32_t num_threads;
55 const char *provider; 68 const char *provider;
56 int32_t debug; // true to print debug information of the model 69 int32_t debug; // true to print debug information of the model
57 const char *model_type; 70 const char *model_type;
58 -} SherpaOnnxOnlineTransducerModelConfig; 71 +} SherpaOnnxOnlineModelConfig;
59 72
60 /// It expects 16 kHz 16-bit single channel wave format. 73 /// It expects 16 kHz 16-bit single channel wave format.
61 SHERPA_ONNX_API typedef struct SherpaOnnxFeatureConfig { 74 SHERPA_ONNX_API typedef struct SherpaOnnxFeatureConfig {
@@ -71,7 +84,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxFeatureConfig { @@ -71,7 +84,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxFeatureConfig {
71 84
72 SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerConfig { 85 SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerConfig {
73 SherpaOnnxFeatureConfig feat_config; 86 SherpaOnnxFeatureConfig feat_config;
74 - SherpaOnnxOnlineTransducerModelConfig model_config; 87 + SherpaOnnxOnlineModelConfig model_config;
75 88
76 /// Possible values are: greedy_search, modified_beam_search 89 /// Possible values are: greedy_search, modified_beam_search
77 const char *decoding_method; 90 const char *decoding_method;
@@ -18,31 +18,71 @@ func toCPointer(_ s: String) -> UnsafePointer<Int8>! { @@ -18,31 +18,71 @@ func toCPointer(_ s: String) -> UnsafePointer<Int8>! {
18 /// Return an instance of SherpaOnnxOnlineTransducerModelConfig. 18 /// Return an instance of SherpaOnnxOnlineTransducerModelConfig.
19 /// 19 ///
20 /// Please refer to 20 /// Please refer to
21 -/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html 21 +/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
22 /// to download the required `.onnx` files. 22 /// to download the required `.onnx` files.
23 /// 23 ///
24 /// - Parameters: 24 /// - Parameters:
25 /// - encoder: Path to encoder.onnx 25 /// - encoder: Path to encoder.onnx
26 /// - decoder: Path to decoder.onnx 26 /// - decoder: Path to decoder.onnx
27 /// - joiner: Path to joiner.onnx 27 /// - joiner: Path to joiner.onnx
  28 +///
  29 +/// - Returns: Return an instance of SherpaOnnxOnlineTransducerModelConfig
  30 +func sherpaOnnxOnlineTransducerModelConfig(
  31 + encoder: String = "",
  32 + decoder: String = "",
  33 + joiner: String = ""
  34 +) -> SherpaOnnxOnlineTransducerModelConfig {
  35 + return SherpaOnnxOnlineTransducerModelConfig(
  36 + encoder: toCPointer(encoder),
  37 + decoder: toCPointer(decoder),
  38 + joiner: toCPointer(joiner)
  39 + )
  40 +}
  41 +
  42 +/// Return an instance of SherpaOnnxOnlineParaformerModelConfig.
  43 +///
  44 +/// Please refer to
  45 +/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html
  46 +/// to download the required `.onnx` files.
  47 +///
  48 +/// - Parameters:
  49 +/// - encoder: Path to encoder.onnx
  50 +/// - decoder: Path to decoder.onnx
  51 +///
  52 +/// - Returns: Return an instance of SherpaOnnxOnlineParaformerModelConfig
  53 +func sherpaOnnxOnlineParaformerModelConfig(
  54 + encoder: String = "",
  55 + decoder: String = ""
  56 +) -> SherpaOnnxOnlineParaformerModelConfig {
  57 + return SherpaOnnxOnlineParaformerModelConfig(
  58 + encoder: toCPointer(encoder),
  59 + decoder: toCPointer(decoder)
  60 + )
  61 +}
  62 +
  63 +/// Return an instance of SherpaOnnxOnlineModelConfig.
  64 +///
  65 +/// Please refer to
  66 +/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
  67 +/// to download the required `.onnx` files.
  68 +///
  69 +/// - Parameters:
28 /// - tokens: Path to tokens.txt 70 /// - tokens: Path to tokens.txt
29 /// - numThreads: Number of threads to use for neural network computation. 71 /// - numThreads: Number of threads to use for neural network computation.
30 /// 72 ///
31 -/// - Returns: Return an instance of SherpaOnnxOnlineTransducerModelConfig 73 +/// - Returns: Return an instance of SherpaOnnxOnlineModelConfig
32 -func sherpaOnnxOnlineTransducerModelConfig(  
33 - encoder: String,  
34 - decoder: String,  
35 - joiner: String, 74 +func sherpaOnnxOnlineModelConfig(
36 tokens: String, 75 tokens: String,
37 - numThreads: Int = 2, 76 + transducer: SherpaOnnxOnlineTransducerModelConfig = sherpaOnnxOnlineTransducerModelConfig(),
  77 + paraformer: SherpaOnnxOnlineParaformerModelConfig = sherpaOnnxOnlineParaformerModelConfig(),
  78 + numThreads: Int = 1,
38 provider: String = "cpu", 79 provider: String = "cpu",
39 debug: Int = 0, 80 debug: Int = 0,
40 modelType: String = "" 81 modelType: String = ""
41 -) -> SherpaOnnxOnlineTransducerModelConfig {  
42 - return SherpaOnnxOnlineTransducerModelConfig(  
43 - encoder: toCPointer(encoder),  
44 - decoder: toCPointer(decoder),  
45 - joiner: toCPointer(joiner), 82 +) -> SherpaOnnxOnlineModelConfig {
  83 + return SherpaOnnxOnlineModelConfig(
  84 + transducer: transducer,
  85 + paraformer: paraformer,
46 tokens: toCPointer(tokens), 86 tokens: toCPointer(tokens),
47 num_threads: Int32(numThreads), 87 num_threads: Int32(numThreads),
48 provider: toCPointer(provider), 88 provider: toCPointer(provider),
@@ -62,7 +102,7 @@ func sherpaOnnxFeatureConfig( @@ -62,7 +102,7 @@ func sherpaOnnxFeatureConfig(
62 102
63 func sherpaOnnxOnlineRecognizerConfig( 103 func sherpaOnnxOnlineRecognizerConfig(
64 featConfig: SherpaOnnxFeatureConfig, 104 featConfig: SherpaOnnxFeatureConfig,
65 - modelConfig: SherpaOnnxOnlineTransducerModelConfig, 105 + modelConfig: SherpaOnnxOnlineModelConfig,
66 enableEndpoint: Bool = false, 106 enableEndpoint: Bool = false,
67 rule1MinTrailingSilence: Float = 2.4, 107 rule1MinTrailingSilence: Float = 2.4,
68 rule2MinTrailingSilence: Float = 1.2, 108 rule2MinTrailingSilence: Float = 1.2,
@@ -13,17 +13,24 @@ extension AVAudioPCMBuffer { @@ -13,17 +13,24 @@ extension AVAudioPCMBuffer {
13 } 13 }
14 14
15 func run() { 15 func run() {
16 - let encoder = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx"  
17 - let decoder = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx"  
18 - let joiner = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx" 16 + let encoder =
  17 + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx"
  18 + let decoder =
  19 + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx"
  20 + let joiner =
  21 + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx"
19 let tokens = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt" 22 let tokens = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt"
20 23
21 - let modelConfig = sherpaOnnxOnlineTransducerModelConfig( 24 + let transducerConfig = sherpaOnnxOnlineTransducerModelConfig(
22 encoder: encoder, 25 encoder: encoder,
23 decoder: decoder, 26 decoder: decoder,
24 - joiner: joiner, 27 + joiner: joiner
  28 + )
  29 +
  30 + let modelConfig = sherpaOnnxOnlineModelConfig(
25 tokens: tokens, 31 tokens: tokens,
26 - numThreads: 2) 32 + transducer: transducerConfig
  33 + )
27 34
28 let featConfig = sherpaOnnxFeatureConfig( 35 let featConfig = sherpaOnnxFeatureConfig(
29 sampleRate: 16000, 36 sampleRate: 16000,
@@ -31,13 +38,9 @@ func run() { @@ -31,13 +38,9 @@ func run() {
31 ) 38 )
32 var config = sherpaOnnxOnlineRecognizerConfig( 39 var config = sherpaOnnxOnlineRecognizerConfig(
33 featConfig: featConfig, 40 featConfig: featConfig,
34 - modelConfig: modelConfig,  
35 - enableEndpoint: false,  
36 - decodingMethod: "modified_beam_search",  
37 - maxActivePaths: 4 41 + modelConfig: modelConfig
38 ) 42 )
39 43
40 -  
41 let recognizer = SherpaOnnxRecognizer(config: &config) 44 let recognizer = SherpaOnnxRecognizer(config: &config)
42 45
43 let filePath = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav" 46 let filePath = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav"
@@ -60,7 +63,7 @@ func run() { @@ -60,7 +63,7 @@ func run() {
60 recognizer.acceptWaveform(samples: tailPadding) 63 recognizer.acceptWaveform(samples: tailPadding)
61 64
62 recognizer.inputFinished() 65 recognizer.inputFinished()
63 - while (recognizer.isReady()) { 66 + while recognizer.isReady() {
64 recognizer.decode() 67 recognizer.decode()
65 } 68 }
66 69