yujinqiu
Committed by GitHub

Generate SRT from audio (#341)

正在显示 19 个修改的文件 包含 984 行增加0 行删除
@@ -18,3 +18,37 @@ extension AVAudioPCMBuffer { @@ -18,3 +18,37 @@ extension AVAudioPCMBuffer {
18 return self.audioBufferList.pointee.mBuffers.array() 18 return self.audioBufferList.pointee.mBuffers.array()
19 } 19 }
20 } 20 }
  21 +
extension TimeInterval {
    /// The interval rendered as a SubRip (SRT) timestamp, `HH:MM:SS,mmm`.
    ///
    /// The value is rounded to the nearest millisecond once, and every field is
    /// derived from that single integer. This keeps the fields consistent with
    /// each other (e.g. `59.9996` becomes `00:01:00,000`) and avoids off-by-one
    /// milliseconds for values that came from a `Float` (e.g. `Float(2.3)`).
    ///
    /// Negative, non-finite, or unrepresentably large intervals are rendered as
    /// `00:00:00,000` instead of producing a malformed timestamp or trapping.
    var hourMinuteSecondMS: String {
        guard isFinite, self > 0,
              let totalMilliseconds = Int(exactly: (self * 1000).rounded())
        else {
            return "00:00:00,000"
        }

        let (totalSeconds, milliseconds) = totalMilliseconds.quotientAndRemainder(dividingBy: 1000)
        let (totalMinutes, seconds) = totalSeconds.quotientAndRemainder(dividingBy: 60)
        let (hours, minutes) = totalMinutes.quotientAndRemainder(dividingBy: 60)

        // SRT requires zero-padded two-digit hours; hours are not wrapped at 24.
        return String(format: "%02d:%02d:%02d,%03d", hours, minutes, seconds, milliseconds)
    }

    /// Whole hours contained in the interval (truncated toward zero, not wrapped).
    var hour: Int {
        Int(self / 3600)
    }

    /// Minute-of-hour component (0...59 for non-negative intervals), truncated.
    var minute: Int {
        Int((self / 60).truncatingRemainder(dividingBy: 60))
    }

    /// Second-of-minute component (0...59 for non-negative intervals), truncated.
    var second: Int {
        Int(truncatingRemainder(dividingBy: 60))
    }

    /// Millisecond-of-second component (0...999 for non-negative intervals), truncated.
    var millisecond: Int {
        Int((self * 1000).truncatingRemainder(dividingBy: 1000))
    }
}
  40 +
/// Path-manipulation conveniences that treat the string as a file-system path.
extension String {
    /// The string interpreted as a file-system path, wrapped in a file URL.
    var fileURL: URL {
        URL(fileURLWithPath: self)
    }

    /// The extension of the last path component, without the leading dot.
    var pathExtension: String {
        URL(fileURLWithPath: self).pathExtension
    }

    /// The final component of the path (file or directory name).
    var lastPathComponent: String {
        URL(fileURLWithPath: self).lastPathComponent
    }

    /// The path of the file URL built from this string, with its extension removed.
    var stringByDeletingPathExtension: String {
        let withoutExtension = URL(fileURLWithPath: self).deletingPathExtension()
        return withoutExtension.path
    }
}
  1 +tiny.en-tokens.txt
  2 +*.onnx
  3 +*.ort
  1 +// !$*UTF8*$!
  2 +{
  3 + archiveVersion = 1;
  4 + classes = {
  5 + };
  6 + objectVersion = 56;
  7 + objects = {
  8 +
  9 +/* Begin PBXBuildFile section */
  10 + DE081A8F2ABF287C00E8CD63 /* SherpaOnnx.swift in Sources */ = {isa = PBXBuildFile; fileRef = DE081A8E2ABF287C00E8CD63 /* SherpaOnnx.swift */; };
  11 + DE081A922ABF28D400E8CD63 /* SubtitleViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = DE081A912ABF28D400E8CD63 /* SubtitleViewModel.swift */; };
  12 + DE081A952ABFC60E00E8CD63 /* Model.swift in Sources */ = {isa = PBXBuildFile; fileRef = DE081A942ABFC60E00E8CD63 /* Model.swift */; };
  13 + DE081AAF2ABFF35400E8CD63 /* UTType.swift in Sources */ = {isa = PBXBuildFile; fileRef = DE081AAE2ABFF35400E8CD63 /* UTType.swift */; };
  14 + DE081AB12ABFFEEE00E8CD63 /* Document.swift in Sources */ = {isa = PBXBuildFile; fileRef = DE081AB02ABFFEEE00E8CD63 /* Document.swift */; };
  15 + DE081AB32ABFFF2600E8CD63 /* Errors.swift in Sources */ = {isa = PBXBuildFile; fileRef = DE081AB22ABFFF2600E8CD63 /* Errors.swift */; };
  16 + DE8C85A62ABF23E100F667E3 /* onnxruntime.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = DE8C85A52ABF23E100F667E3 /* onnxruntime.xcframework */; };
  17 + DE8C85AA2ABF23FA00F667E3 /* sherpa-onnx.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = DE8C85A92ABF23FA00F667E3 /* sherpa-onnx.xcframework */; };
  18 + DE8C85B22ABF257200F667E3 /* SpeechSegment.swift in Sources */ = {isa = PBXBuildFile; fileRef = DE8C85B12ABF257200F667E3 /* SpeechSegment.swift */; };
  19 + DEA22DF12AC1796C00549373 /* tiny.en-encoder.int8.onnx in Resources */ = {isa = PBXBuildFile; fileRef = DEA22DEE2AC1796C00549373 /* tiny.en-encoder.int8.onnx */; };
  20 + DEA22DF22AC1796C00549373 /* tiny.en-decoder.int8.onnx in Resources */ = {isa = PBXBuildFile; fileRef = DEA22DEF2AC1796C00549373 /* tiny.en-decoder.int8.onnx */; };
  21 + DEA22DF32AC1796C00549373 /* tiny.en-tokens.txt in Resources */ = {isa = PBXBuildFile; fileRef = DEA22DF02AC1796C00549373 /* tiny.en-tokens.txt */; };
  22 + DEA22DF52AC179E500549373 /* silero_vad.onnx in Resources */ = {isa = PBXBuildFile; fileRef = DEA22DF42AC179CA00549373 /* silero_vad.onnx */; };
  23 + DEA657152ABF19730066A81D /* SherpaOnnxSubtitleApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEA657142ABF19730066A81D /* SherpaOnnxSubtitleApp.swift */; };
  24 + DEA657172ABF19730066A81D /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEA657162ABF19730066A81D /* ContentView.swift */; };
  25 + DEA657192ABF19740066A81D /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = DEA657182ABF19740066A81D /* Assets.xcassets */; };
  26 + DEA6571C2ABF19740066A81D /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = DEA6571B2ABF19740066A81D /* Preview Assets.xcassets */; };
  27 + DEA657232ABF20130066A81D /* Audio.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEA657222ABF20130066A81D /* Audio.swift */; };
  28 + DED059702AC136FF00122A60 /* Extension.swift in Sources */ = {isa = PBXBuildFile; fileRef = DED0596F2AC136FF00122A60 /* Extension.swift */; };
  29 +/* End PBXBuildFile section */
  30 +
  31 +/* Begin PBXFileReference section */
  32 + DE081A8E2ABF287C00E8CD63 /* SherpaOnnx.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = SherpaOnnx.swift; path = "../../../swift-api-examples/SherpaOnnx.swift"; sourceTree = "<group>"; };
  33 + DE081A912ABF28D400E8CD63 /* SubtitleViewModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SubtitleViewModel.swift; sourceTree = "<group>"; };
  34 + DE081A942ABFC60E00E8CD63 /* Model.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = Model.swift; path = ../../SherpaOnnx2Pass/SherpaOnnx2Pass/Model.swift; sourceTree = "<group>"; };
  35 + DE081AAC2ABFF30A00E8CD63 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist; path = Info.plist; sourceTree = "<group>"; };
  36 + DE081AAE2ABFF35400E8CD63 /* UTType.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = UTType.swift; sourceTree = "<group>"; };
  37 + DE081AB02ABFFEEE00E8CD63 /* Document.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Document.swift; sourceTree = "<group>"; };
  38 + DE081AB22ABFFF2600E8CD63 /* Errors.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Errors.swift; sourceTree = "<group>"; };
  39 + DE8C85A52ABF23E100F667E3 /* onnxruntime.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = onnxruntime.xcframework; path = "../../build-ios/ios-onnxruntime/1.16.0/onnxruntime.xcframework"; sourceTree = "<group>"; };
  40 + DE8C85A92ABF23FA00F667E3 /* sherpa-onnx.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = "sherpa-onnx.xcframework"; path = "../../build-ios/sherpa-onnx.xcframework"; sourceTree = "<group>"; };
  41 + DE8C85B12ABF257200F667E3 /* SpeechSegment.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SpeechSegment.swift; sourceTree = "<group>"; };
  42 + DEA22DEE2AC1796C00549373 /* tiny.en-encoder.int8.onnx */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = "tiny.en-encoder.int8.onnx"; sourceTree = "<group>"; };
  43 + DEA22DEF2AC1796C00549373 /* tiny.en-decoder.int8.onnx */ = {isa = PBXFileReference; lastKnownFileType = file; path = "tiny.en-decoder.int8.onnx"; sourceTree = "<group>"; };
  44 + DEA22DF02AC1796C00549373 /* tiny.en-tokens.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = "tiny.en-tokens.txt"; sourceTree = "<group>"; };
  45 + DEA22DF42AC179CA00549373 /* silero_vad.onnx */ = {isa = PBXFileReference; lastKnownFileType = file; path = silero_vad.onnx; sourceTree = "<group>"; };
  46 + DEA657112ABF19730066A81D /* SherpaOnnxSubtitle.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = SherpaOnnxSubtitle.app; sourceTree = BUILT_PRODUCTS_DIR; };
  47 + DEA657142ABF19730066A81D /* SherpaOnnxSubtitleApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SherpaOnnxSubtitleApp.swift; sourceTree = "<group>"; };
  48 + DEA657162ABF19730066A81D /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
  49 + DEA657182ABF19740066A81D /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
  50 + DEA6571B2ABF19740066A81D /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
  51 + DEA657222ABF20130066A81D /* Audio.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Audio.swift; sourceTree = "<group>"; };
  52 + DED0596F2AC136FF00122A60 /* Extension.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = Extension.swift; path = ../../../SherpaOnnx2Pass/SherpaOnnx2Pass/Extension.swift; sourceTree = "<group>"; };
  53 +/* End PBXFileReference section */
  54 +
  55 +/* Begin PBXFrameworksBuildPhase section */
  56 + DEA6570E2ABF19730066A81D /* Frameworks */ = {
  57 + isa = PBXFrameworksBuildPhase;
  58 + buildActionMask = 2147483647;
  59 + files = (
  60 + DE8C85A62ABF23E100F667E3 /* onnxruntime.xcframework in Frameworks */,
  61 + DE8C85AA2ABF23FA00F667E3 /* sherpa-onnx.xcframework in Frameworks */,
  62 + );
  63 + runOnlyForDeploymentPostprocessing = 0;
  64 + };
  65 +/* End PBXFrameworksBuildPhase section */
  66 +
  67 +/* Begin PBXGroup section */
  68 + DE081A902ABF28BE00E8CD63 /* Models */ = {
  69 + isa = PBXGroup;
  70 + children = (
  71 + DEA657222ABF20130066A81D /* Audio.swift */,
  72 + DE8C85B12ABF257200F667E3 /* SpeechSegment.swift */,
  73 + DE081AB02ABFFEEE00E8CD63 /* Document.swift */,
  74 + DE081AB22ABFFF2600E8CD63 /* Errors.swift */,
  75 + );
  76 + path = Models;
  77 + sourceTree = "<group>";
  78 + };
  79 + DE081AAD2ABFF34900E8CD63 /* Extensions */ = {
  80 + isa = PBXGroup;
  81 + children = (
  82 + DED0596F2AC136FF00122A60 /* Extension.swift */,
  83 + DE081AAE2ABFF35400E8CD63 /* UTType.swift */,
  84 + );
  85 + path = Extensions;
  86 + sourceTree = "<group>";
  87 + };
  88 + DE8C85A42ABF23E100F667E3 /* Frameworks */ = {
  89 + isa = PBXGroup;
  90 + children = (
  91 + DE8C85A92ABF23FA00F667E3 /* sherpa-onnx.xcframework */,
  92 + DE8C85A52ABF23E100F667E3 /* onnxruntime.xcframework */,
  93 + );
  94 + name = Frameworks;
  95 + sourceTree = "<group>";
  96 + };
  97 + DEA657082ABF19730066A81D = {
  98 + isa = PBXGroup;
  99 + children = (
  100 + DEA657132ABF19730066A81D /* SherpaOnnxSubtitle */,
  101 + DEA657122ABF19730066A81D /* Products */,
  102 + DE8C85A42ABF23E100F667E3 /* Frameworks */,
  103 + );
  104 + sourceTree = "<group>";
  105 + };
  106 + DEA657122ABF19730066A81D /* Products */ = {
  107 + isa = PBXGroup;
  108 + children = (
  109 + DEA657112ABF19730066A81D /* SherpaOnnxSubtitle.app */,
  110 + );
  111 + name = Products;
  112 + sourceTree = "<group>";
  113 + };
  114 + DEA657132ABF19730066A81D /* SherpaOnnxSubtitle */ = {
  115 + isa = PBXGroup;
  116 + children = (
  117 + DEA22DEF2AC1796C00549373 /* tiny.en-decoder.int8.onnx */,
  118 + DEA22DEE2AC1796C00549373 /* tiny.en-encoder.int8.onnx */,
  119 + DEA22DF02AC1796C00549373 /* tiny.en-tokens.txt */,
  120 + DEA22DF42AC179CA00549373 /* silero_vad.onnx */,
  121 + DE081AAC2ABFF30A00E8CD63 /* Info.plist */,
  122 + DE081A8E2ABF287C00E8CD63 /* SherpaOnnx.swift */,
  123 + DEA657142ABF19730066A81D /* SherpaOnnxSubtitleApp.swift */,
  124 + DEA657162ABF19730066A81D /* ContentView.swift */,
  125 + DE081A912ABF28D400E8CD63 /* SubtitleViewModel.swift */,
  126 + DE081AAD2ABFF34900E8CD63 /* Extensions */,
  127 + DE081A942ABFC60E00E8CD63 /* Model.swift */,
  128 + DE081A902ABF28BE00E8CD63 /* Models */,
  129 + DEA657182ABF19740066A81D /* Assets.xcassets */,
  130 + DEA6571A2ABF19740066A81D /* Preview Content */,
  131 + );
  132 + path = SherpaOnnxSubtitle;
  133 + sourceTree = "<group>";
  134 + };
  135 + DEA6571A2ABF19740066A81D /* Preview Content */ = {
  136 + isa = PBXGroup;
  137 + children = (
  138 + DEA6571B2ABF19740066A81D /* Preview Assets.xcassets */,
  139 + );
  140 + path = "Preview Content";
  141 + sourceTree = "<group>";
  142 + };
  143 +/* End PBXGroup section */
  144 +
  145 +/* Begin PBXNativeTarget section */
  146 + DEA657102ABF19730066A81D /* SherpaOnnxSubtitle */ = {
  147 + isa = PBXNativeTarget;
  148 + buildConfigurationList = DEA6571F2ABF19740066A81D /* Build configuration list for PBXNativeTarget "SherpaOnnxSubtitle" */;
  149 + buildPhases = (
  150 + DEA6570D2ABF19730066A81D /* Sources */,
  151 + DEA6570E2ABF19730066A81D /* Frameworks */,
  152 + DEA6570F2ABF19730066A81D /* Resources */,
  153 + );
  154 + buildRules = (
  155 + );
  156 + dependencies = (
  157 + );
  158 + name = SherpaOnnxSubtitle;
  159 + productName = SherpaOnnxSubtitle;
  160 + productReference = DEA657112ABF19730066A81D /* SherpaOnnxSubtitle.app */;
  161 + productType = "com.apple.product-type.application";
  162 + };
  163 +/* End PBXNativeTarget section */
  164 +
  165 +/* Begin PBXProject section */
  166 + DEA657092ABF19730066A81D /* Project object */ = {
  167 + isa = PBXProject;
  168 + attributes = {
  169 + BuildIndependentTargetsInParallel = 1;
  170 + LastSwiftUpdateCheck = 1500;
  171 + LastUpgradeCheck = 1500;
  172 + TargetAttributes = {
  173 + DEA657102ABF19730066A81D = {
  174 + CreatedOnToolsVersion = 15.0;
  175 + };
  176 + };
  177 + };
  178 + buildConfigurationList = DEA6570C2ABF19730066A81D /* Build configuration list for PBXProject "SherpaOnnxSubtitle" */;
  179 + compatibilityVersion = "Xcode 14.0";
  180 + developmentRegion = en;
  181 + hasScannedForEncodings = 0;
  182 + knownRegions = (
  183 + en,
  184 + Base,
  185 + );
  186 + mainGroup = DEA657082ABF19730066A81D;
  187 + productRefGroup = DEA657122ABF19730066A81D /* Products */;
  188 + projectDirPath = "";
  189 + projectRoot = "";
  190 + targets = (
  191 + DEA657102ABF19730066A81D /* SherpaOnnxSubtitle */,
  192 + );
  193 + };
  194 +/* End PBXProject section */
  195 +
  196 +/* Begin PBXResourcesBuildPhase section */
  197 + DEA6570F2ABF19730066A81D /* Resources */ = {
  198 + isa = PBXResourcesBuildPhase;
  199 + buildActionMask = 2147483647;
  200 + files = (
  201 + DEA22DF52AC179E500549373 /* silero_vad.onnx in Resources */,
  202 + DEA6571C2ABF19740066A81D /* Preview Assets.xcassets in Resources */,
  203 + DEA22DF12AC1796C00549373 /* tiny.en-encoder.int8.onnx in Resources */,
  204 + DEA657192ABF19740066A81D /* Assets.xcassets in Resources */,
  205 + DEA22DF22AC1796C00549373 /* tiny.en-decoder.int8.onnx in Resources */,
  206 + DEA22DF32AC1796C00549373 /* tiny.en-tokens.txt in Resources */,
  207 + );
  208 + runOnlyForDeploymentPostprocessing = 0;
  209 + };
  210 +/* End PBXResourcesBuildPhase section */
  211 +
  212 +/* Begin PBXSourcesBuildPhase section */
  213 + DEA6570D2ABF19730066A81D /* Sources */ = {
  214 + isa = PBXSourcesBuildPhase;
  215 + buildActionMask = 2147483647;
  216 + files = (
  217 + DE081AAF2ABFF35400E8CD63 /* UTType.swift in Sources */,
  218 + DE8C85B22ABF257200F667E3 /* SpeechSegment.swift in Sources */,
  219 + DE081A922ABF28D400E8CD63 /* SubtitleViewModel.swift in Sources */,
  220 + DE081AB12ABFFEEE00E8CD63 /* Document.swift in Sources */,
  221 + DED059702AC136FF00122A60 /* Extension.swift in Sources */,
  222 + DEA657172ABF19730066A81D /* ContentView.swift in Sources */,
  223 + DEA657152ABF19730066A81D /* SherpaOnnxSubtitleApp.swift in Sources */,
  224 + DE081AB32ABFFF2600E8CD63 /* Errors.swift in Sources */,
  225 + DEA657232ABF20130066A81D /* Audio.swift in Sources */,
  226 + DE081A8F2ABF287C00E8CD63 /* SherpaOnnx.swift in Sources */,
  227 + DE081A952ABFC60E00E8CD63 /* Model.swift in Sources */,
  228 + );
  229 + runOnlyForDeploymentPostprocessing = 0;
  230 + };
  231 +/* End PBXSourcesBuildPhase section */
  232 +
  233 +/* Begin XCBuildConfiguration section */
  234 + DEA6571D2ABF19740066A81D /* Debug */ = {
  235 + isa = XCBuildConfiguration;
  236 + buildSettings = {
  237 + ALWAYS_SEARCH_USER_PATHS = NO;
  238 + ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
  239 + CLANG_ANALYZER_NONNULL = YES;
  240 + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
  241 + CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
  242 + CLANG_ENABLE_MODULES = YES;
  243 + CLANG_ENABLE_OBJC_ARC = YES;
  244 + CLANG_ENABLE_OBJC_WEAK = YES;
  245 + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
  246 + CLANG_WARN_BOOL_CONVERSION = YES;
  247 + CLANG_WARN_COMMA = YES;
  248 + CLANG_WARN_CONSTANT_CONVERSION = YES;
  249 + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
  250 + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
  251 + CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
  252 + CLANG_WARN_EMPTY_BODY = YES;
  253 + CLANG_WARN_ENUM_CONVERSION = YES;
  254 + CLANG_WARN_INFINITE_RECURSION = YES;
  255 + CLANG_WARN_INT_CONVERSION = YES;
  256 + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
  257 + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
  258 + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
  259 + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
  260 + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
  261 + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
  262 + CLANG_WARN_STRICT_PROTOTYPES = YES;
  263 + CLANG_WARN_SUSPICIOUS_MOVE = YES;
  264 + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
  265 + CLANG_WARN_UNREACHABLE_CODE = YES;
  266 + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
  267 + COPY_PHASE_STRIP = NO;
  268 + DEBUG_INFORMATION_FORMAT = dwarf;
  269 + ENABLE_STRICT_OBJC_MSGSEND = YES;
  270 + ENABLE_TESTABILITY = YES;
  271 + ENABLE_USER_SCRIPT_SANDBOXING = YES;
  272 + GCC_C_LANGUAGE_STANDARD = gnu17;
  273 + GCC_DYNAMIC_NO_PIC = NO;
  274 + GCC_NO_COMMON_BLOCKS = YES;
  275 + GCC_OPTIMIZATION_LEVEL = 0;
  276 + GCC_PREPROCESSOR_DEFINITIONS = (
  277 + "DEBUG=1",
  278 + "$(inherited)",
  279 + );
  280 + GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
  281 + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
  282 + GCC_WARN_UNDECLARED_SELECTOR = YES;
  283 + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
  284 + GCC_WARN_UNUSED_FUNCTION = YES;
  285 + GCC_WARN_UNUSED_VARIABLE = YES;
  286 + IPHONEOS_DEPLOYMENT_TARGET = 16.0;
  287 + LOCALIZATION_PREFERS_STRING_CATALOGS = YES;
  288 + MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
  289 + MTL_FAST_MATH = YES;
  290 + ONLY_ACTIVE_ARCH = YES;
  291 + SDKROOT = iphoneos;
  292 + SWIFT_ACTIVE_COMPILATION_CONDITIONS = "DEBUG $(inherited)";
  293 + SWIFT_OPTIMIZATION_LEVEL = "-Onone";
  294 + };
  295 + name = Debug;
  296 + };
  297 + DEA6571E2ABF19740066A81D /* Release */ = {
  298 + isa = XCBuildConfiguration;
  299 + buildSettings = {
  300 + ALWAYS_SEARCH_USER_PATHS = NO;
  301 + ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
  302 + CLANG_ANALYZER_NONNULL = YES;
  303 + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
  304 + CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
  305 + CLANG_ENABLE_MODULES = YES;
  306 + CLANG_ENABLE_OBJC_ARC = YES;
  307 + CLANG_ENABLE_OBJC_WEAK = YES;
  308 + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
  309 + CLANG_WARN_BOOL_CONVERSION = YES;
  310 + CLANG_WARN_COMMA = YES;
  311 + CLANG_WARN_CONSTANT_CONVERSION = YES;
  312 + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
  313 + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
  314 + CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
  315 + CLANG_WARN_EMPTY_BODY = YES;
  316 + CLANG_WARN_ENUM_CONVERSION = YES;
  317 + CLANG_WARN_INFINITE_RECURSION = YES;
  318 + CLANG_WARN_INT_CONVERSION = YES;
  319 + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
  320 + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
  321 + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
  322 + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
  323 + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
  324 + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
  325 + CLANG_WARN_STRICT_PROTOTYPES = YES;
  326 + CLANG_WARN_SUSPICIOUS_MOVE = YES;
  327 + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
  328 + CLANG_WARN_UNREACHABLE_CODE = YES;
  329 + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
  330 + COPY_PHASE_STRIP = NO;
  331 + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
  332 + ENABLE_NS_ASSERTIONS = NO;
  333 + ENABLE_STRICT_OBJC_MSGSEND = YES;
  334 + ENABLE_USER_SCRIPT_SANDBOXING = YES;
  335 + GCC_C_LANGUAGE_STANDARD = gnu17;
  336 + GCC_NO_COMMON_BLOCKS = YES;
  337 + GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
  338 + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
  339 + GCC_WARN_UNDECLARED_SELECTOR = YES;
  340 + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
  341 + GCC_WARN_UNUSED_FUNCTION = YES;
  342 + GCC_WARN_UNUSED_VARIABLE = YES;
  343 + IPHONEOS_DEPLOYMENT_TARGET = 16.0;
  344 + LOCALIZATION_PREFERS_STRING_CATALOGS = YES;
  345 + MTL_ENABLE_DEBUG_INFO = NO;
  346 + MTL_FAST_MATH = YES;
  347 + SDKROOT = iphoneos;
  348 + SWIFT_COMPILATION_MODE = wholemodule;
  349 + VALIDATE_PRODUCT = YES;
  350 + };
  351 + name = Release;
  352 + };
  353 + DEA657202ABF19740066A81D /* Debug */ = {
  354 + isa = XCBuildConfiguration;
  355 + buildSettings = {
  356 + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
  357 + ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
  358 + CODE_SIGN_STYLE = Automatic;
  359 + CURRENT_PROJECT_VERSION = 1;
  360 + DEVELOPMENT_ASSET_PATHS = "\"SherpaOnnxSubtitle/Preview Content\"";
  361 + DEVELOPMENT_TEAM = 896WS4KUPV;
  362 + ENABLE_PREVIEWS = YES;
  363 + GENERATE_INFOPLIST_FILE = YES;
  364 + HEADER_SEARCH_PATHS = "${PROJECT_DIR}/../../build-ios/sherpa-onnx.xcframework/Headers/";
  365 + INFOPLIST_FILE = SherpaOnnxSubtitle/Info.plist;
  366 + INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES;
  367 + INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
  368 + INFOPLIST_KEY_UILaunchScreen_Generation = YES;
  369 + INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
  370 + INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
  371 + IPHONEOS_DEPLOYMENT_TARGET = 16.0;
  372 + LD_RUNPATH_SEARCH_PATHS = (
  373 + "$(inherited)",
  374 + "@executable_path/Frameworks",
  375 + );
  376 + MARKETING_VERSION = 1.0;
  377 + OTHER_LDFLAGS = "-lc++";
  378 + PRODUCT_BUNDLE_IDENTIFIER = net.duoziwei.SherpaOnnxSubtitle;
  379 + PRODUCT_NAME = "$(TARGET_NAME)";
  380 + SWIFT_EMIT_LOC_STRINGS = YES;
  381 + SWIFT_OBJC_BRIDGING_HEADER = "${PROJECT_DIR}/../../swift-api-examples/SherpaOnnx-Bridging-Header.h";
  382 + SWIFT_VERSION = 5.0;
  383 + TARGETED_DEVICE_FAMILY = "1,2";
  384 + };
  385 + name = Debug;
  386 + };
  387 + DEA657212ABF19740066A81D /* Release */ = {
  388 + isa = XCBuildConfiguration;
  389 + buildSettings = {
  390 + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
  391 + ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
  392 + CODE_SIGN_STYLE = Automatic;
  393 + CURRENT_PROJECT_VERSION = 1;
  394 + DEVELOPMENT_ASSET_PATHS = "\"SherpaOnnxSubtitle/Preview Content\"";
  395 + DEVELOPMENT_TEAM = 896WS4KUPV;
  396 + ENABLE_PREVIEWS = YES;
  397 + GENERATE_INFOPLIST_FILE = YES;
  398 + HEADER_SEARCH_PATHS = "${PROJECT_DIR}/../../build-ios/sherpa-onnx.xcframework/Headers/";
  399 + INFOPLIST_FILE = SherpaOnnxSubtitle/Info.plist;
  400 + INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES;
  401 + INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
  402 + INFOPLIST_KEY_UILaunchScreen_Generation = YES;
  403 + INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
  404 + INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
  405 + IPHONEOS_DEPLOYMENT_TARGET = 16.0;
  406 + LD_RUNPATH_SEARCH_PATHS = (
  407 + "$(inherited)",
  408 + "@executable_path/Frameworks",
  409 + );
  410 + MARKETING_VERSION = 1.0;
  411 + OTHER_LDFLAGS = "-lc++";
  412 + PRODUCT_BUNDLE_IDENTIFIER = net.duoziwei.SherpaOnnxSubtitle;
  413 + PRODUCT_NAME = "$(TARGET_NAME)";
  414 + SWIFT_EMIT_LOC_STRINGS = YES;
  415 + SWIFT_OBJC_BRIDGING_HEADER = "${PROJECT_DIR}/../../swift-api-examples/SherpaOnnx-Bridging-Header.h";
  416 + SWIFT_VERSION = 5.0;
  417 + TARGETED_DEVICE_FAMILY = "1,2";
  418 + };
  419 + name = Release;
  420 + };
  421 +/* End XCBuildConfiguration section */
  422 +
  423 +/* Begin XCConfigurationList section */
  424 + DEA6570C2ABF19730066A81D /* Build configuration list for PBXProject "SherpaOnnxSubtitle" */ = {
  425 + isa = XCConfigurationList;
  426 + buildConfigurations = (
  427 + DEA6571D2ABF19740066A81D /* Debug */,
  428 + DEA6571E2ABF19740066A81D /* Release */,
  429 + );
  430 + defaultConfigurationIsVisible = 0;
  431 + defaultConfigurationName = Release;
  432 + };
  433 + DEA6571F2ABF19740066A81D /* Build configuration list for PBXNativeTarget "SherpaOnnxSubtitle" */ = {
  434 + isa = XCConfigurationList;
  435 + buildConfigurations = (
  436 + DEA657202ABF19740066A81D /* Debug */,
  437 + DEA657212ABF19740066A81D /* Release */,
  438 + );
  439 + defaultConfigurationIsVisible = 0;
  440 + defaultConfigurationName = Release;
  441 + };
  442 +/* End XCConfigurationList section */
  443 + };
  444 + rootObject = DEA657092ABF19730066A81D /* Project object */;
  445 +}
  1 +<?xml version="1.0" encoding="UTF-8"?>
  2 +<Workspace
  3 + version = "1.0">
  4 + <FileRef
  5 + location = "self:">
  6 + </FileRef>
  7 +</Workspace>
  1 +<?xml version="1.0" encoding="UTF-8"?>
  2 +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
  3 +<plist version="1.0">
  4 +<dict>
  5 + <key>IDEDidComputeMac32BitWarning</key>
  6 + <true/>
  7 +</dict>
  8 +</plist>
  1 +{
  2 + "colors" : [
  3 + {
  4 + "idiom" : "universal"
  5 + }
  6 + ],
  7 + "info" : {
  8 + "author" : "xcode",
  9 + "version" : 1
  10 + }
  11 +}
  1 +{
  2 + "images" : [
  3 + {
  4 + "filename" : "k2-1024x1024.png",
  5 + "idiom" : "universal",
  6 + "platform" : "ios",
  7 + "size" : "1024x1024"
  8 + }
  9 + ],
  10 + "info" : {
  11 + "author" : "xcode",
  12 + "version" : 1
  13 + }
  14 +}
  1 +{
  2 + "info" : {
  3 + "author" : "xcode",
  4 + "version" : 1
  5 + }
  6 +}
  1 +//
  2 +// ContentView.swift
  3 +// SherpaOnnxSubtitle
  4 +//
  5 +// Created by knight on 2023/9/23.
  6 +//
  7 +
  8 +import AVKit
  9 +import MediaPlayer
  10 +import PhotosUI
  11 +import SwiftUI
  12 +
/// Root screen: lets the user pick media from the photo library or from Files,
/// asks the view model to generate subtitles, and offers the result for export.
struct ContentView: View {
    @StateObject var subtitleViewModel = SubtitleViewModel()

    var body: some View {
        VStack {
            instructions
                .padding(.vertical)
            photoLibraryPicker

            filesButton
                .foregroundColor(.white)
            statusIndicator
        }
        .fileImporter(
            isPresented: $subtitleViewModel.importNow,
            allowedContentTypes: [.movie, .audio],
            onCompletion: handleImportCompletion
        )
        .onChange(of: subtitleViewModel.importNow) { isPresented in
            // Only react when the importer has been dismissed.
            guard !isPresented else { return }
            subtitleViewModel.restoreState()
        }
        .fileExporter(
            isPresented: $subtitleViewModel.exportNow,
            document: subtitleViewModel.srtDocument,
            contentType: .srt,
            defaultFilename: subtitleViewModel.srtName,
            onCompletion: handleExportCompletion
        )
        .task(id: subtitleViewModel.selectedItem) {
            // Re-runs whenever the photo-library selection changes.
            guard subtitleViewModel.hasAudio else { return }
            subtitleViewModel.loadState = .loading

            do {
                guard let movie = try await subtitleViewModel.selectedItem?.loadTransferable(type: Audio.self) else {
                    subtitleViewModel.loadState = .failed
                    return
                }
                subtitleViewModel.loadState = .loaded(movie)
                subtitleViewModel.generateSRT(from: movie.url)
            } catch {
                subtitleViewModel.loadState = .failed
            }
        }
        .padding()
    }

    /// Title plus a short note on the expected audio format.
    private var instructions: some View {
        VStack {
            Text("SherpaOnnxSubtitle")
                .font(.title)
            VStack(alignment: .leading) {
                Text("Audio format should be **mono** channel and **16khz** sample rate")

                Text("You can convert file with the help of ffmpeg")
                Text("```ffmpeg -i ./foo.mov -acodec pcm_s16le -ac 1 -ar 16000 foo.wav```")
            }
        }
    }

    /// Picker bound to the view model's photo-library selection (videos only).
    private var photoLibraryPicker: some View {
        PhotosPicker(
            selection: $subtitleViewModel.selectedItem,
            matching: .videos
        ) {
            Label("Open Audio from Photo Library", systemImage: "photo")
                .frame(minWidth: 0, maxWidth: .infinity)
                .padding()
                .background(.blue, in: .rect(cornerRadius: 8.0))
                .foregroundColor(.white)
        }
    }

    /// Button that raises the Files importer by setting `importNow`.
    private var filesButton: some View {
        Button {
            subtitleViewModel.importNow = true
        } label: {
            Text("Open Audio from Files")
                .frame(minWidth: 0, maxWidth: .infinity)
                .padding()
                .background(.blue, in: .rect(cornerRadius: 8.0))
        }
    }

    /// Spinner while loading, an error line on failure, nothing otherwise.
    @ViewBuilder
    private var statusIndicator: some View {
        switch subtitleViewModel.loadState {
        case .loading:
            ProgressView()
        case .failed:
            Text("Gen SRT failed")
        case .initial, .loaded, .done:
            EmptyView()
        }
    }

    /// Handles the Files importer result: on success generates subtitles from
    /// the chosen file while holding security-scoped access; on failure logs
    /// the error and marks the load as failed.
    private func handleImportCompletion(result: Result<URL, Error>) {
        print("file import...")
        switch result {
        case .success(let file):
            let didStartAccess = file.startAccessingSecurityScopedResource()
            defer {
                // Balance the start call only if access was actually granted.
                if didStartAccess {
                    file.stopAccessingSecurityScopedResource()
                }
            }
            subtitleViewModel.generateSRT(from: file)
        case .failure(let error):
            print(error.localizedDescription)
            subtitleViewModel.loadState = .failed
        }
    }

    /// Handles the exporter result by logging it and updating `loadState`.
    private func handleExportCompletion(result: Result<URL, any Error>) {
        switch result {
        case .success(let url):
            print("audio export to: \(url)")
            subtitleViewModel.loadState = .done
        case .failure(let error):
            print("export audio error: \(error.localizedDescription)")
            subtitleViewModel.loadState = .failed
        }
    }
}
  116 +
/// Xcode canvas preview showing `ContentView` in its default state.
struct ContentView_Previews: PreviewProvider {
    static var previews: some View {
        ContentView()
    }
}
  1 +//
  2 +// UTType.swift
  3 +// YPlayer
  4 +//
  5 +// Created by knight on 2023/7/7.
  6 +//
  7 +
  8 +import UniformTypeIdentifiers
  9 +
extension UTType {
    /// The app-exported uniform type used for subtitle files, identified by
    /// `com.k2.srt`.
    static var srt: UTType {
        let identifier = "com.k2.srt"
        return UTType(exportedAs: identifier)
    }
}
  1 +<?xml version="1.0" encoding="UTF-8"?>
  2 +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
  3 +<plist version="1.0">
  4 +<dict>
  5 + <key>UTExportedTypeDeclarations</key>
  6 + <array>
  7 + <dict>
  8 + <key>UTTypeConformsTo</key>
  9 + <array>
  10 + <string>public.plain-text</string>
  11 + </array>
  12 + <key>UTTypeDescription</key>
  13 + <string>SubRip Subtitle File</string>
  14 + <key>UTTypeIconFiles</key>
  15 + <array/>
  16 + <key>UTTypeIdentifier</key>
  17 + <string>com.k2.srt</string>
  18 + <key>UTTypeTagSpecification</key>
  19 + <dict>
  20 + <key>public.filename-extension</key>
  21 + <array>
  22 + <string>srt</string>
  23 + </array>
  24 + </dict>
  25 + </dict>
  26 + </array>
  27 +</dict>
  28 +</plist>
  1 +//
  2 +// Audio.swift
  3 +// SherpaOnnxSubtitle
  4 +//
  5 +// Created by knight on 2023/9/23.
  6 +//
  7 +
  8 +import SwiftUI
  9 +
/// A media file received through the system transfer machinery.
///
/// On import the received file is copied to `audio.wav` in the app's Documents
/// directory, replacing any copy left there by a previous import, and `url`
/// points at that copy.
struct Audio: Transferable {
    /// Location of the file this value represents.
    let url: URL

    static var transferRepresentation: some TransferRepresentation {
        FileRepresentation(contentType: .movie) { movie in
            SentTransferredFile(movie.url)
        } importing: { received in
            let copy = URL.documentsDirectory.appending(path: "audio.wav")

            // `path()` percent-encodes by default, which makes `fileExists`
            // miss files whose path contains spaces or non-ASCII characters;
            // the stale copy would then survive and `copyItem` would throw.
            if FileManager.default.fileExists(atPath: copy.path(percentEncoded: false)) {
                try FileManager.default.removeItem(at: copy)
            }

            try FileManager.default.copyItem(at: received.file, to: copy)
            return Self(url: copy)
        }
    }
}
  1 +//
  2 +// Document.swift
  3 +// YPlayer
  4 +//
  5 +// Created by knight on 2023/6/5.
  6 +//
  7 +
  8 +import SwiftUI
  9 +import UniformTypeIdentifiers
  10 +
/// A `FileDocument` wrapping the raw bytes of an SRT file.
struct Document: FileDocument {
    static var readableContentTypes: [UTType] = [.srt]
    static var writableContentTypes: [UTType] = [.srt]

    /// Raw file contents; `nil` when nothing was provided or could be read.
    var data: Data?

    /// Wraps already-available bytes (or `nil`).
    init(data: Data?) {
        self.data = data
    }

    /// Reads the regular-file contents from `configuration`.
    /// `data` stays `nil` if the wrapper has no regular-file contents.
    init(configuration: ReadConfiguration) throws {
        data = configuration.file.regularFileContents
    }

    /// Produces a regular-file wrapper for `data`.
    /// - Throws: `ExportError.fileNotFound` when `data` is `nil`.
    func fileWrapper(configuration _: WriteConfiguration) throws -> FileWrapper {
        if let payload = data {
            return FileWrapper(regularFileWithContents: payload)
        }
        throw ExportError.fileNotFound
    }
}
  1 +//
  2 +// Errors.swift
  3 +// YPlayer
  4 +//
  5 +// Created by knight on 2023/8/26.
  6 +//
  7 +
  8 +import Foundation
  9 +
/// Errors raised while exporting a document.
///
/// Conforms to `LocalizedError` so that `localizedDescription` reports the
/// case's raw-value message instead of a generic system-provided string.
enum ExportError: String, LocalizedError {
    /// There is no data available to write out.
    case fileNotFound = "export file not found"

    /// Human-readable message; identical to the case's raw value.
    var errorDescription: String? {
        rawValue
    }
}
  1 +//
  2 +// SpeechSegment.swift
  3 +// SherpaOnnxSubtitle
  4 +//
  5 +// Created by knight on 2023/9/23.
  6 +//
  7 +
  8 +import Foundation
  9 +
/// One recognized stretch of speech: its time span (in seconds) and its text.
class SpeechSegment: CustomStringConvertible {
    /// Start time in seconds.
    let start: Float
    /// End time in seconds (`start + duration`).
    let end: Float
    /// Recognized text for this span.
    let text: String

    /// - Parameters:
    ///   - start: Start time in seconds.
    ///   - duration: Length of the segment in seconds.
    ///   - text: Recognized text.
    init(start: Float, duration: Float, text: String) {
        self.start = start
        self.end = start + duration
        self.text = text
    }

    /// The timing line and text of an SRT cue (without the leading index line):
    /// `HH:MM:SS,mmm --> HH:MM:SS,mmm`, a newline, then the text.
    public var description: String {
        "\(Self.srtTimestamp(start)) --> \(Self.srtTimestamp(end))\n\(text)"
    }

    /// Formats `seconds` as an SRT timestamp (`HH:MM:SS,mmm`).
    ///
    /// Hours are zero-padded to two digits, as SRT timestamps require. The
    /// value is rounded to the nearest millisecond before being split into
    /// fields; truncating instead would turn e.g. `2.3` (stored as
    /// 2.2999999… in a `Float`) into `,299`. Negative or non-finite input is
    /// rendered as zero.
    private static func srtTimestamp(_ seconds: Float) -> String {
        let clamped = seconds.isFinite ? max(Double(seconds), 0) : 0
        let totalMilliseconds = Int((clamped * 1000).rounded())
        let totalSeconds = totalMilliseconds / 1000

        return String(
            format: "%02d:%02d:%02d,%03d",
            totalSeconds / 3600,
            (totalSeconds / 60) % 60,
            totalSeconds % 60,
            totalMilliseconds % 1000
        )
    }
}
  1 +{
  2 + "info" : {
  3 + "author" : "xcode",
  4 + "version" : 1
  5 + }
  6 +}
  1 +//
  2 +// SherpaOnnxSubtitleApp.swift
  3 +// SherpaOnnxSubtitle
  4 +//
  5 +// Created by knight on 2023/9/23.
  6 +//
  7 +
  8 +import SwiftUI
  9 +
/// Application entry point: a single window group showing `ContentView`.
@main
struct SherpaOnnxSubtitleApp: App {
    var body: some Scene {
        WindowGroup { ContentView() }
    }
}
  1 +//
  2 +// SubtitleViewModel.swift
  3 +// SherpaOnnxSubtitle
  4 +//
  5 +// Created by knight on 2023/9/23.
  6 +//
  7 +
  8 +import AVFoundation
  9 +import PhotosUI
  10 +import SwiftUI
  11 +
/// Progress of importing a media file and turning it into subtitles.
enum LoadState {
    /// Nothing in progress; the default, and what `restoreState()` resets to.
    /// `loading` is entered whenever `importNow` is assigned.
    case initial, loading
    /// Carries an imported `Audio` value.
    case loaded(Audio)
    /// `done` is set when `generateSRT(from:)` returns.
    /// NOTE(review): `failed` is not assigned anywhere in this file's view
    /// model; presumably set by the view layer.
    case done, failed
}
  19 +
/// View model that owns the speech recognizer and voice-activity detector and
/// turns an audio file into SRT subtitle text.
class SubtitleViewModel: ObservableObject {
    /// Selects the recognizer model: `"whisper"` or `"paraformer"`.
    var modelType = "whisper"
    /// Sample rate (Hz) passed to the feature config and used to convert
    /// sample counts into seconds.
    let sampleRate = 16000

    var modelConfig: SherpaOnnxOfflineModelConfig?
    // modelType = "paraformer"

    var recognizer: SherpaOnnxOfflineRecognizer?

    var vadModelConfig: SherpaOnnxVadModelConfig?
    var vad: SherpaOnnxVoiceActivityDetectorWrapper?

    @Published var loadState: LoadState = .initial

    @Published var selectedItem: PhotosPickerItem? = nil

    /// Any assignment (to `true` or `false`) moves `loadState` to `.loading`.
    @Published var importNow: Bool = false {
        didSet {
            loadState = .loading
        }
    }

    /// Set to `true` at the end of `generateSRT(from:)` once the recognizer
    /// and detector were available.
    @Published var exportNow: Bool = false

    /// Suggested file name for the exported subtitles.
    var srtName: String = "unknown.srt"
    /// The generated SRT text.
    var content: String = ""

    /// `content` encoded as UTF-8 and wrapped in a `Document`.
    var srtDocument: Document {
        Document(data: content.data(using: .utf8))
    }

    /// Whether a picker item is currently selected.
    var hasAudio: Bool {
        selectedItem != nil
    }

    /// Builds the recognizer for `modelType` and the Silero VAD.
    /// If `modelType` is unsupported, nothing is created and the optional
    /// members stay `nil`.
    init() {
        if modelType == "whisper" {
            // for English
            self.modelConfig = getNonStreamingWhisperTinyEn()
        } else if modelType == "paraformer" {
            // for Chinese
            self.modelConfig = getNonStreamingZhParaformer20230328()
        } else {
            print("Please specify a supported modelType \(modelType)")
            return
        }

        let featConfig = sherpaOnnxFeatureConfig(
            sampleRate: sampleRate,
            featureDim: 80
        )

        guard let modelConfig else {
            return
        }

        var config = sherpaOnnxOfflineRecognizerConfig(
            featConfig: featConfig,
            modelConfig: modelConfig
        )

        recognizer = SherpaOnnxOfflineRecognizer(config: &config)

        let sileroVadConfig = sherpaOnnxSileroVadModelConfig(
            model: getResource("silero_vad", "onnx")
        )

        self.vadModelConfig = sherpaOnnxVadModelConfig(sileroVad: sileroVadConfig)
        guard var vadModelConfig else {
            return
        }
        vad = SherpaOnnxVoiceActivityDetectorWrapper(
            config: &vadModelConfig, buffer_size_in_seconds: 120
        )
    }

    /// Resets `loadState` to `.initial`.
    func restoreState() {
        loadState = .initial
    }

    /// Reads `file`, runs it through the VAD window by window, decodes every
    /// detected speech segment and stores the resulting SRT text in `content`.
    ///
    /// `loadState` is `.done` when this returns, on every path. If the
    /// recognizer or VAD is missing the method returns early without touching
    /// `exportNow` or `srtName`; otherwise `exportNow` is set and `srtName`
    /// becomes the file's last path component with `.srt` appended, even when
    /// reading the audio failed (in which case `content` is empty).
    ///
    /// - Parameter file: Audio file to transcribe. Debug builds assert that it
    ///   is mono Float32 at `sampleRate`.
    func generateSRT(from file: URL) {
        print("gen srt from: \(file)")
        content = ""

        // restore state
        defer {
            loadState = .done
        }
        guard let recognizer, let vadModelConfig, let vad else {
            return
        }

        do {
            let audioFile = try AVAudioFile(forReading: file)
            let audioFormat = audioFile.processingFormat
            assert(audioFormat.sampleRate == Double(sampleRate))
            assert(audioFormat.channelCount == 1)
            assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

            // The buffer initializer is failable; treat "no buffer" as "no
            // samples" instead of force-unwrapping and crashing.
            let audioFrameCount = UInt32(audioFile.length)
            let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)
            if let audioFileBuffer {
                try audioFile.read(into: audioFileBuffer)
            }
            let samples: [Float] = audioFileBuffer?.array() ?? []

            let windowSize = Int(vadModelConfig.silero_vad.window_size)

            var segments: [SpeechSegment] = []

            if windowSize > 0 {
                // Walk the samples by offset rather than re-copying the whole
                // remaining array after every window. The same windows are fed
                // as before: one for each offset that leaves strictly more
                // than `windowSize` samples.
                for offset in stride(from: 0, to: samples.count - windowSize, by: windowSize) {
                    vad.acceptWaveform(samples: [Float](samples[offset ..< offset + windowSize]))

                    // Drain every speech segment the detector has completed so far.
                    while !vad.isEmpty() {
                        let s = vad.front()
                        vad.pop()
                        let result = recognizer.decode(samples: s.samples)

                        let segment = SpeechSegment(
                            start: Float(s.start) / Float(sampleRate),
                            duration: Float(s.samples.count) / Float(sampleRate),
                            text: result.text
                        )
                        segments.append(segment)

                        print(segment)
                    }
                }
            }

            // SRT cues are numbered from 1 and separated by a blank line.
            content = segments.enumerated().map { index, element in
                "\(index + 1)\n\(element)"
            }.joined(separator: "\n\n")
        } catch {
            print("error: \(error.localizedDescription)")
        }
        exportNow = true

        let last = file.lastPathComponent
        srtName = "\(last).srt"
    }
}