Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2025-08-15 15:13:50 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2025-08-15 15:13:50 +0800
Commit
ea31dbf2764b4878eaf3365d8b04248e16ebb9ef
ea31dbf2
1 parent
7a8e52b7
Refactor Swift API (#2493)
隐藏空白字符变更
内嵌
并排对比
正在显示
2 个修改的文件
包含
136 行增加
和
149 行删除
swift-api-examples/SherpaOnnx.swift
swift-api-examples/generate-subtitles.swift
swift-api-examples/SherpaOnnx.swift
查看文件 @
ea31dbf
...
...
@@ -198,80 +198,63 @@ func sherpaOnnxOnlineRecognizerConfig(
///
class
SherpaOnnxOnlineRecongitionResult
{
/// A pointer to the underlying counterpart in C
let
result
:
UnsafePointer
<
SherpaOnnxOnlineRecognizerResult
>!
/// Return the actual recognition result.
/// For English models, it contains words separated by spaces.
/// For Chinese models, it contains Chinese words.
var
text
:
String
{
return
String
(
cString
:
result
.
pointee
.
text
)
}
var
count
:
Int32
{
return
result
.
pointee
.
count
}
var
tokens
:
[
String
]
{
if
let
tokensPointer
=
result
.
pointee
.
tokens_arr
{
var
tokens
:
[
String
]
=
[]
for
index
in
0
..<
count
{
if
let
tokenPointer
=
tokensPointer
[
Int
(
index
)]
{
let
token
=
String
(
cString
:
tokenPointer
)
tokens
.
append
(
token
)
}
}
return
tokens
}
else
{
let
tokens
:
[
String
]
=
[]
return
tokens
private
let
result
:
UnsafePointer
<
SherpaOnnxOnlineRecognizerResult
>
private
lazy
var
_text
:
String
=
{
guard
let
cstr
=
result
.
pointee
.
text
else
{
return
""
}
return
String
(
cString
:
cstr
)
}()
private
lazy
var
_tokens
:
[
String
]
=
{
guard
let
tokensPointer
=
result
.
pointee
.
tokens_arr
else
{
return
[]
}
return
(
0
..<
count
)
.
compactMap
{
index
in
guard
let
ptr
=
tokensPointer
[
index
]
else
{
return
nil
}
return
String
(
cString
:
ptr
)
}
}
}
()
var
timestamps
:
[
Float
]
{
if
let
p
=
result
.
pointee
.
timestamps
{
var
timestamps
:
[
Float
]
=
[]
for
index
in
0
..<
count
{
timestamps
.
append
(
p
[
Int
(
index
)])
}
return
timestamps
}
else
{
let
timestamps
:
[
Float
]
=
[]
return
timestamps
}
}
private
lazy
var
_timestamps
:
[
Float
]
=
{
guard
let
timestampsPointer
=
result
.
pointee
.
timestamps
else
{
return
[]
}
return
(
0
..<
count
)
.
map
{
index
in
timestampsPointer
[
index
]
}
}()
init
(
result
:
UnsafePointer
<
SherpaOnnxOnlineRecognizerResult
>
!
)
{
init
(
result
:
UnsafePointer
<
SherpaOnnxOnlineRecognizerResult
>
)
{
self
.
result
=
result
}
deinit
{
if
let
result
{
SherpaOnnxDestroyOnlineRecognizerResult
(
result
)
}
SherpaOnnxDestroyOnlineRecognizerResult
(
result
)
}
/// Return the actual recognition result.
/// For English models, it contains words separated by spaces.
/// For Chinese models, it contains Chinese words.
var
text
:
String
{
_text
}
var
count
:
Int
{
Int
(
result
.
pointee
.
count
)
}
var
tokens
:
[
String
]
{
_tokens
}
var
timestamps
:
[
Float
]
{
_timestamps
}
}
class
SherpaOnnxRecognizer
{
/// A pointer to the underlying counterpart in C
let
recognizer
:
OpaquePointer
!
var
stream
:
OpaquePointer
!
private
let
recognizer
:
OpaquePointer
private
var
stream
:
OpaquePointer
private
let
lock
=
NSLock
()
// for thread-safe stream replacement
/// Constructor taking a model config
init
(
config
:
UnsafePointer
<
SherpaOnnxOnlineRecognizerConfig
>
!
config
:
UnsafePointer
<
SherpaOnnxOnlineRecognizerConfig
>
)
{
recognizer
=
SherpaOnnxCreateOnlineRecognizer
(
config
)
stream
=
SherpaOnnxCreateOnlineStream
(
recognizer
)
self
.
recognizer
=
SherpaOnnxCreateOnlineRecognizer
(
config
)
self
.
stream
=
SherpaOnnxCreateOnlineStream
(
recognizer
)
}
deinit
{
if
let
stream
{
SherpaOnnxDestroyOnlineStream
(
stream
)
}
if
let
recognizer
{
SherpaOnnxDestroyOnlineRecognizer
(
recognizer
)
}
SherpaOnnxDestroyOnlineStream
(
stream
)
SherpaOnnxDestroyOnlineRecognizer
(
recognizer
)
}
/// Decode wave samples.
...
...
@@ -280,12 +263,12 @@ class SherpaOnnxRecognizer {
/// - samples: Audio samples normalized to the range [-1, 1]
/// - sampleRate: Sample rate of the input audio samples. Must match
/// the one expected by the model.
func
acceptWaveform
(
samples
:
[
Float
],
sampleRate
:
Int
=
16000
)
{
func
acceptWaveform
(
samples
:
[
Float
],
sampleRate
:
Int
=
16
_
000
)
{
SherpaOnnxOnlineStreamAcceptWaveform
(
stream
,
Int32
(
sampleRate
),
samples
,
Int32
(
samples
.
count
))
}
func
isReady
()
->
Bool
{
return
SherpaOnnxIsOnlineStreamReady
(
recognizer
,
stream
)
==
1
?
true
:
false
return
SherpaOnnxIsOnlineStreamReady
(
recognizer
,
stream
)
!=
0
}
/// If there are enough number of feature frames, it invokes the neural
...
...
@@ -296,8 +279,9 @@ class SherpaOnnxRecognizer {
/// Get the decoding results so far
func
getResult
()
->
SherpaOnnxOnlineRecongitionResult
{
let
result
:
UnsafePointer
<
SherpaOnnxOnlineRecognizerResult
>
?
=
SherpaOnnxGetOnlineStreamResult
(
recognizer
,
stream
)
guard
let
result
=
SherpaOnnxGetOnlineStreamResult
(
recognizer
,
stream
)
else
{
fatalError
(
"SherpaOnnxGetOnlineStreamResult returned nil"
)
}
return
SherpaOnnxOnlineRecongitionResult
(
result
:
result
)
}
...
...
@@ -313,12 +297,14 @@ class SherpaOnnxRecognizer {
}
words
.
withCString
{
cString
in
let
newStream
=
SherpaOnnxCreateOnlineStreamWithHotwords
(
recognizer
,
cString
)
guard
let
newStream
=
SherpaOnnxCreateOnlineStreamWithHotwords
(
recognizer
,
cString
)
else
{
fatalError
(
"SherpaOnnxCreateOnlineStreamWithHotwords returned nil"
)
}
lock
.
lock
()
// lock while release and replace stream
objc_sync_enter
(
self
)
SherpaOnnxDestroyOnlineStream
(
stream
)
stream
=
newStream
objc_sync_exit
(
self
)
lock
.
unlock
(
)
}
}
...
...
@@ -330,7 +316,7 @@ class SherpaOnnxRecognizer {
/// Return true is an endpoint has been detected.
func
isEndpoint
()
->
Bool
{
return
SherpaOnnxOnlineStreamIsEndpoint
(
recognizer
,
stream
)
==
1
?
true
:
false
return
SherpaOnnxOnlineStreamIsEndpoint
(
recognizer
,
stream
)
!=
0
}
}
...
...
@@ -541,31 +527,39 @@ func sherpaOnnxOfflineRecognizerConfig(
class
SherpaOnnxOfflineRecongitionResult
{
/// A pointer to the underlying counterpart in C
let
result
:
UnsafePointer
<
SherpaOnnxOfflineRecognizerResult
>!
let
result
:
UnsafePointer
<
SherpaOnnxOfflineRecognizerResult
>
private
lazy
var
_text
:
String
=
{
guard
let
cstr
=
result
.
pointee
.
text
else
{
return
""
}
return
String
(
cString
:
cstr
)
}()
private
lazy
var
_timestamps
:
[
Float
]
=
{
guard
let
p
=
result
.
pointee
.
timestamps
else
{
return
[]
}
return
(
0
..<
result
.
pointee
.
count
)
.
map
{
p
[
Int
(
$0
)]
}
}()
private
lazy
var
_lang
:
String
=
{
guard
let
cstr
=
result
.
pointee
.
lang
else
{
return
""
}
return
String
(
cString
:
cstr
)
}()
private
lazy
var
_emotion
:
String
=
{
guard
let
cstr
=
result
.
pointee
.
emotion
else
{
return
""
}
return
String
(
cString
:
cstr
)
}()
private
lazy
var
_event
:
String
=
{
guard
let
cstr
=
result
.
pointee
.
event
else
{
return
""
}
return
String
(
cString
:
cstr
)
}()
/// Return the actual recognition result.
/// For English models, it contains words separated by spaces.
/// For Chinese models, it contains Chinese words.
var
text
:
String
{
return
String
(
cString
:
result
.
pointee
.
text
)
}
var
count
:
Int32
{
return
result
.
pointee
.
count
}
var
timestamps
:
[
Float
]
{
if
let
p
=
result
.
pointee
.
timestamps
{
var
timestamps
:
[
Float
]
=
[]
for
index
in
0
..<
count
{
timestamps
.
append
(
p
[
Int
(
index
)])
}
return
timestamps
}
else
{
let
timestamps
:
[
Float
]
=
[]
return
timestamps
}
}
var
text
:
String
{
_text
}
var
count
:
Int
{
Int
(
result
.
pointee
.
count
)
}
var
timestamps
:
[
Float
]
{
_timestamps
}
// For SenseVoice models, it can be zh, en, ja, yue, ko
// where zh is for Chinese
...
...
@@ -573,45 +567,38 @@ class SherpaOnnxOfflineRecongitionResult {
// ja is for Japanese
// yue is for Cantonese
// ko is for Korean
var
lang
:
String
{
return
String
(
cString
:
result
.
pointee
.
lang
)
}
var
lang
:
String
{
_lang
}
// for SenseVoice models
var
emotion
:
String
{
return
String
(
cString
:
result
.
pointee
.
emotion
)
}
var
emotion
:
String
{
_emotion
}
// for SenseVoice models
var
event
:
String
{
return
String
(
cString
:
result
.
pointee
.
event
)
}
var
event
:
String
{
_event
}
init
(
result
:
UnsafePointer
<
SherpaOnnxOfflineRecognizerResult
>
!
)
{
init
(
result
:
UnsafePointer
<
SherpaOnnxOfflineRecognizerResult
>
)
{
self
.
result
=
result
}
deinit
{
if
let
result
{
SherpaOnnxDestroyOfflineRecognizerResult
(
result
)
}
SherpaOnnxDestroyOfflineRecognizerResult
(
result
)
}
}
class
SherpaOnnxOfflineRecognizer
{
/// A pointer to the underlying counterpart in C
let
recognizer
:
OpaquePointer
!
private
let
recognizer
:
OpaquePointer
init
(
config
:
UnsafePointer
<
SherpaOnnxOfflineRecognizerConfig
>
!
config
:
UnsafePointer
<
SherpaOnnxOfflineRecognizerConfig
>
)
{
recognizer
=
SherpaOnnxCreateOfflineRecognizer
(
config
)
guard
let
ptr
=
SherpaOnnxCreateOfflineRecognizer
(
config
)
else
{
fatalError
(
"Failed to create SherpaOnnxOfflineRecognizer"
)
}
self
.
recognizer
=
ptr
}
deinit
{
if
let
recognizer
{
SherpaOnnxDestroyOfflineRecognizer
(
recognizer
)
}
SherpaOnnxDestroyOfflineRecognizer
(
recognizer
)
}
/// Decode wave samples.
...
...
@@ -620,23 +607,25 @@ class SherpaOnnxOfflineRecognizer {
/// - samples: Audio samples normalized to the range [-1, 1]
/// - sampleRate: Sample rate of the input audio samples. Must match
/// the one expected by the model.
func
decode
(
samples
:
[
Float
],
sampleRate
:
Int
=
16000
)
->
SherpaOnnxOfflineRecongitionResult
{
let
stream
:
OpaquePointer
!
=
SherpaOnnxCreateOfflineStream
(
recognizer
)
func
decode
(
samples
:
[
Float
],
sampleRate
:
Int
=
16_000
)
->
SherpaOnnxOfflineRecongitionResult
{
guard
let
stream
=
SherpaOnnxCreateOfflineStream
(
recognizer
)
else
{
fatalError
(
"Failed to create offline stream"
)
}
defer
{
SherpaOnnxDestroyOfflineStream
(
stream
)
}
SherpaOnnxAcceptWaveformOffline
(
stream
,
Int32
(
sampleRate
),
samples
,
Int32
(
samples
.
count
))
SherpaOnnxDecodeOfflineStream
(
recognizer
,
stream
)
let
result
:
UnsafePointer
<
SherpaOnnxOfflineRecognizerResult
>
?
=
SherpaOnnxGetOfflineStreamResult
(
stream
)
SherpaOnnxDestroyOfflineStream
(
stream
)
guard
let
resultPtr
=
SherpaOnnxGetOfflineStreamResult
(
stream
)
else
{
fatalError
(
"Failed to get offline recognition result"
)
}
return
SherpaOnnxOfflineRecongitionResult
(
result
:
result
)
return
SherpaOnnxOfflineRecongitionResult
(
result
:
result
Ptr
)
}
func
setConfig
(
config
:
UnsafePointer
<
SherpaOnnxOfflineRecognizerConfig
>
!
)
{
func
setConfig
(
config
:
UnsafePointer
<
SherpaOnnxOfflineRecognizerConfig
>
)
{
SherpaOnnxOfflineRecognizerSetConfig
(
recognizer
,
config
)
}
}
...
...
@@ -696,37 +685,38 @@ func sherpaOnnxVadModelConfig(
}
class
SherpaOnnxCircularBufferWrapper
{
let
buffer
:
OpaquePointer
!
private
let
buffer
:
OpaquePointer
init
(
capacity
:
Int
)
{
buffer
=
SherpaOnnxCreateCircularBuffer
(
Int32
(
capacity
))
guard
let
ptr
=
SherpaOnnxCreateCircularBuffer
(
Int32
(
capacity
))
else
{
fatalError
(
"Failed to create SherpaOnnxCircularBuffer"
)
}
self
.
buffer
=
ptr
}
deinit
{
if
let
buffer
{
SherpaOnnxDestroyCircularBuffer
(
buffer
)
}
SherpaOnnxDestroyCircularBuffer
(
buffer
)
}
func
push
(
samples
:
[
Float
])
{
guard
!
samples
.
isEmpty
else
{
return
}
SherpaOnnxCircularBufferPush
(
buffer
,
samples
,
Int32
(
samples
.
count
))
}
func
get
(
startIndex
:
Int
,
n
:
Int
)
->
[
Float
]
{
let
p
:
UnsafePointer
<
Float
>!
=
SherpaOnnxCircularBufferGet
(
buffer
,
Int32
(
startIndex
),
Int32
(
n
))
var
samples
:
[
Float
]
=
[]
guard
startIndex
>=
0
else
{
return
[]
}
guard
n
>
0
else
{
return
[]
}
for
index
in
0
..<
n
{
samples
.
append
(
p
[
Int
(
index
)])
guard
let
ptr
=
SherpaOnnxCircularBufferGet
(
buffer
,
Int32
(
startIndex
),
Int32
(
n
))
else
{
return
[]
}
defer
{
SherpaOnnxCircularBufferFree
(
ptr
)
}
SherpaOnnxCircularBufferFree
(
p
)
return
samples
return
Array
(
UnsafeBufferPointer
(
start
:
ptr
,
count
:
n
))
}
func
pop
(
n
:
Int
)
{
guard
n
>
0
else
{
return
}
SherpaOnnxCircularBufferPop
(
buffer
,
Int32
(
n
))
}
...
...
@@ -740,47 +730,42 @@ class SherpaOnnxCircularBufferWrapper {
}
class
SherpaOnnxSpeechSegmentWrapper
{
let
p
:
UnsafePointer
<
SherpaOnnxSpeechSegment
>!
private
let
p
:
UnsafePointer
<
SherpaOnnxSpeechSegment
>
init
(
p
:
UnsafePointer
<
SherpaOnnxSpeechSegment
>
!
)
{
init
(
p
:
UnsafePointer
<
SherpaOnnxSpeechSegment
>
)
{
self
.
p
=
p
}
deinit
{
if
let
p
{
SherpaOnnxDestroySpeechSegment
(
p
)
}
SherpaOnnxDestroySpeechSegment
(
p
)
}
var
start
:
Int
{
return
Int
(
p
.
pointee
.
start
)
Int
(
p
.
pointee
.
start
)
}
var
n
:
Int
{
return
Int
(
p
.
pointee
.
n
)
Int
(
p
.
pointee
.
n
)
}
var
samples
:
[
Float
]
{
var
samples
:
[
Float
]
=
[]
for
index
in
0
..<
n
{
samples
.
append
(
p
.
pointee
.
samples
[
Int
(
index
)])
}
return
samples
}
lazy
var
samples
:
[
Float
]
=
{
Array
(
UnsafeBufferPointer
(
start
:
p
.
pointee
.
samples
,
count
:
n
))
}()
}
class
SherpaOnnxVoiceActivityDetectorWrapper
{
/// A pointer to the underlying counterpart in C
let
vad
:
OpaquePointer
!
private
let
vad
:
OpaquePointer
init
(
config
:
UnsafePointer
<
SherpaOnnxVadModelConfig
>!
,
buffer_size_in_seconds
:
Float
)
{
vad
=
SherpaOnnxCreateVoiceActivityDetector
(
config
,
buffer_size_in_seconds
)
init
(
config
:
UnsafePointer
<
SherpaOnnxVadModelConfig
>
,
buffer_size_in_seconds
:
Float
)
{
guard
let
vad
=
SherpaOnnxCreateVoiceActivityDetector
(
config
,
buffer_size_in_seconds
)
else
{
fatalError
(
"SherpaOnnxCreateVoiceActivityDetector returned nil"
)
}
self
.
vad
=
vad
}
deinit
{
if
let
vad
{
SherpaOnnxDestroyVoiceActivityDetector
(
vad
)
}
SherpaOnnxDestroyVoiceActivityDetector
(
vad
)
}
func
acceptWaveform
(
samples
:
[
Float
])
{
...
...
@@ -804,7 +789,9 @@ class SherpaOnnxVoiceActivityDetectorWrapper {
}
func
front
()
->
SherpaOnnxSpeechSegmentWrapper
{
let
p
:
UnsafePointer
<
SherpaOnnxSpeechSegment
>
?
=
SherpaOnnxVoiceActivityDetectorFront
(
vad
)
guard
let
p
=
SherpaOnnxVoiceActivityDetectorFront
(
vad
)
else
{
fatalError
(
"SherpaOnnxVoiceActivityDetectorFront returned nil"
)
}
return
SherpaOnnxSpeechSegmentWrapper
(
p
:
p
)
}
...
...
swift-api-examples/generate-subtitles.swift
查看文件 @
ea31dbf
...
...
@@ -94,9 +94,9 @@ class SpeechSegment: CustomStringConvertible {
func
run
()
{
var
recognizer
:
SherpaOnnxOfflineRecognizer
var
modelConfig
:
SherpaOnnxOfflineModelConfig
var
modelType
=
"whisper"
let
modelType
=
"whisper"
// modelType = "paraformer"
var
filePath
=
"/Users/fangjun/Desktop/Obama.wav"
// English
let
filePath
=
"/Users/fangjun/Desktop/Obama.wav"
// English
// filePath = "/Users/fangjun/Desktop/lei-jun.wav" // Chinese
// please go to https://huggingface.co/csukuangfj/vad
// to download the above two files
...
...
@@ -192,7 +192,7 @@ func run() {
let
audioFileBuffer
=
AVAudioPCMBuffer
(
pcmFormat
:
audioFormat
,
frameCapacity
:
audioFrameCount
)
try!
audioFile
.
read
(
into
:
audioFileBuffer
!
)
var
array
:
[
Float
]
!
=
audioFileBuffer
?
.
array
()
let
array
:
[
Float
]
!
=
audioFileBuffer
?
.
array
()
var
segments
:
[
SpeechSegment
]
=
[]
...
...
请
注册
或
登录
后发表评论