正在显示
10 个修改的文件
包含
324 行增加
和
4 行删除
.github/scripts/test-swift.sh
0 → 100755
#!/usr/bin/env bash
#
# Smoke test for the Swift API examples. Downloads a test wave file and then
# runs every example script inside swift-api-examples/.

set -ex

echo "pwd: $PWD"

cd swift-api-examples
ls -lh

# Directory used for the test wave file and the generated subtitle file.
# NOTE(review): presumably run-generate-subtitles.sh reads from / writes to
# this fixed location; confirm before changing it.
desktop_dir=/Users/fangjun/Desktop

mkdir -p "$desktop_dir"
pushd "$desktop_dir"
# Skip the download when the file is already there so that re-running the
# script does not leave Obama.wav.1, Obama.wav.2, ... behind.
if [ ! -f Obama.wav ]; then
  wget -q https://huggingface.co/csukuangfj/test-data/resolve/main/Obama.wav
fi
ls -lh
popd

./run-generate-subtitles.sh

ls -lh "$desktop_dir"
cat "$desktop_dir/Obama.srt"

./run-tts.sh
ls -lh

./run-decode-file.sh

./run-decode-file-non-streaming.sh

ls -lh
.github/workflows/swift.yaml
0 → 100644
# Builds the Swift bindings on macOS and runs the Swift API examples.
name: swift

on:
  push:
    branches:
      - master

  pull_request:
    branches:
      - master

  workflow_dispatch:

# One active run per ref: a newer run cancels the one still in progress.
concurrency:
  group: swift-${{ github.ref }}
  cancel-in-progress: true

jobs:
  swift:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os:
          - macos-13

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-swift

      - name: Build
        shell: bash
        run: |
          # Create the directory that .github/scripts/test-swift.sh downloads
          # into and make it accessible to every user.
          sudo mkdir -p /Users/fangjun/Desktop
          sudo chmod a=rwx /Users/fangjun/Desktop
          ls -lhd /Users/fangjun/Desktop
          ls -lh /Users/fangjun/Desktop

          # Route C++ compilation through ccache.
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          ./build-swift-macos.sh

      - name: test
        shell: bash
        run: |
          .github/scripts/test-swift.sh
| @@ -572,3 +572,110 @@ class SherpaOnnxVoiceActivityDetectorWrapper { | @@ -572,3 +572,110 @@ class SherpaOnnxVoiceActivityDetectorWrapper { | ||
| 572 | SherpaOnnxVoiceActivityDetectorReset(vad) | 572 | SherpaOnnxVoiceActivityDetectorReset(vad) |
| 573 | } | 573 | } |
| 574 | } | 574 | } |
| 575 | + | ||
| 576 | +// offline tts | ||
/// Builds the C struct `SherpaOnnxOfflineTtsVitsModelConfig` from Swift values.
///
/// - Parameters:
///   - model: Converted with `toCPointer` and stored in the `model` field.
///   - lexicon: Converted with `toCPointer` and stored in the `lexicon` field.
///   - tokens: Converted with `toCPointer` and stored in the `tokens` field.
///   - noiseScale: Stored in `noise_scale`.
///   - noiseScaleW: Stored in `noise_scale_w`.
///   - lengthScale: Stored in `length_scale`.
/// - Returns: The populated config struct.
func sherpaOnnxOfflineTtsVitsModelConfig(
  model: String,
  lexicon: String,
  tokens: String,
  noiseScale: Float = 0.667,
  noiseScaleW: Float = 0.8,
  lengthScale: Float = 1.0
) -> SherpaOnnxOfflineTtsVitsModelConfig {
  let modelPtr = toCPointer(model)
  let lexiconPtr = toCPointer(lexicon)
  let tokensPtr = toCPointer(tokens)

  return SherpaOnnxOfflineTtsVitsModelConfig(
    model: modelPtr,
    lexicon: lexiconPtr,
    tokens: tokensPtr,
    noise_scale: noiseScale,
    noise_scale_w: noiseScaleW,
    length_scale: lengthScale
  )
}
| 593 | + | ||
/// Assembles the C struct `SherpaOnnxOfflineTtsModelConfig`.
///
/// - Parameters:
///   - vits: VITS model configuration, stored as-is in the `vits` field.
///   - numThreads: Converted to `Int32` for the `num_threads` field.
///   - debug: Converted to `Int32` for the `debug` field.
///   - provider: Converted with `toCPointer` for the `provider` field.
/// - Returns: The populated config struct.
func sherpaOnnxOfflineTtsModelConfig(
  vits: SherpaOnnxOfflineTtsVitsModelConfig,
  numThreads: Int = 1,
  debug: Int = 0,
  provider: String = "cpu"
) -> SherpaOnnxOfflineTtsModelConfig {
  let threadCount = Int32(numThreads)
  let debugFlag = Int32(debug)
  let providerPtr = toCPointer(provider)

  return SherpaOnnxOfflineTtsModelConfig(
    vits: vits,
    num_threads: threadCount,
    debug: debugFlag,
    provider: providerPtr
  )
}
| 607 | + | ||
/// Wraps a model config into the top-level C struct `SherpaOnnxOfflineTtsConfig`.
///
/// - Parameters:
///   - model: Stored as-is in the `model` field.
///   - ruleFsts: Converted with `toCPointer` for the `rule_fsts` field;
///     defaults to an empty string.
/// - Returns: The populated config struct.
func sherpaOnnxOfflineTtsConfig(
  model: SherpaOnnxOfflineTtsModelConfig,
  ruleFsts: String = ""
) -> SherpaOnnxOfflineTtsConfig {
  let ruleFstsPtr = toCPointer(ruleFsts)
  return SherpaOnnxOfflineTtsConfig(model: model, rule_fsts: ruleFstsPtr)
}
| 617 | + | ||
/// Owns a `SherpaOnnxGeneratedAudio` obtained from the C API and releases it
/// with `SherpaOnnxDestroyOfflineTtsGeneratedAudio` when deallocated.
///
/// The wrapped pointer may be nil (the initializer accepts an optional
/// pointer); every accessor below tolerates that instead of crashing.
class SherpaOnnxGeneratedAudioWrapper {
  /// A pointer to the underlying counterpart in C
  let audio: UnsafePointer<SherpaOnnxGeneratedAudio>!

  init(audio: UnsafePointer<SherpaOnnxGeneratedAudio>!) {
    self.audio = audio
  }

  deinit {
    if let audio {
      SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio)
    }
  }

  /// Value of the C field `n`; 0 when there is no underlying audio.
  var n: Int32 {
    return audio?.pointee.n ?? 0
  }

  /// Value of the C field `sample_rate`; 0 when there is no underlying audio.
  var sampleRate: Int32 {
    return audio?.pointee.sample_rate ?? 0
  }

  /// A copy of the first `n` elements of the C `samples` buffer.
  ///
  /// Returns an empty array when the audio pointer or the sample buffer is
  /// nil, or when `n` is not positive.
  var samples: [Float] {
    guard let buffer = audio?.pointee.samples, n > 0 else {
      return []
    }
    // Copy the whole buffer in one step instead of appending element by element.
    return Array(UnsafeBufferPointer(start: buffer, count: Int(n)))
  }

  /// Writes the audio to `filename` via `SherpaOnnxWriteWave`.
  ///
  /// When there is no underlying audio nothing is written and a message is
  /// printed, rather than dereferencing a nil pointer.
  func save(filename: String) {
    guard let audio else {
      print("SherpaOnnxGeneratedAudioWrapper.save: no audio to write to \(filename)")
      return
    }
    SherpaOnnxWriteWave(audio.pointee.samples, n, sampleRate, toCPointer(filename))
  }
}
| 657 | + | ||
/// Swift-side owner of the offline TTS object created by
/// `SherpaOnnxCreateOfflineTts`; the object is destroyed in `deinit`.
class SherpaOnnxOfflineTtsWrapper {
  /// A pointer to the underlying counterpart in C
  let tts: OpaquePointer!

  /// Creates the underlying C object from `config`.
  init(
    config: UnsafePointer<SherpaOnnxOfflineTtsConfig>!
  ) {
    self.tts = SherpaOnnxCreateOfflineTts(config)
  }

  deinit {
    guard let handle = tts else { return }
    SherpaOnnxDestroyOfflineTts(handle)
  }

  /// Calls `SherpaOnnxOfflineTtsGenerate` for `text` and wraps the result.
  ///
  /// - Parameters:
  ///   - text: Converted with `toCPointer` before being passed to C.
  ///   - sid: Converted to `Int32` before being passed to C.
  ///   - speed: Passed through unchanged.
  /// - Returns: A wrapper around whatever pointer the C call returned.
  func generate(text: String, sid: Int = 0, speed: Float = 1.0) -> SherpaOnnxGeneratedAudioWrapper {
    let textPtr = toCPointer(text)
    let speakerId = Int32(sid)
    let generated: UnsafePointer<SherpaOnnxGeneratedAudio>? = SherpaOnnxOfflineTtsGenerate(
      tts, textPtr, speakerId, speed)

    return SherpaOnnxGeneratedAudioWrapper(audio: generated)
  }
}
| @@ -175,8 +175,8 @@ func run() { | @@ -175,8 +175,8 @@ func run() { | ||
| 175 | var segments: [SpeechSegment] = [] | 175 | var segments: [SpeechSegment] = [] |
| 176 | 176 | ||
| 177 | for offset in stride(from: 0, to: array.count, by: windowSize) { | 177 | for offset in stride(from: 0, to: array.count, by: windowSize) { |
| 178 | - let end = min(offset + windowSize, array.count) | ||
| 179 | - vad.acceptWaveform(samples: [Float](array[offset ..< end])) | 178 | + let end = min(offset + windowSize, array.count) |
| 179 | + vad.acceptWaveform(samples: [Float](array[offset..<end])) | ||
| 180 | } | 180 | } |
| 181 | 181 | ||
| 182 | var index: Int = 0 | 182 | var index: Int = 0 |
#!/usr/bin/env bash
#
# Builds the decode-file-non-streaming Swift example on first use and runs it.
# Requires ../build-swift-macos; fetches the Whisper tiny.en model into the
# current directory when it is missing.

set -ex

build_dir=../build-swift-macos
model=sherpa-onnx-whisper-tiny.en
exe=decode-file-non-streaming

if [[ ! -d $build_dir ]]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [[ ! -d ./$model ]]; then
  echo "Please download the pre-trained model for testing."
  echo "You can refer to"
  echo ""
  echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html"
  echo ""
  echo "for help"

  wget -q "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$model.tar.bz2"
  tar xvf "$model.tar.bz2"
  rm "$model.tar.bz2"
fi

if [[ -e ./$exe ]]; then
  echo "./decode-file-non-streaming exists - skip building"
else
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I "$build_dir/install/include" \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    "./$exe.swift" ./SherpaOnnx.swift \
    -L "$build_dir/install/lib/" \
    -l sherpa-onnx \
    -l onnxruntime \
    -o "$exe"

  strip "$exe"
fi

# Let the dynamic loader find the libraries from the build tree.
export DYLD_LIBRARY_PATH=$PWD/$build_dir/install/lib:$DYLD_LIBRARY_PATH
"./$exe"
| @@ -14,7 +14,10 @@ if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then | @@ -14,7 +14,10 @@ if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then | ||
| 14 | echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english" | 14 | echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english" |
| 15 | echo "" | 15 | echo "" |
| 16 | echo "for help" | 16 | echo "for help" |
| 17 | - exit 1 | 17 | + |
| 18 | + wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 19 | + tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 20 | + rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 18 | fi | 21 | fi |
| 19 | 22 | ||
| 20 | if [ ! -e ./decode-file ]; then | 23 | if [ ! -e ./decode-file ]; then |
| @@ -28,6 +31,8 @@ if [ ! -e ./decode-file ]; then | @@ -28,6 +31,8 @@ if [ ! -e ./decode-file ]; then | ||
| 28 | -l sherpa-onnx \ | 31 | -l sherpa-onnx \ |
| 29 | -l onnxruntime \ | 32 | -l onnxruntime \ |
| 30 | -o decode-file | 33 | -o decode-file |
| 34 | + | ||
| 35 | + strip decode-file | ||
| 31 | else | 36 | else |
| 32 | echo "./decode-file exists - skip building" | 37 | echo "./decode-file exists - skip building" |
| 33 | fi | 38 | fi |
| @@ -14,7 +14,15 @@ if [ ! -d ./sherpa-onnx-whisper-tiny.en ]; then | @@ -14,7 +14,15 @@ if [ ! -d ./sherpa-onnx-whisper-tiny.en ]; then | ||
| 14 | echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html" | 14 | echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html" |
| 15 | echo "" | 15 | echo "" |
| 16 | echo "for help" | 16 | echo "for help" |
| 17 | - exit 1 | 17 | + |
| 18 | + wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2 | ||
| 19 | + tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2 | ||
| 20 | + rm sherpa-onnx-whisper-tiny.en.tar.bz2 | ||
| 21 | + ls -lh sherpa-onnx-whisper-tiny.en | ||
| 22 | +fi | ||
| 23 | +if [ ! -f ./silero_vad.onnx ]; then | ||
| 24 | + echo "downloading silero_vad" | ||
| 25 | + wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx | ||
| 18 | fi | 26 | fi |
| 19 | 27 | ||
| 20 | if [ ! -e ./generate-subtitles ]; then | 28 | if [ ! -e ./generate-subtitles ]; then |
| @@ -28,6 +36,8 @@ if [ ! -e ./generate-subtitles ]; then | @@ -28,6 +36,8 @@ if [ ! -e ./generate-subtitles ]; then | ||
| 28 | -l sherpa-onnx \ | 36 | -l sherpa-onnx \ |
| 29 | -l onnxruntime \ | 37 | -l onnxruntime \ |
| 30 | -o generate-subtitles | 38 | -o generate-subtitles |
| 39 | + | ||
| 40 | + strip generate-subtitles | ||
| 31 | else | 41 | else |
| 32 | echo "./generate-subtitles exists - skip building" | 42 | echo "./generate-subtitles exists - skip building" |
| 33 | fi | 43 | fi |
swift-api-examples/run-tts.sh
0 → 100755
#!/usr/bin/env bash
#
# Builds the tts Swift example on first use and runs it.
# Requires ../build-swift-macos; fetches the vits-vctk model into the current
# directory when it is missing.

set -ex

build_dir=../build-swift-macos
model=vits-vctk
exe=tts

if [[ ! -d $build_dir ]]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [[ ! -d ./$model ]]; then
  echo "Please download the pre-trained model for testing."
  echo "You can refer to"
  echo ""
  echo "https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vctk-english-multi-speaker-109-speakers"
  echo ""
  echo "for help"

  wget -q "https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$model.tar.bz2"
  tar xvf "$model.tar.bz2"
  rm "$model.tar.bz2"
fi

if [[ -e ./$exe ]]; then
  echo "./tts exists - skip building"
else
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I "$build_dir/install/include" \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    "./$exe.swift" ./SherpaOnnx.swift \
    -L "$build_dir/install/lib/" \
    -l sherpa-onnx \
    -l onnxruntime \
    -o "$exe"

  strip "$exe"
fi

# Let the dynamic loader find the libraries from the build tree.
export DYLD_LIBRARY_PATH=$PWD/$build_dir/install/lib:$DYLD_LIBRARY_PATH
"./$exe"
swift-api-examples/tts.swift
0 → 100644
/// Synthesizes one fixed sentence with the vits-vctk model and writes the
/// result to `test.wav` in the current directory.
func run() {
  let vits = sherpaOnnxOfflineTtsVitsModelConfig(
    model: "./vits-vctk/vits-vctk.onnx",
    lexicon: "./vits-vctk/lexicon.txt",
    tokens: "./vits-vctk/tokens.txt"
  )
  // Must be `var`: the wrapper's initializer takes the config by pointer (`&`).
  var ttsConfig = sherpaOnnxOfflineTtsConfig(
    model: sherpaOnnxOfflineTtsModelConfig(vits: vits))

  let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)

  let outputPath = "test.wav"
  let audio = tts.generate(text: "How are you doing? Fantastic!", sid: 99, speed: 1.0)
  audio.save(filename: outputPath)

  print("\nSaved to:\n\(outputPath)")
}
| 25 | + | ||
/// Program entry point; it only forwards to `run()`.
@main
struct App {
  static func main() { run() }
}
-
请 注册 或 登录 后发表评论