正在显示
11 个修改的文件
包含
180 行增加
和
2 行删除
| @@ -26,6 +26,8 @@ jobs: | @@ -26,6 +26,8 @@ jobs: | ||
| 26 | include: | 26 | include: |
| 27 | - os: ubuntu-latest | 27 | - os: ubuntu-latest |
| 28 | arch: amd64 | 28 | arch: amd64 |
| 29 | + - os: ubuntu-22.04-arm | ||
| 30 | + arch: arm64 | ||
| 29 | - os: macos-13 | 31 | - os: macos-13 |
| 30 | arch: amd64 | 32 | arch: amd64 |
| 31 | - os: macos-14 | 33 | - os: macos-14 |
| @@ -460,6 +462,19 @@ jobs: | @@ -460,6 +462,19 @@ jobs: | ||
| 460 | ./run-tdnn-yesno.sh | 462 | ./run-tdnn-yesno.sh |
| 461 | rm -rf sherpa-onnx-tdnn-yesno | 463 | rm -rf sherpa-onnx-tdnn-yesno |
| 462 | 464 | ||
| 465 | + - name: Test audio tagging (Linux/macOS) | ||
| 466 | + if: matrix.os != 'windows-latest' | ||
| 467 | + shell: bash | ||
| 468 | + run: | | ||
| 469 | + cd go-api-examples/audio-tagging | ||
| 470 | + ls -lh | ||
| 471 | + go mod tidy | ||
| 472 | + cat go.mod | ||
| 473 | + go build | ||
| 474 | + ls -lh | ||
| 475 | + | ||
| 476 | + ./run.sh | ||
| 477 | + | ||
| 463 | - name: Test streaming decoding files (Linux/macOS) | 478 | - name: Test streaming decoding files (Linux/macOS) |
| 464 | if: matrix.os != 'windows-latest' | 479 | if: matrix.os != 'windows-latest' |
| 465 | shell: bash | 480 | shell: bash |
| @@ -33,7 +33,7 @@ jobs: | @@ -33,7 +33,7 @@ jobs: | ||
| 33 | strategy: | 33 | strategy: |
| 34 | fail-fast: false | 34 | fail-fast: false |
| 35 | matrix: | 35 | matrix: |
| 36 | - os: [macos-latest, macos-13, ubuntu-latest, windows-latest] | 36 | + os: [macos-latest, macos-13, ubuntu-latest, windows-latest, ubuntu-22.04-arm] |
| 37 | 37 | ||
| 38 | steps: | 38 | steps: |
| 39 | - uses: actions/checkout@v4 | 39 | - uses: actions/checkout@v4 |
| @@ -87,7 +87,7 @@ jobs: | @@ -87,7 +87,7 @@ jobs: | ||
| 87 | make -j2 install | 87 | make -j2 install |
| 88 | fi | 88 | fi |
| 89 | 89 | ||
| 90 | - if [[ ${{ matrix.os }} == ubuntu-latest ]]; then | 90 | + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then |
| 91 | cp -v ./lib/*.so $upload_dir | 91 | cp -v ./lib/*.so $upload_dir |
| 92 | cp -v _deps/onnxruntime-src/lib/libonnxruntime*so* $upload_dir | 92 | cp -v _deps/onnxruntime-src/lib/libonnxruntime*so* $upload_dir |
| 93 | 93 | ||
| @@ -132,6 +132,15 @@ jobs: | @@ -132,6 +132,15 @@ jobs: | ||
| 132 | name: ${{ matrix.os }}-libs | 132 | name: ${{ matrix.os }}-libs |
| 133 | path: to-upload/ | 133 | path: to-upload/ |
| 134 | 134 | ||
| 135 | + - name: Test audio tagging | ||
| 136 | + shell: bash | ||
| 137 | + run: | | ||
| 138 | + cd scripts/go/_internal/audio-tagging/ | ||
| 139 | + | ||
| 140 | + ./run.sh | ||
| 141 | + | ||
| 142 | + ls -lh | ||
| 143 | + | ||
| 135 | - name: Test Keyword spotting | 144 | - name: Test Keyword spotting |
| 136 | shell: bash | 145 | shell: bash |
| 137 | run: | | 146 | run: | |
go-api-examples/audio-tagging/go.mod
0 → 100644
go-api-examples/audio-tagging/main.go
0 → 100644
| 1 | +package main | ||
| 2 | + | ||
| 3 | +import ( | ||
| 4 | + "fmt" | ||
| 5 | + sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx" | ||
| 6 | + "log" | ||
| 7 | +) | ||
| 8 | + | ||
| 9 | +func main() { | ||
| 10 | + config := sherpa.AudioTaggingConfig{} | ||
| 11 | + config.Model.Zipformer.Model = "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/model.int8.onnx" | ||
| 12 | + config.Model.NumThreads = 1 | ||
| 13 | + config.Model.Debug = 1 | ||
| 14 | + config.Model.Provider = "cpu" | ||
| 15 | + config.Labels = "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/class_labels_indices.csv" | ||
| 16 | + config.TopK = 5 | ||
| 17 | + | ||
| 18 | + tagging := sherpa.NewAudioTagging(&config) | ||
| 19 | + defer sherpa.DeleteAudioTagging(tagging) | ||
| 20 | + | ||
| 21 | + wave_filename := "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/3.wav" | ||
| 22 | + | ||
| 23 | + wave := sherpa.ReadWave(wave_filename) | ||
| 24 | + if wave == nil { | ||
| 25 | + log.Printf("Failed to read %v\n", wave_filename) | ||
| 26 | + return | ||
| 27 | + } | ||
| 28 | + | ||
| 29 | + stream := sherpa.NewAudioTaggingStream(tagging) | ||
| 30 | + defer sherpa.DeleteOfflineStream(stream) | ||
| 31 | + | ||
| 32 | + stream.AcceptWaveform(wave.SampleRate, wave.Samples) | ||
| 33 | + | ||
| 34 | + result := tagging.Compute(stream, 10) | ||
| 35 | + fmt.Printf("the tagging result: %v\n", result) | ||
| 36 | +} |
go-api-examples/audio-tagging/run.sh
0 → 100755
#!/usr/bin/env bash
#
# Downloads the zipformer audio-tagging model into the current directory
# (unless model.int8.onnx is already present), builds the Go example and
# runs the resulting ./audio-tagging binary.

# Stop at the first failing command. Without this, a failed download,
# extraction or build is ignored and the script's exit status only reflects
# the last command, which can hide the failure from CI.
set -e

if [ ! -f ./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2

  tar xvf sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
  rm sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
fi

go mod tidy
go build

./audio-tagging
| 1 | +audio-tagging |
scripts/go/_internal/audio-tagging/go.mod
0 → 100644
scripts/go/_internal/audio-tagging/main.go
0 → 120000
| 1 | +../../../../go-api-examples/audio-tagging/main.go |
scripts/go/_internal/audio-tagging/run.sh
0 → 120000
| 1 | +../../../../go-api-examples/audio-tagging/run.sh |
| 1 | +../../../../build/lib |
| @@ -1607,3 +1607,95 @@ func (spotter *KeywordSpotter) GetResult(s *OnlineStream) *KeywordSpotterResult | @@ -1607,3 +1607,95 @@ func (spotter *KeywordSpotter) GetResult(s *OnlineStream) *KeywordSpotterResult | ||
| 1607 | result.Keyword = C.GoString(p.keyword) | 1607 | result.Keyword = C.GoString(p.keyword) |
| 1608 | return result | 1608 | return result |
| 1609 | } | 1609 | } |
| 1610 | + | ||
// OfflineZipformerAudioTaggingModelConfig configures the zipformer model
// used for audio tagging.
type OfflineZipformerAudioTaggingModelConfig struct {
	// Model is forwarded to the C API as model.zipformer.model; the example
	// program sets it to the path of a model.int8.onnx file.
	Model string
}

// AudioTaggingModelConfig groups the model-level settings that
// NewAudioTagging forwards to the C API.
type AudioTaggingModelConfig struct {
	// Zipformer holds the zipformer model settings.
	Zipformer OfflineZipformerAudioTaggingModelConfig

	// Ced is forwarded as model.ced.
	// NOTE(review): presumably the path to a CED model file — confirm.
	Ced string

	// NumThreads is forwarded as model.num_threads.
	NumThreads int32

	// Debug is forwarded as model.debug; the example program sets it to 1.
	// NOTE(review): presumably non-zero enables debug output — confirm.
	Debug int32

	// Provider is forwarded as model.provider, e.g. "cpu".
	Provider string
}

// AudioTaggingConfig is the top-level configuration accepted by
// NewAudioTagging.
type AudioTaggingConfig struct {
	// Model holds the model-level settings.
	Model AudioTaggingModelConfig

	// Labels is forwarded as labels; the example program sets it to the path
	// of a class_labels_indices.csv file.
	Labels string

	// TopK is forwarded as top_k.
	// NOTE(review): presumably the default number of events to report —
	// confirm how it interacts with the topK argument of Compute.
	TopK int32
}
| 1629 | + | ||
| 1630 | +type AudioTagging struct { | ||
| 1631 | + impl *C.struct_SherpaOnnxAudioTagging | ||
| 1632 | +} | ||
| 1633 | + | ||
// AudioEvent is one entry of the result returned by AudioTagging.Compute.
// Each field is copied from the corresponding field of the C
// SherpaOnnxAudioEvent struct.
type AudioEvent struct {
	// Name is the event name reported by the C API.
	Name string

	// Index is the event index reported by the C API.
	// NOTE(review): presumably the row of the event in the labels file — confirm.
	Index int

	// Prob is the prob value reported by the C API.
	// NOTE(review): presumably the probability of the event — confirm.
	Prob float32
}
| 1639 | + | ||
| 1640 | +func DeleteAudioTagging(tagging *AudioTagging) { | ||
| 1641 | + C.SherpaOnnxDestroyAudioTagging(tagging.impl) | ||
| 1642 | + tagging.impl = nil | ||
| 1643 | +} | ||
| 1644 | + | ||
| 1645 | +// The user is responsible to invoke [DeleteAudioTagging]() to free | ||
| 1646 | +// the returned tagger to avoid memory leak | ||
| 1647 | +func NewAudioTagging(config *AudioTaggingConfig) *AudioTagging { | ||
| 1648 | + c := C.struct_SherpaOnnxAudioTaggingConfig{} | ||
| 1649 | + | ||
| 1650 | + c.model.zipformer.model = C.CString(config.Model.Zipformer.Model) | ||
| 1651 | + defer C.free(unsafe.Pointer(c.model.zipformer.model)) | ||
| 1652 | + | ||
| 1653 | + c.model.ced = C.CString(config.Model.Ced) | ||
| 1654 | + defer C.free(unsafe.Pointer(c.model.ced)) | ||
| 1655 | + | ||
| 1656 | + c.model.num_threads = C.int(config.Model.NumThreads) | ||
| 1657 | + | ||
| 1658 | + c.model.provider = C.CString(config.Model.Provider) | ||
| 1659 | + defer C.free(unsafe.Pointer(c.model.provider)) | ||
| 1660 | + | ||
| 1661 | + c.model.debug = C.int(config.Model.Debug) | ||
| 1662 | + | ||
| 1663 | + c.labels = C.CString(config.Labels) | ||
| 1664 | + defer C.free(unsafe.Pointer(c.labels)) | ||
| 1665 | + | ||
| 1666 | + c.top_k = C.int(config.TopK) | ||
| 1667 | + | ||
| 1668 | + tagging := &AudioTagging{} | ||
| 1669 | + tagging.impl = C.SherpaOnnxCreateAudioTagging(&c) | ||
| 1670 | + | ||
| 1671 | + return tagging | ||
| 1672 | +} | ||
| 1673 | + | ||
| 1674 | +// The user is responsible to invoke [DeleteOfflineStream]() to free | ||
| 1675 | +// the returned stream to avoid memory leak | ||
| 1676 | +func NewAudioTaggingStream(tagging *AudioTagging) *OfflineStream { | ||
| 1677 | + stream := &OfflineStream{} | ||
| 1678 | + stream.impl = C.SherpaOnnxAudioTaggingCreateOfflineStream(tagging.impl) | ||
| 1679 | + return stream | ||
| 1680 | +} | ||
| 1681 | + | ||
| 1682 | +func (tagging *AudioTagging) Compute(s *OfflineStream, topK int32) []AudioEvent { | ||
| 1683 | + r := C.SherpaOnnxAudioTaggingCompute(tagging.impl, s.impl, C.int(topK)) | ||
| 1684 | + defer C.SherpaOnnxAudioTaggingFreeResults(r) | ||
| 1685 | + result := make([]AudioEvent, 0) | ||
| 1686 | + | ||
| 1687 | + p := (*[1 << 28]*C.struct_SherpaOnnxAudioEvent)(unsafe.Pointer(r)) | ||
| 1688 | + i := 0 | ||
| 1689 | + for { | ||
| 1690 | + if p[i] == nil { | ||
| 1691 | + break | ||
| 1692 | + } | ||
| 1693 | + result = append(result, AudioEvent{ | ||
| 1694 | + Name: C.GoString(p[i].name), | ||
| 1695 | + Index: int(p[i].index), | ||
| 1696 | + Prob: float32(p[i].prob), | ||
| 1697 | + }) | ||
| 1698 | + i += 1 | ||
| 1699 | + } | ||
| 1700 | + return result | ||
| 1701 | +} |
-
请 注册 或 登录 后发表评论