Fangjun Kuang
Committed by GitHub

Add Go API for audio tagging (#1840)

@@ -26,6 +26,8 @@ jobs: @@ -26,6 +26,8 @@ jobs:
26 include: 26 include:
27 - os: ubuntu-latest 27 - os: ubuntu-latest
28 arch: amd64 28 arch: amd64
  29 + - os: ubuntu-22.04-arm
  30 + arch: arm64
29 - os: macos-13 31 - os: macos-13
30 arch: amd64 32 arch: amd64
31 - os: macos-14 33 - os: macos-14
@@ -460,6 +462,19 @@ jobs: @@ -460,6 +462,19 @@ jobs:
460 ./run-tdnn-yesno.sh 462 ./run-tdnn-yesno.sh
461 rm -rf sherpa-onnx-tdnn-yesno 463 rm -rf sherpa-onnx-tdnn-yesno
462 464
  465 + - name: Test audio tagging (Linux/macOS)
  466 + if: matrix.os != 'windows-latest'
  467 + shell: bash
  468 + run: |
  469 + cd go-api-examples/audio-tagging
  470 + ls -lh
  471 + go mod tidy
  472 + cat go.mod
  473 + go build
  474 + ls -lh
  475 +
  476 + ./run.sh
  477 +
463 - name: Test streaming decoding files (Linux/macOS) 478 - name: Test streaming decoding files (Linux/macOS)
464 if: matrix.os != 'windows-latest' 479 if: matrix.os != 'windows-latest'
465 shell: bash 480 shell: bash
@@ -33,7 +33,7 @@ jobs: @@ -33,7 +33,7 @@ jobs:
33 strategy: 33 strategy:
34 fail-fast: false 34 fail-fast: false
35 matrix: 35 matrix:
36 - os: [macos-latest, macos-13, ubuntu-latest, windows-latest] 36 + os: [macos-latest, macos-13, ubuntu-latest, windows-latest, ubuntu-22.04-arm]
37 37
38 steps: 38 steps:
39 - uses: actions/checkout@v4 39 - uses: actions/checkout@v4
@@ -87,7 +87,7 @@ jobs: @@ -87,7 +87,7 @@ jobs:
87 make -j2 install 87 make -j2 install
88 fi 88 fi
89 89
90 - if [[ ${{ matrix.os }} == ubuntu-latest ]]; then 90 + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
91 cp -v ./lib/*.so $upload_dir 91 cp -v ./lib/*.so $upload_dir
92 cp -v _deps/onnxruntime-src/lib/libonnxruntime*so* $upload_dir 92 cp -v _deps/onnxruntime-src/lib/libonnxruntime*so* $upload_dir
93 93
@@ -132,6 +132,15 @@ jobs: @@ -132,6 +132,15 @@ jobs:
132 name: ${{ matrix.os }}-libs 132 name: ${{ matrix.os }}-libs
133 path: to-upload/ 133 path: to-upload/
134 134
  135 + - name: Test audio tagging
  136 + shell: bash
  137 + run: |
  138 + cd scripts/go/_internal/audio-tagging/
  139 +
  140 + ./run.sh
  141 +
  142 + ls -lh
  143 +
135 - name: Test Keyword spotting 144 - name: Test Keyword spotting
136 shell: bash 145 shell: bash
137 run: | 146 run: |
  1 +module audio-tagging
  2 +
  3 +go 1.12
  4 +
  1 +package main
  2 +
  3 +import (
  4 + "fmt"
  5 + sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
  6 + "log"
  7 +)
  8 +
  9 +func main() {
  10 + config := sherpa.AudioTaggingConfig{}
  11 + config.Model.Zipformer.Model = "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/model.int8.onnx"
  12 + config.Model.NumThreads = 1
  13 + config.Model.Debug = 1
  14 + config.Model.Provider = "cpu"
  15 + config.Labels = "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/class_labels_indices.csv"
  16 + config.TopK = 5
  17 +
  18 + tagging := sherpa.NewAudioTagging(&config)
  19 + defer sherpa.DeleteAudioTagging(tagging)
  20 +
  21 + wave_filename := "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/3.wav"
  22 +
  23 + wave := sherpa.ReadWave(wave_filename)
  24 + if wave == nil {
  25 + log.Printf("Failed to read %v\n", wave_filename)
  26 + return
  27 + }
  28 +
  29 + stream := sherpa.NewAudioTaggingStream(tagging)
  30 + defer sherpa.DeleteOfflineStream(stream)
  31 +
  32 + stream.AcceptWaveform(wave.SampleRate, wave.Samples)
  33 +
  34 + result := tagging.Compute(stream, 10)
  35 + fmt.Printf("the tagging result: %v\n", result)
  36 +}
#!/usr/bin/env bash
#
# Downloads the audio tagging model on first use, then builds and runs the
# Go audio tagging example in the current directory.

# Abort on the first failing command so that a failed download, extraction or
# build is reported at the point of failure instead of letting later commands
# run against a missing or partial model directory.
set -e

# Fetch and unpack the model only when it is not already present.
if [ ! -f ./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2

  tar xvf sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
  rm sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
fi

go mod tidy
go build

./audio-tagging
  1 +module audio-tagging
  2 +
  3 +go 1.12
  4 +
  5 +replace github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx => ../
  1 +../../../../go-api-examples/audio-tagging/main.go
  1 +../../../../go-api-examples/audio-tagging/run.sh
@@ -1607,3 +1607,95 @@ func (spotter *KeywordSpotter) GetResult(s *OnlineStream) *KeywordSpotterResult @@ -1607,3 +1607,95 @@ func (spotter *KeywordSpotter) GetResult(s *OnlineStream) *KeywordSpotterResult
1607 result.Keyword = C.GoString(p.keyword) 1607 result.Keyword = C.GoString(p.keyword)
1608 return result 1608 return result
1609 } 1609 }
  1610 +
// Configuration types for audio tagging.
type (
	// OfflineZipformerAudioTaggingModelConfig selects the zipformer audio
	// tagging model.
	OfflineZipformerAudioTaggingModelConfig struct {
		// Model is the path to the model file; the bundled example passes a
		// model.int8.onnx file.
		Model string
	}

	// AudioTaggingModelConfig holds the model-level settings that
	// NewAudioTagging forwards to the C API.
	AudioTaggingModelConfig struct {
		// Zipformer configures the zipformer model.
		Zipformer OfflineZipformerAudioTaggingModelConfig
		// Ced is forwarded as the C config's "ced" string.
		// NOTE(review): presumably the path to a CED model - confirm against
		// the C API.
		Ced string
		// NumThreads is forwarded as the C config's "num_threads".
		NumThreads int32
		// Debug is forwarded as the C config's "debug"; the bundled example
		// sets it to 1.
		// NOTE(review): presumably non-zero enables debug output - confirm.
		Debug int32
		// Provider is forwarded as the C config's "provider"; the bundled
		// example uses "cpu".
		Provider string
	}

	// AudioTaggingConfig is the top-level configuration passed to
	// NewAudioTagging.
	AudioTaggingConfig struct {
		// Model holds the model-level settings.
		Model AudioTaggingModelConfig
		// Labels is the path to the class label file; the bundled example
		// passes class_labels_indices.csv.
		Labels string
		// TopK is forwarded as the C config's "top_k".
		// NOTE(review): how it interacts with the topK argument of Compute is
		// not visible here - confirm.
		TopK int32
	}
)
  1629 +
  1630 +type AudioTagging struct {
  1631 + impl *C.struct_SherpaOnnxAudioTagging
  1632 +}
  1633 +
// AudioEvent is a single entry of the result returned by
// (*AudioTagging).Compute, copied out of the C event struct.
type AudioEvent struct {
	// Name is the event's "name" string as reported by the C API.
	Name string
	// Index is the event's "index" as reported by the C API.
	// NOTE(review): presumably the class index in the labels file - confirm.
	Index int
	// Prob is the event's "prob" as reported by the C API.
	Prob float32
}
  1639 +
  1640 +func DeleteAudioTagging(tagging *AudioTagging) {
  1641 + C.SherpaOnnxDestroyAudioTagging(tagging.impl)
  1642 + tagging.impl = nil
  1643 +}
  1644 +
  1645 +// The user is responsible to invoke [DeleteAudioTagging]() to free
  1646 +// the returned tagger to avoid memory leak
  1647 +func NewAudioTagging(config *AudioTaggingConfig) *AudioTagging {
  1648 + c := C.struct_SherpaOnnxAudioTaggingConfig{}
  1649 +
  1650 + c.model.zipformer.model = C.CString(config.Model.Zipformer.Model)
  1651 + defer C.free(unsafe.Pointer(c.model.zipformer.model))
  1652 +
  1653 + c.model.ced = C.CString(config.Model.Ced)
  1654 + defer C.free(unsafe.Pointer(c.model.ced))
  1655 +
  1656 + c.model.num_threads = C.int(config.Model.NumThreads)
  1657 +
  1658 + c.model.provider = C.CString(config.Model.Provider)
  1659 + defer C.free(unsafe.Pointer(c.model.provider))
  1660 +
  1661 + c.model.debug = C.int(config.Model.Debug)
  1662 +
  1663 + c.labels = C.CString(config.Labels)
  1664 + defer C.free(unsafe.Pointer(c.labels))
  1665 +
  1666 + c.top_k = C.int(config.TopK)
  1667 +
  1668 + tagging := &AudioTagging{}
  1669 + tagging.impl = C.SherpaOnnxCreateAudioTagging(&c)
  1670 +
  1671 + return tagging
  1672 +}
  1673 +
  1674 +// The user is responsible to invoke [DeleteOfflineStream]() to free
  1675 +// the returned stream to avoid memory leak
  1676 +func NewAudioTaggingStream(tagging *AudioTagging) *OfflineStream {
  1677 + stream := &OfflineStream{}
  1678 + stream.impl = C.SherpaOnnxAudioTaggingCreateOfflineStream(tagging.impl)
  1679 + return stream
  1680 +}
  1681 +
  1682 +func (tagging *AudioTagging) Compute(s *OfflineStream, topK int32) []AudioEvent {
  1683 + r := C.SherpaOnnxAudioTaggingCompute(tagging.impl, s.impl, C.int(topK))
  1684 + defer C.SherpaOnnxAudioTaggingFreeResults(r)
  1685 + result := make([]AudioEvent, 0)
  1686 +
  1687 + p := (*[1 << 28]*C.struct_SherpaOnnxAudioEvent)(unsafe.Pointer(r))
  1688 + i := 0
  1689 + for {
  1690 + if p[i] == nil {
  1691 + break
  1692 + }
  1693 + result = append(result, AudioEvent{
  1694 + Name: C.GoString(p[i].name),
  1695 + Index: int(p[i].index),
  1696 + Prob: float32(p[i].prob),
  1697 + })
  1698 + i += 1
  1699 + }
  1700 + return result
  1701 +}