Fangjun Kuang
Committed by GitHub

Add go-api-examples (#219)

Showing 28 changed files with 1,624 additions and 14 deletions
  1 +name: test-go
  2 +
  3 +on:
  4 + push:
  5 + branches:
  6 + - master
  7 + tags:
  8 + - '*'
  9 + pull_request:
  10 + branches:
  11 + - master
  12 +
  13 + workflow_dispatch:
  14 +
  15 +concurrency:
  16 + group: go-${{ github.ref }}
  17 + cancel-in-progress: true
  18 +
  19 +jobs:
  20 + go:
  21 + name: go ${{ matrix.os }} ${{ matrix.arch }}
  22 + runs-on: ${{ matrix.os }}
  23 + strategy:
  24 + fail-fast: false
  25 + matrix:
  26 + include:
  27 + - os: ubuntu-latest
  28 + arch: amd64
  29 + - os: macos-latest
  30 + arch: amd64
  31 + - os: windows-latest
  32 + arch: x64
  33 + - os: windows-latest
  34 + arch: x86 # use 386 for GOARCH
  35 +
  36 + steps:
  37 + - uses: actions/checkout@v2
  38 + with:
  39 + fetch-depth: 0
  40 + - uses: actions/setup-go@v4
  41 + with:
  42 + go-version: '>=1.20'
  43 +
  44 + - name: Display go version
  45 + shell: bash
  46 + run: |
  47 + go version
  48 + go env GOPATH
  49 + go env GOARCH
  50 +
  51 + - name: Set up MinGW
  52 + if: matrix.os == 'windows-latest'
  53 + uses: egor-tensin/setup-mingw@v2
  54 + with:
  55 + platform: ${{ matrix.arch }}
  56 +
  57 + - name: Show gcc
  58 + if: matrix.os == 'windows-latest'
  59 + run: |
  60 + gcc --version
  61 +
  62 + - name: Test non-streaming decoding files (Linux/macOS)
  63 + if: matrix.os != 'windows-latest'
  64 + shell: bash
  65 + run: |
  66 + cd go-api-examples/non-streaming-decode-files
  67 + ls -lh
  68 + go mod tidy
  69 + cat go.mod
  70 + go build -x
  71 + ls -lh
  72 +
  73 + git lfs install
  74 +
  75 + echo "Test transducer"
  76 + git clone https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-en-2023-06-26
  77 + ./run-transducer.sh
  78 + rm -rf sherpa-onnx-zipformer-en-2023-06-26
  79 +
  80 + echo "Test paraformer"
  81 + git clone https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28
  82 + ./run-paraformer.sh
  83 + rm -rf sherpa-onnx-paraformer-zh-2023-03-28
  84 +
  85 + echo "Test NeMo CTC"
  86 + git clone https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-medium
  87 + ./run-nemo-ctc.sh
  88 + rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium
  89 +
  90 + - name: Test non-streaming decoding files (Win64)
  91 + if: matrix.os == 'windows-latest' && matrix.arch == 'x64'
  92 + shell: bash
  93 + run: |
  94 + cd go-api-examples/non-streaming-decode-files
  95 + ls -lh
  96 + go mod tidy
  97 + cat go.mod
  98 + go build
  99 + ls -lh
  100 +
  101 + echo $PWD
  102 + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
  103 + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
  104 + cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll .
  105 + ls -lh
  106 +
  107 + git lfs install
  108 +
  109 + echo "Test transducer"
  110 + git clone https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-en-2023-06-26
  111 + ./run-transducer.sh
  112 + rm -rf sherpa-onnx-zipformer-en-2023-06-26
  113 +
  114 + echo "Test paraformer"
  115 + git clone https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28
  116 + ./run-paraformer.sh
  117 + rm -rf sherpa-onnx-paraformer-zh-2023-03-28
  118 +
  119 + echo "Test NeMo CTC"
  120 + git clone https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-medium
  121 + ./run-nemo-ctc.sh
  122 + rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium
  123 +
  124 + - name: Test non-streaming decoding files (Win32)
  125 + if: matrix.os == 'windows-latest' && matrix.arch == 'x86'
  126 + shell: bash
  127 + run: |
  128 + cd go-api-examples/non-streaming-decode-files
  129 + ls -lh
  130 + go mod tidy
  131 + cat go.mod
  132 + ls -lh
  133 +
  134 + go env GOARCH
  135 + go env
  136 + echo "------------------------------"
  137 + go env -w GOARCH=386
  138 + go env -w CGO_ENABLED=1
  139 + go env
  140 +
  141 + go clean
  142 + go build -x
  143 +
  144 + echo $PWD
  145 + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
  146 + cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll .
  147 + ls -lh
  148 +
  149 + git lfs install
  150 +
  151 + echo "Test transducer"
  152 + git clone https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-en-2023-06-26
  153 + ./run-transducer.sh
  154 + rm -rf sherpa-onnx-zipformer-en-2023-06-26
  155 +
  156 + echo "Test paraformer"
  157 + git clone https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28
  158 + ./run-paraformer.sh
  159 + rm -rf sherpa-onnx-paraformer-zh-2023-03-28
  160 +
  161 + echo "Test NeMo CTC"
  162 + git clone https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-medium
  163 + ./run-nemo-ctc.sh
  164 + rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium
  165 +
  166 + - name: Test streaming decoding files (Linux/macOS)
  167 + if: matrix.os != 'windows-latest'
  168 + shell: bash
  169 + run: |
  170 + cd go-api-examples/streaming-decode-files
  171 + ls -lh
  172 + go mod tidy
  173 + cat go.mod
  174 + go build -x
  175 + ls -lh
  176 +
  177 + git lfs install
  178 +
  179 + echo "Test transducer"
  180 + git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26
  181 + ./run.sh
  182 + rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26
  183 +
  184 + - name: Test streaming decoding files (Win64)
  185 + if: matrix.os == 'windows-latest' && matrix.arch == 'x64'
  186 + shell: bash
  187 + run: |
  188 + cd go-api-examples/streaming-decode-files
  189 + ls -lh
  190 + go mod tidy
  191 + cat go.mod
  192 + go build
  193 + ls -lh
  194 +
  195 + echo $PWD
  196 + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
  197 + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
  198 + cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll .
  199 + ls -lh
  200 +
  201 + git lfs install
  202 +
  203 + echo "Test transducer"
  204 + git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26
  205 + ./run.sh
  206 + rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26
  207 +
  208 + - name: Test streaming decoding files (Win32)
  209 + if: matrix.os == 'windows-latest' && matrix.arch == 'x86'
  210 + shell: bash
  211 + run: |
  212 + cd go-api-examples/streaming-decode-files
  213 + ls -lh
  214 + go mod tidy
  215 + cat go.mod
  216 + ls -lh
  217 +
  218 + go env GOARCH
  219 + go env
  220 + echo "------------------------------"
  221 + go env -w GOARCH=386
  222 + go env -w CGO_ENABLED=1
  223 + go env
  224 +
  225 + go clean
  226 + go build -x
  227 +
  228 + echo $PWD
  229 + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
  230 + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
  231 + cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll .
  232 + ls -lh
  233 +
  234 + git lfs install
  235 +
  236 + echo "Test transducer"
  237 + git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26
  238 + ./run.sh
  239 + rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26
  1 +name: release-go
  2 +
  3 +on:
  4 + push:
  5 + branches:
  6 + - master
  7 + tags:
  8 + - '*'
  9 +
  10 + workflow_dispatch:
  11 + inputs:
  12 + version:
  13 + description: "Version information (e.g., 1.5.3) or auto"
  14 + required: true
  15 +
  16 +env:
  17 + VERSION:
  18 + |- # Taken from the release tag name or, if triggered via workflow_dispatch, from the version input
  19 + ${{ github.event.release.tag_name || github.event.inputs.version }}
  20 +
  21 +concurrency:
  22 + group: release-go-${{ github.ref }}
  23 + cancel-in-progress: true
  24 +
  25 +jobs:
  26 + linux-x86_64_wheel:
  27 + if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
  28 + name: Linux x86_64
  29 + runs-on: ${{ matrix.os }}
  30 + strategy:
  31 + fail-fast: false
  32 + matrix:
  33 + os: [ubuntu-latest]
  34 +
  35 + steps:
  36 + - uses: actions/checkout@v2
  37 +
  38 + - name: SSH to GitHub
  39 + run: |
  40 + mkdir -p ~/.ssh/
  41 + cp scripts/go/ssh_config ~/.ssh/config
  42 + echo "${{ secrets.MY_GITHUB_SSH_KEY }}" > ~/.ssh/github && chmod 600 ~/.ssh/github
  43 + ssh github.com || true
  44 + rm ~/.ssh/github
  45 +
  46 + # see https://cibuildwheel.readthedocs.io/en/stable/changelog/
  47 + # for a list of versions
  48 + - name: Build wheels
  49 + uses: pypa/cibuildwheel@v2.11.4
  50 + env:
  51 + CIBW_BEFORE_BUILD: "pip install -U cmake numpy"
  52 + CIBW_BUILD: "cp38-*64"
  53 + CIBW_SKIP: "cp27-* cp35-* cp36-* *-win32 pp* *-musllinux* *-manylinux_i686"
  54 + CIBW_BUILD_VERBOSITY: 3
  55 + CIBW_ENVIRONMENT_LINUX: LD_LIBRARY_PATH='/project/build/bdist.linux-x86_64/wheel/sherpa_onnx/lib'
  56 +
  57 + - name: Display wheels
  58 + shell: bash
  59 + run: |
  60 + ls -lh ./wheelhouse/*.whl
  61 + unzip -l ./wheelhouse/*.whl
  62 +
  63 + - uses: actions/upload-artifact@v2
  64 + with:
  65 + name: ${{ matrix.os }}-wheels-for-go
  66 + path: ./wheelhouse/*.whl
  67 +
  68 + macOS:
  69 + name: macOS ${{ matrix.arch }}
  70 + runs-on: ${{ matrix.os }}
  71 + strategy:
  72 + fail-fast: false
  73 + matrix:
  74 + os: [macos-latest]
  75 + arch: [x86_64, arm64]
  76 +
  77 + steps:
  78 + - uses: actions/checkout@v2
  79 + - name: Configure CMake
  80 + shell: bash
  81 + run: |
  82 + mkdir build
  83 + cd build
  84 + cmake -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} -D BUILD_SHARED_LIBS=ON -DCMAKE_OSX_ARCHITECTURES=${{ matrix.arch }} -DCMAKE_INSTALL_PREFIX=./install ..
  85 +
  86 + - name: Build sherpa-onnx for macOS ${{ matrix.arch }}
  87 + shell: bash
  88 + run: |
  89 + cd build
  90 + make -j2
  91 + make install
  92 +
  93 + ls -lh lib
  94 + ls -lh bin
  95 +
  96 + file install/lib/lib*
  97 +
  98 + - uses: actions/upload-artifact@v2
  99 + with:
  100 + name: ${{ matrix.os }}-for-${{ matrix.arch }}
  101 + path: ./build/install/lib/
  102 +
  103 + windows:
  104 + name: Windows ${{ matrix.arch }}
  105 + runs-on: ${{ matrix.os }}
  106 + strategy:
  107 + fail-fast: false
  108 + matrix:
  109 + os: [windows-latest]
  110 + arch: [x64, Win32]
  111 + steps:
  112 + - uses: actions/checkout@v2
  113 + with:
  114 + fetch-depth: 0
  115 +
  116 + - name: Configure CMake
  117 + shell: bash
  118 + run: |
  119 + mkdir build
  120 + cd build
  121 + cmake -A ${{ matrix.arch }} -D CMAKE_BUILD_TYPE=Release -D BUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=./install ..
  122 +
  123 + - name: Build sherpa-onnx for windows
  124 + shell: bash
  125 + run: |
  126 + cd build
  127 + cmake --build . --config Release -- -m:2
  128 + cmake --build . --config Release --target install -- -m:2
  129 +
  130 + ls -lh install/*
  131 +
  132 + ls -lh install/lib
  133 + ls -lh install/bin
  134 +
  135 + - name: Upload artifact
  136 + uses: actions/upload-artifact@v2
  137 + with:
  138 + name: sherpa-onnx-go-windows-${{ matrix.arch }}
  139 + path: ./build/install/lib/
  140 +
  141 + Release:
  142 + name: Release
  143 + runs-on: ubuntu-latest
  144 + needs: [linux-x86_64_wheel, macOS, windows]
  145 +
  146 + steps:
  147 + - uses: actions/checkout@v2
  148 +
  149 + - name: Add SSH key
  150 + run: |
  151 + mkdir -p ~/.ssh/
  152 + cp scripts/go/ssh_config ~/.ssh/config
  153 + echo "${{ secrets.MY_GITHUB_SSH_KEY }}" > ~/.ssh/github && chmod 600 ~/.ssh/github
  154 + ssh github.com || true
  155 +
  156 + - name: Retrieve artifact from ubuntu-latest
  157 + uses: actions/download-artifact@v2
  158 + with:
  159 + name: ubuntu-latest-wheels-for-go
  160 + path: ./linux
  161 +
  162 + - name: Retrieve artifact from macos-latest (x86_64)
  163 + uses: actions/download-artifact@v2
  164 + with:
  165 + name: macos-latest-for-x86_64
  166 + path: ./macos-x86_64
  167 +
  168 + - name: Retrieve artifact from macos-latest (arm64)
  169 + uses: actions/download-artifact@v2
  170 + with:
  171 + name: macos-latest-for-arm64
  172 + path: ./macos-arm64
  173 +
  174 + - name: Retrieve artifact from windows-latest (x64)
  175 + uses: actions/download-artifact@v2
  176 + with:
  177 + name: sherpa-onnx-go-windows-x64
  178 + path: ./windows-x64
  179 +
  180 + - name: Retrieve artifact from windows-latest (Win32)
  181 + uses: actions/download-artifact@v2
  182 + with:
  183 + name: sherpa-onnx-go-windows-Win32
  184 + path: ./windows-win32
  185 +
  186 + - name: Unzip Ubuntu wheels
  187 + shell: bash
  188 + run: |
  189 + cd linux
  190 + ls -lh
  191 + unzip ./*.whl
  192 + tree .
  193 +
  194 + - name: Release go
  195 + if: env.VERSION != ''
  196 + shell: bash
  197 + run: |
  198 + ./scripts/go/release.sh
@@ -28,7 +28,7 @@ jobs:
  28 fail-fast: false
  29 matrix:
  30 os: [ubuntu-latest, windows-latest, macos-latest]
  31 - python-version: ["3.7", "3.8", "3.9", "3.10"]
  31 + python-version: ["3.8", "3.9", "3.10"]
  32
  33 steps:
  34 - uses: actions/checkout@v2
@@ -8,10 +8,6 @@ if(NOT CMAKE_SYSTEM_NAME STREQUAL Darwin)
  8 message(FATAL_ERROR "This file is for macOS only. Given: ${CMAKE_SYSTEM_NAME}")
  9 endif()
  10
  11 - if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL arm64)
  12 - message(FATAL_ERROR "This file is for arm64 only. Given: ${CMAKE_SYSTEM_PROCESSOR}")
  13 - endif()
  14 -
  15 set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.15.1/onnxruntime-osx-arm64-1.15.1.tgz")
  16 set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/onnxruntime-osx-arm64-1.15.1.tgz")
  17 set(onnxruntime_HASH "SHA256=df97832fc7907c6677a6da437f92339d84a462becb74b1d65217fcb859ee9460")
@@ -8,10 +8,6 @@ if(NOT CMAKE_SYSTEM_NAME STREQUAL Darwin)
  8 message(FATAL_ERROR "This file is for macOS only. Given: ${CMAKE_SYSTEM_NAME}")
  9 endif()
  10
  11 - if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64)
  12 - message(FATAL_ERROR "This file is for x86_64 only. Given: ${CMAKE_SYSTEM_PROCESSOR}")
  13 - endif()
  14 -
  15 set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.15.1/onnxruntime-osx-x86_64-1.15.1.tgz")
  16 set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/onnxruntime-osx-x86_64-1.15.1.tgz")
  17 set(onnxruntime_HASH "SHA256=4b66ebbca24b8b96f6b74655fee3610a7e529b4e01f6790632f24ee82b778e5a")
@@ -16,12 +16,18 @@ function(download_onnxruntime)
  16 include(onnxruntime-linux-x86_64)
  17 endif()
  18 elseif(CMAKE_SYSTEM_NAME STREQUAL Darwin)
  19 - if (arm64 IN_LIST CMAKE_OSX_ARCHITECTURES OR x86_64 IN_LIST CMAKE_OSX_ARCHITECTURES)
  20 - include(onnxruntime-darwin-universal)
  21 - elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64)
  22 - include(onnxruntime-darwin-x86_64)
  19 + if (arm64 IN_LIST CMAKE_OSX_ARCHITECTURES AND x86_64 IN_LIST CMAKE_OSX_ARCHITECTURES)
  20 + include(onnxruntime-osx-universal)
  21 + elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64 AND CMAKE_OSX_ARCHITECTURES STREQUAL "arm64")
  22 + # cross compiling
  23 + include(onnxruntime-osx-arm64)
  24 + elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL arm64 AND CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64")
  25 + # cross compiling
  26 + include(onnxruntime-osx-x86_64)
  23 elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL arm64)
  24 - include(onnxruntime-darwin-arm64)
  28 + include(onnxruntime-osx-arm64)
  29 + elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64)
  30 + include(onnxruntime-osx-x86_64)
  25 else()
  26 message(FATAL_ERROR "Unsupport processor {CMAKE_SYSTEM_PROCESSOR} for Darwin")
  27 endif()
  1 +non-streaming-decode-files
  2 +sherpa-onnx-zipformer-en-2023-06-26
  1 +module non-streaming-decode-files
  2 +
  3 +go 1.20
  4 +
  5 +require (
  6 + github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8
  7 + github.com/spf13/pflag v1.0.5
  8 + github.com/youpy/go-wav v0.3.2
  9 +)
  10 +
  11 +require (
  12 + github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6 // indirect
  13 + github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4 // indirect
  14 + github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5 // indirect
  15 + github.com/youpy/go-riff v0.1.0 // indirect
  16 + github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b // indirect
  17 +)
  1 +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
  2 +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
  3 +github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ=
  4 +github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
  5 +github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8 h1:BXc31pWwd7CJLM6m9HavxqiyYJdN3Jc9I26pd4x+JHE=
  6 +github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8/go.mod h1:kszL/pwg9XTpRGi1AYW/aSwdhRBqb6LN0SjXR0jnsBo=
  7 +github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6 h1:o0+l4Wr3IWkWH+kdt8ZZP55L8mRSpW7h1KzvXcfx9FM=
  8 +github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6/go.mod h1:9kU02PSdDdzBApwIDBmE2jWS54WvZbRafOEvW/PVLxE=
  9 +github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4 h1:Xri23R3+tQFkNwO6zVxqZbjMRJP40z7JtoSqW6UP6sM=
  10 +github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4/go.mod h1:a+AJZKNQkFO+JyzGkHySysYfBzzdcoJI5ITFsnhVcmo=
  11 +github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5 h1:OR1LLoptR8W35j5u06KrYUblG35B83HHCPWrVK31uHY=
  12 +github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5/go.mod h1:ermMOETZUv0nM7MmnXWqeHREMR6zQXBhJpEP2fbHIZo=
  13 +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
  14 +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
  15 +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
  16 +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
  17 +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
  18 +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
  19 +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
  20 +github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
  21 +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
  22 +github.com/youpy/go-riff v0.1.0 h1:vZO/37nI4tIET8tQI0Qn0Y79qQh99aEpponTPiPut7k=
  23 +github.com/youpy/go-riff v0.1.0/go.mod h1:83nxdDV4Z9RzrTut9losK7ve4hUnxUR8ASSz4BsKXwQ=
  24 +github.com/youpy/go-wav v0.3.2 h1:NLM8L/7yZ0Bntadw/0h95OyUsen+DQIVf9gay+SUsMU=
  25 +github.com/youpy/go-wav v0.3.2/go.mod h1:0FCieAXAeSdcxFfwLpRuEo0PFmAoc+8NU34h7TUvk50=
  26 +github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b h1:QqixIpc5WFIqTLxB3Hq8qs0qImAgBdq0p6rq2Qdl634=
  27 +github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b/go.mod h1:T2h1zV50R/q0CVYnsQOQ6L7P4a2ZxH47ixWcMXFGyx8=
  28 +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
  29 +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
  30 +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
  31 +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
  32 +gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo=
  33 +gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw=
  1 +package main
  2 +
  3 +import (
  4 + "bytes"
  5 + "encoding/binary"
  6 + sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
  7 + flag "github.com/spf13/pflag"
  8 + "github.com/youpy/go-wav"
  9 + "os"
  10 + "strings"
  11 +
  12 + "log"
  13 +)
  14 +
  15 +func main() {
  16 +
  17 + log.SetFlags(log.LstdFlags | log.Lmicroseconds)
  18 +
  19 + config := sherpa.OfflineRecognizerConfig{}
  20 + config.FeatConfig = sherpa.FeatureConfig{SampleRate: 16000, FeatureDim: 80}
  21 +
  22 + flag.StringVar(&config.ModelConfig.Transducer.Encoder, "encoder", "", "Path to the encoder model")
  23 + flag.StringVar(&config.ModelConfig.Transducer.Decoder, "decoder", "", "Path to the decoder model")
  24 + flag.StringVar(&config.ModelConfig.Transducer.Joiner, "joiner", "", "Path to the joiner model")
  25 + flag.StringVar(&config.ModelConfig.Paraformer.Model, "paraformer", "", "Path to the paraformer model")
  26 + flag.StringVar(&config.ModelConfig.NemoCTC.Model, "nemo-ctc", "", "Path to the NeMo CTC model")
  27 + flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file")
  28 + flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing")
  29 + flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message")
  30 + flag.StringVar(&config.ModelConfig.ModelType, "model-type", "", "Optional. Used for loading the model in a faster way")
  31 + flag.StringVar(&config.ModelConfig.Provider, "provider", "cpu", "Provider to use")
  32 + flag.StringVar(&config.LmConfig.Model, "lm-model", "", "Optional. Path to the LM model")
  33 + flag.Float32Var(&config.LmConfig.Scale, "lm-scale", 1.0, "Optional. Scale for the LM model")
  34 +
  35 + flag.StringVar(&config.DecodingMethod, "decoding-method", "greedy_search", "Decoding method. Possible values: greedy_search, modified_beam_search")
  36 + flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search")
  37 +
  38 + flag.Parse()
  39 +
  40 + if len(flag.Args()) != 1 {
  41 + log.Fatalf("Please provide one wave file")
  42 + }
  43 +
  44 + log.Println("Reading", flag.Arg(0))
  45 +
  46 + samples, sampleRate := readWave(flag.Arg(0))
  47 +
  48 + log.Println("Initializing recognizer (may take several seconds)")
  49 + recognizer := sherpa.NewOfflineRecognizer(&config)
  50 + log.Println("Recognizer created!")
  51 + defer sherpa.DeleteOfflineRecognizer(recognizer)
  52 +
  53 + log.Println("Start decoding!")
  54 + stream := sherpa.NewOfflineStream(recognizer)
  55 + defer sherpa.DeleteOfflineStream(stream)
  56 +
  57 + stream.AcceptWaveform(sampleRate, samples)
  58 +
  59 + recognizer.Decode(stream)
  60 + log.Println("Decoding done!")
  61 + result := stream.GetResult()
  62 +
  63 + log.Println(strings.ToLower(result.Text))
  64 + log.Printf("Wave duration: %v seconds", float32(len(samples))/float32(sampleRate))
  65 +}
  66 +
  67 +func readWave(filename string) (samples []float32, sampleRate int) {
  68 + file, _ := os.Open(filename)
  69 + defer file.Close()
  70 +
  71 + reader := wav.NewReader(file)
  72 + format, err := reader.Format()
  73 + if err != nil {
  74 + log.Fatalf("Failed to read wave format")
  75 + }
  76 +
  77 + if format.AudioFormat != 1 {
  78 + log.Fatalf("Support only PCM format. Given: %v\n", format.AudioFormat)
  79 + }
  80 +
  81 + if format.NumChannels != 1 {
  82 + log.Fatalf("Support only 1 channel wave file. Given: %v\n", format.NumChannels)
  83 + }
  84 +
  85 + if format.BitsPerSample != 16 {
  86 + log.Fatalf("Support only 16-bit per sample. Given: %v\n", format.BitsPerSample)
  87 + }
  88 +
  89 + reader.Duration() // so that it initializes reader.Size
  90 +
  91 + buf := make([]byte, reader.Size)
  92 + n, err := reader.Read(buf)
  93 + if n != int(reader.Size) {
  94 + log.Fatalf("Failed to read %v bytes. Returned %v bytes\n", reader.Size, n)
  95 + }
  96 +
  97 + samples = samplesInt16ToFloat(buf)
  98 + sampleRate = int(format.SampleRate)
  99 +
  100 + return
  101 +}
  102 +
  103 +func samplesInt16ToFloat(inSamples []byte) []float32 {
  104 + numSamples := len(inSamples) / 2
  105 + outSamples := make([]float32, numSamples)
  106 +
  107 + for i := 0; i != numSamples; i++ {
  108 + s := inSamples[i*2 : (i+1)*2]
  109 +
  110 + var s16 int16
  111 + buf := bytes.NewReader(s)
  112 + err := binary.Read(buf, binary.LittleEndian, &s16)
  113 + if err != nil {
  114 + log.Fatal("Failed to parse 16-bit sample")
  115 + }
  116 + outSamples[i] = float32(s16) / 32768
  117 + }
  118 +
  119 + return outSamples
  120 +}
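
A minimal sketch of building this example locally, mirroring the commands the test-go workflow above runs on Linux/macOS; the resulting binary is what the run-*.sh scripts below invoke:

```
cd go-api-examples/non-streaming-decode-files

go mod tidy
go build

# the ./non-streaming-decode-files binary is used by the run-*.sh scripts below
ls -lh non-streaming-decode-files
```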
  1 +#!/usr/bin/env bash
  2 +
  3 +# Please refer to
  4 +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/nemo/english.html#stt-en-conformer-ctc-medium
  5 +# to download the model
  6 +# before you run this script.
  7 +#
  8 +# You can switch to a different non-streaming model if needed
  9 +
  10 +./non-streaming-decode-files \
  11 + --nemo-ctc ./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \
  12 + --tokens ./sherpa-onnx-nemo-ctc-en-conformer-medium/tokens.txt \
  13 + --model-type nemo_ctc \
  14 + --debug 0 \
  15 + ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/0.wav
  1 +#!/usr/bin/env bash
  2 +
  3 +# Please refer to
  4 +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-03-28-chinese
  5 +# to download the model
  6 +# before you run this script.
  7 +#
  8 +# You can switch to a different non-streaming model if needed
  9 +
  10 +./non-streaming-decode-files \
  11 + --paraformer ./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx \
  12 + --tokens ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \
  13 + --model-type paraformer \
  14 + --debug 0 \
  15 + ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav
  1 +#!/usr/bin/env bash
  2 +
  3 +# Please refer to
  4 +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-zipformer-en-2023-06-26-english
  5 +# to download the model
  6 +# before you run this script.
  7 +#
  8 +# You can switch to a different non-streaming model if needed
  9 +
  10 +./non-streaming-decode-files \
  11 + --encoder ./sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx \
  12 + --decoder ./sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx \
  13 + --joiner ./sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.onnx \
  14 + --tokens ./sherpa-onnx-zipformer-en-2023-06-26/tokens.txt \
  15 + --model-type transducer \
  16 + --debug 0 \
  17 + ./sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav
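
The three run-*.sh scripts above assume the corresponding model directories already exist next to the binary. A minimal sketch of fetching them, using the same `git lfs` and `git clone` pattern as the test-go workflow above:

```
git lfs install

git clone https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-medium
git clone https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28
git clone https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-en-2023-06-26

./run-transducer.sh   # or ./run-paraformer.sh, ./run-nemo-ctc.sh
```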
  1 +# Introduction
  2 +
  3 +This example shows how to use the Go package of [sherpa-onnx][sherpa-onnx]
  4 +for real-time speech recognition from a microphone.
  5 +
  6 +It uses <https://github.com/gordonklaus/portaudio>
  7 +to read from the microphone, so you have to install `portaudio` first.
  8 +
  9 +On macOS, you can use
  10 +
  11 +```
  12 +brew install portaudio
  13 +```
  14 +
  15 +and it will install `portaudio` into `/usr/local/Cellar/portaudio/19.7.0`.
  16 +You need to set the following environment variable
  17 +```
  18 +export PKG_CONFIG_PATH=/usr/local/Cellar/portaudio/19.7.0
  19 +```
  20 +
  21 +so that `pkg-config --cflags --libs portaudio-2.0` can run successfully.
  22 +
  23 +[sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx
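
A minimal build-and-run sketch for this example, assuming `portaudio` has been installed as described above; the go commands follow the same pattern the test-go workflow uses for the other examples:

```
cd go-api-examples/real-time-speech-recognition-from-microphone

# macOS: let pkg-config find portaudio (path as noted in the README above)
export PKG_CONFIG_PATH=/usr/local/Cellar/portaudio/19.7.0

go mod tidy
go build

# download the streaming model referenced in run.sh (see the link in its
# comments), then start recognition from the default microphone
./run.sh
```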
  1 +module real-time-speech-recognition-from-microphone
  2 +
  3 +go 1.20
  4 +
  5 +require (
  6 + github.com/gordonklaus/portaudio v0.0.0-20230709114228-aafa478834f5
  7 + github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8
  8 + github.com/spf13/pflag v1.0.5
  9 +)
  10 +
  11 +require (
  12 + github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6 // indirect
  13 + github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4 // indirect
  14 + github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5 // indirect
  15 +)
  1 +github.com/gordonklaus/portaudio v0.0.0-20230709114228-aafa478834f5 h1:5AlozfqaVjGYGhms2OsdUyfdJME76E6rx5MdGpjzZpc=
  2 +github.com/gordonklaus/portaudio v0.0.0-20230709114228-aafa478834f5/go.mod h1:WY8R6YKlI2ZI3UyzFk7P6yGSuS+hFwNtEzrexRyD7Es=
  3 +github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8 h1:BXc31pWwd7CJLM6m9HavxqiyYJdN3Jc9I26pd4x+JHE=
  4 +github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8/go.mod h1:kszL/pwg9XTpRGi1AYW/aSwdhRBqb6LN0SjXR0jnsBo=
  5 +github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6 h1:o0+l4Wr3IWkWH+kdt8ZZP55L8mRSpW7h1KzvXcfx9FM=
  6 +github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6/go.mod h1:9kU02PSdDdzBApwIDBmE2jWS54WvZbRafOEvW/PVLxE=
  7 +github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4 h1:Xri23R3+tQFkNwO6zVxqZbjMRJP40z7JtoSqW6UP6sM=
  8 +github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4/go.mod h1:a+AJZKNQkFO+JyzGkHySysYfBzzdcoJI5ITFsnhVcmo=
  9 +github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5 h1:OR1LLoptR8W35j5u06KrYUblG35B83HHCPWrVK31uHY=
  10 +github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5/go.mod h1:ermMOETZUv0nM7MmnXWqeHREMR6zQXBhJpEP2fbHIZo=
  11 +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
  12 +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
  1 +package main
  2 +
  3 +import (
  4 + "github.com/gordonklaus/portaudio"
  5 + sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
  6 + flag "github.com/spf13/pflag"
  7 + "strings"
  8 +
  9 + "fmt"
  10 + "log"
  11 +)
  12 +
  13 +func main() {
  14 + err := portaudio.Initialize()
  15 + if err != nil {
  16 + log.Fatalf("Unable to initialize portaudio: %v\n", err)
  17 + }
  18 + defer portaudio.Terminate()
  19 +
  20 + default_device, err := portaudio.DefaultInputDevice()
  21 + if err != nil {
  22 + log.Fatalf("Failed to get default input device: %v\n", err)
  23 + }
  24 + fmt.Printf("Selected default input device: %s\n", default_device.Name)
  25 + param := portaudio.StreamParameters{}
  26 + param.Input.Device = default_device
  27 + param.Input.Channels = 1
  28 + param.Input.Latency = default_device.DefaultLowInputLatency
  29 +
  30 + param.SampleRate = 16000
  31 + param.FramesPerBuffer = 0
  32 + param.Flags = portaudio.ClipOff
  33 +
  34 + config := sherpa.OnlineRecognizerConfig{}
  35 + config.FeatConfig = sherpa.FeatureConfig{SampleRate: 16000, FeatureDim: 80}
  36 +
  37 + flag.StringVar(&config.ModelConfig.Encoder, "encoder", "", "Path to the encoder model")
  38 + flag.StringVar(&config.ModelConfig.Decoder, "decoder", "", "Path to the decoder model")
  39 + flag.StringVar(&config.ModelConfig.Joiner, "joiner", "", "Path to the joiner model")
  40 + flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file")
  41 + flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing")
  42 + flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message")
  43 + flag.StringVar(&config.ModelConfig.ModelType, "model-type", "", "Optional. Used for loading the model in a faster way")
  44 + flag.StringVar(&config.ModelConfig.Provider, "provider", "cpu", "Provider to use")
  45 + flag.StringVar(&config.DecodingMethod, "decoding-method", "greedy_search", "Decoding method. Possible values: greedy_search, modified_beam_search")
  46 + flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search")
  47 + flag.IntVar(&config.EnableEndpoint, "enable-endpoint", 1, "Whether to enable endpoint")
  48 + flag.Float32Var(&config.Rule1MinTrailingSilence, "rule1-min-trailing-silence", 2.4, "Threshold for rule1")
  49 + flag.Float32Var(&config.Rule2MinTrailingSilence, "rule2-min-trailing-silence", 1.2, "Threshold for rule2")
  50 + flag.Float32Var(&config.Rule3MinUtteranceLength, "rule3-min-utterance-length", 20, "Threshold for rule3")
  51 +
  52 + flag.Parse()
  53 +
  54 + log.Println("Initializing recognizer (may take several seconds)")
  55 + recognizer := sherpa.NewOnlineRecognizer(&config)
  56 + log.Println("Recognizer created!")
  57 + defer sherpa.DeleteOnlineRecognizer(recognizer)
  58 +
  59 + stream := sherpa.NewOnlineStream(recognizer)
  60 +
  61 + // you can choose a value other than 0.1 if you want
  62 + samplesPerCall := int32(param.SampleRate * 0.1) // 0.1 second
  63 +
  64 + samples := make([]float32, samplesPerCall)
  65 + s, err := portaudio.OpenStream(param, samples)
  66 + if err != nil {
  67 + log.Fatalf("Failed to open the stream")
  68 + }
  69 + defer s.Close()
  70 + chk(s.Start())
  71 +
  72 + var last_text string
  73 +
  74 + segment_idx := 0
  75 +
  76 + fmt.Println("Started! Please speak")
  77 +
  78 + for {
  79 + chk(s.Read())
  80 + stream.AcceptWaveform(int(param.SampleRate), samples)
  81 +
  82 + for recognizer.IsReady(stream) {
  83 + recognizer.Decode(stream)
  84 + }
  85 +
  86 + text := recognizer.GetResult(stream).Text
  87 + if len(text) != 0 && last_text != text {
  88 + last_text = strings.ToLower(text)
  89 + fmt.Printf("\r%d: %s", segment_idx, last_text)
  90 + }
  91 +
  92 + if recognizer.IsEndpoint(stream) {
  93 + if len(text) != 0 {
  94 + segment_idx++
  95 + fmt.Println()
  96 + }
  97 + recognizer.Reset(stream)
  98 + }
  99 + }
  100 +
  101 + chk(s.Stop())
  102 + return
  103 +
  104 +}
  105 +
  106 +func chk(err error) {
  107 + if err != nil {
  108 + panic(err)
  109 + }
  110 +}
  1 +#!/usr/bin/env bash
  2 +
  3 +# Please refer to
  4 +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#pkufool-icefall-asr-zipformer-streaming-wenetspeech-20230615-chinese
  5 +# to download the model
  6 +# before you run this script.
  7 +#
  8 +# You can switch to a different online model if needed
  9 +
  10 +./real-time-speech-recognition-from-microphone \
  11 + --encoder ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx \
  12 + --decoder ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx \
  13 + --joiner ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx \
  14 + --tokens ./icefall-asr-zipformer-streaming-wenetspeech-20230615/data/lang_char/tokens.txt \
  15 + --model-type zipformer2
  1 +module streaming-decode-files
  2 +
  3 +go 1.20
  4 +
  5 +require (
  6 + github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8
  7 + github.com/spf13/pflag v1.0.5
  8 + github.com/youpy/go-wav v0.3.2
  9 +)
  10 +
  11 +require (
  12 + github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6 // indirect
  13 + github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4 // indirect
  14 + github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5 // indirect
  15 + github.com/youpy/go-riff v0.1.0 // indirect
  16 + github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b // indirect
  17 +)
  1 +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
  2 +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
  3 +github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ=
  4 +github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
  5 +github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8 h1:BXc31pWwd7CJLM6m9HavxqiyYJdN3Jc9I26pd4x+JHE=
  6 +github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8/go.mod h1:kszL/pwg9XTpRGi1AYW/aSwdhRBqb6LN0SjXR0jnsBo=
  7 +github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6 h1:o0+l4Wr3IWkWH+kdt8ZZP55L8mRSpW7h1KzvXcfx9FM=
  8 +github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6/go.mod h1:9kU02PSdDdzBApwIDBmE2jWS54WvZbRafOEvW/PVLxE=
  9 +github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4 h1:Xri23R3+tQFkNwO6zVxqZbjMRJP40z7JtoSqW6UP6sM=
  10 +github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4/go.mod h1:a+AJZKNQkFO+JyzGkHySysYfBzzdcoJI5ITFsnhVcmo=
  11 +github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5 h1:OR1LLoptR8W35j5u06KrYUblG35B83HHCPWrVK31uHY=
  12 +github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5/go.mod h1:ermMOETZUv0nM7MmnXWqeHREMR6zQXBhJpEP2fbHIZo=
  13 +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
  14 +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
  15 +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
  16 +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
  17 +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
  18 +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
  19 +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
  20 +github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
  21 +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
  22 +github.com/youpy/go-riff v0.1.0 h1:vZO/37nI4tIET8tQI0Qn0Y79qQh99aEpponTPiPut7k=
  23 +github.com/youpy/go-riff v0.1.0/go.mod h1:83nxdDV4Z9RzrTut9losK7ve4hUnxUR8ASSz4BsKXwQ=
  24 +github.com/youpy/go-wav v0.3.2 h1:NLM8L/7yZ0Bntadw/0h95OyUsen+DQIVf9gay+SUsMU=
  25 +github.com/youpy/go-wav v0.3.2/go.mod h1:0FCieAXAeSdcxFfwLpRuEo0PFmAoc+8NU34h7TUvk50=
  26 +github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b h1:QqixIpc5WFIqTLxB3Hq8qs0qImAgBdq0p6rq2Qdl634=
  27 +github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b/go.mod h1:T2h1zV50R/q0CVYnsQOQ6L7P4a2ZxH47ixWcMXFGyx8=
  28 +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
  29 +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
  30 +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
  31 +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
  32 +gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo=
  33 +gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw=
  1 +package main
  2 +
  3 +import (
  4 + "bytes"
  5 + "encoding/binary"
  6 + sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
  7 + flag "github.com/spf13/pflag"
  8 + "github.com/youpy/go-wav"
  9 + "os"
  10 + "strings"
  11 +
  12 + "log"
  13 +)
  14 +
  15 +func main() {
  16 + log.SetFlags(log.LstdFlags | log.Lmicroseconds)
  17 +
  18 + config := sherpa.OnlineRecognizerConfig{}
  19 + config.FeatConfig = sherpa.FeatureConfig{SampleRate: 16000, FeatureDim: 80}
  20 +
  21 + flag.StringVar(&config.ModelConfig.Encoder, "encoder", "", "Path to the encoder model")
  22 + flag.StringVar(&config.ModelConfig.Decoder, "decoder", "", "Path to the decoder model")
  23 + flag.StringVar(&config.ModelConfig.Joiner, "joiner", "", "Path to the joiner model")
  24 + flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file")
  25 + flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing")
  26 + flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message")
  27 + flag.StringVar(&config.ModelConfig.ModelType, "model-type", "", "Optional. Used for loading the model in a faster way")
  28 + flag.StringVar(&config.ModelConfig.Provider, "provider", "cpu", "Provider to use")
  29 + flag.StringVar(&config.DecodingMethod, "decoding-method", "greedy_search", "Decoding method. Possible values: greedy_search, modified_beam_search")
  30 + flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search")
  31 +
  32 + flag.Parse()
  33 +
  34 + if len(flag.Args()) != 1 {
  35 + log.Fatalf("Please provide one wave file")
  36 + }
  37 +
  38 + log.Println("Reading", flag.Arg(0))
  39 +
  40 + samples, sampleRate := readWave(flag.Arg(0))
  41 +
  42 + log.Println("Initializing recognizer (may take several seconds)")
  43 + recognizer := sherpa.NewOnlineRecognizer(&config)
  44 + log.Println("Recognizer created!")
  45 + defer sherpa.DeleteOnlineRecognizer(recognizer)
  46 +
  47 + log.Println("Start decoding!")
  48 + stream := sherpa.NewOnlineStream(recognizer)
  49 + defer sherpa.DeleteOnlineStream(stream)
  50 +
  51 + stream.AcceptWaveform(sampleRate, samples)
  52 +
  53 + tailPadding := make([]float32, int(float32(sampleRate)*0.3))
  54 + stream.AcceptWaveform(sampleRate, tailPadding)
  55 +
  56 + for recognizer.IsReady(stream) {
  57 + recognizer.Decode(stream)
  58 + }
  59 + log.Println("Decoding done!")
  60 + result := recognizer.GetResult(stream)
  61 + log.Println(strings.ToLower(result.Text))
  62 + log.Printf("Wave duration: %v seconds", float32(len(samples))/float32(sampleRate))
  63 +}
  64 +
  65 +func readWave(filename string) (samples []float32, sampleRate int) {
  66 + file, _ := os.Open(filename)
  67 + defer file.Close()
  68 +
  69 + reader := wav.NewReader(file)
  70 + format, err := reader.Format()
  71 + if err != nil {
  72 + log.Fatalf("Failed to read wave format")
  73 + }
  74 +
  75 + if format.AudioFormat != 1 {
  76 + log.Fatalf("Support only PCM format. Given: %v\n", format.AudioFormat)
  77 + }
  78 +
  79 + if format.NumChannels != 1 {
  80 + log.Fatalf("Support only 1 channel wave file. Given: %v\n", format.NumChannels)
  81 + }
  82 +
  83 + if format.BitsPerSample != 16 {
  84 + log.Fatalf("Support only 16-bit per sample. Given: %v\n", format.BitsPerSample)
  85 + }
  86 +
  87 + reader.Duration() // so that it initializes reader.Size
  88 +
  89 + buf := make([]byte, reader.Size)
  90 + n, err := reader.Read(buf)
  91 + if n != int(reader.Size) {
  92 + log.Fatalf("Failed to read %v bytes. Returned %v bytes\n", reader.Size, n)
  93 + }
  94 +
  95 + samples = samplesInt16ToFloat(buf)
  96 + sampleRate = int(format.SampleRate)
  97 +
  98 + return
  99 +}
  100 +
  101 +func samplesInt16ToFloat(inSamples []byte) []float32 {
  102 + numSamples := len(inSamples) / 2
  103 + outSamples := make([]float32, numSamples)
  104 +
  105 + for i := 0; i != numSamples; i++ {
  106 + s := inSamples[i*2 : (i+1)*2]
  107 +
  108 + var s16 int16
  109 + buf := bytes.NewReader(s)
  110 + err := binary.Read(buf, binary.LittleEndian, &s16)
  111 + if err != nil {
  112 + log.Fatal("Failed to parse 16-bit sample")
  113 + }
  114 + outSamples[i] = float32(s16) / 32768
  115 + }
  116 +
  117 + return outSamples
  118 +}
  1 +#!/usr/bin/env bash
  2 +
  3 +# Please refer to
  4 +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-2023-06-26-english
  5 +# to download the model
  6 +# before you run this script.
  7 +#
  8 +# You can switch to a different online model if needed
  9 +
  10 +./streaming-decode-files \
  11 + --encoder ./sherpa-onnx-streaming-zipformer-en-2023-06-26/encoder-epoch-99-avg-1-chunk-16-left-128.onnx \
  12 + --decoder ./sherpa-onnx-streaming-zipformer-en-2023-06-26/decoder-epoch-99-avg-1-chunk-16-left-128.onnx \
  13 + --joiner ./sherpa-onnx-streaming-zipformer-en-2023-06-26/joiner-epoch-99-avg-1-chunk-16-left-128.onnx \
  14 + --tokens ./sherpa-onnx-streaming-zipformer-en-2023-06-26/tokens.txt \
  15 + --model-type zipformer2 \
  16 + --debug 0 \
  17 + ./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/0.wav
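
Similarly, a minimal sketch of building this example and fetching the model used by run.sh; the commands and the model repository are the same ones used in the test-go workflow above:

```
cd go-api-examples/streaming-decode-files

go mod tidy
go build

git lfs install
git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26

./run.sh
```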
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +git config --global user.email "csukuangfj@gmail.com"
  6 +git config --global user.name "Fangjun Kuang"
  7 +
  8 +SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
  9 +
  10 +echo "========================================================================="
  11 +
  12 +git clone git@github.com:k2-fsa/sherpa-onnx-go-linux.git
  13 +
  14 +echo "Copy libs for Linux x86_64"
  15 +
  16 +rm -rf sherpa-onnx-go-linux/lib/x86_64-unknown-linux-gnu/lib*
  17 +
  18 +cp -v ./linux/sherpa_onnx/lib/libkaldi-native-fbank-core.so sherpa-onnx-go-linux/lib/x86_64-unknown-linux-gnu/
  19 +cp -v ./linux/sherpa_onnx/lib/libonnxruntime* sherpa-onnx-go-linux/lib/x86_64-unknown-linux-gnu/
  20 +cp -v ./linux/sherpa_onnx/lib/libsherpa-onnx-c-api.so sherpa-onnx-go-linux/lib/x86_64-unknown-linux-gnu/
  21 +cp -v ./linux/sherpa_onnx/lib/libsherpa-onnx-core.so sherpa-onnx-go-linux/lib/x86_64-unknown-linux-gnu/
  22 +
  23 +echo "Copy sources for Linux x86_64"
  24 +cp sherpa-onnx/c-api/c-api.h sherpa-onnx-go-linux/
  25 +cp scripts/go/sherpa_onnx.go sherpa-onnx-go-linux/
  26 +
  27 +pushd sherpa-onnx-go-linux
  28 +tag=$(git describe --abbrev=0 --tags)
  29 +if [[ x"$VERSION" == x"auto" ]]; then
  30 + # this is a pre-release
  31 + if [[ $tag == ${SHERPA_ONNX_VERSION}* ]]; then
  32 + # we have already released a pre-release before, so just increment it
  33 + last=$(echo $tag | rev | cut -d'.' -f 1 | rev)
  34 + new_last=$((last+1))
  35 + new_tag=${SHERPA_ONNX_VERSION}-alpha.${new_last}
  36 + else
  37 + new_tag=${SHERPA_ONNX_VERSION}-alpha.1
  38 + fi
  39 +else
  40 + new_tag=$VERSION
  41 +fi
  42 +
  43 +echo "new_tag: $new_tag"
  44 +git add .
  45 +git status
  46 +git commit -m "Release $new_tag" && \
  47 +git tag $new_tag && \
  48 +git push origin $new_tag || true
  49 +
  50 +popd
  51 +echo "========================================================================="
  52 +
  53 +git clone git@github.com:k2-fsa/sherpa-onnx-go-macos.git
  54 +
  55 +echo "Copy libs for macOS x86_64"
  56 +rm -rf sherpa-onnx-go-macos/lib/x86_64-apple-darwin/lib*
  57 +cp -v ./macos-x86_64/libkaldi-native-fbank-core.dylib sherpa-onnx-go-macos/lib/x86_64-apple-darwin
  58 +cp -v ./macos-x86_64/libonnxruntime* sherpa-onnx-go-macos/lib/x86_64-apple-darwin
  59 +cp -v ./macos-x86_64/libsherpa-onnx-c-api.dylib sherpa-onnx-go-macos/lib/x86_64-apple-darwin
  60 +cp -v ./macos-x86_64/libsherpa-onnx-core.dylib sherpa-onnx-go-macos/lib/x86_64-apple-darwin
  61 +
  62 +echo "Copy libs for macOS arm64"
  63 +rm -rf sherpa-onnx-go-macos/lib/aarch64-apple-darwin/lib*
  64 +cp -v ./macos-arm64/libkaldi-native-fbank-core.dylib sherpa-onnx-go-macos/lib/aarch64-apple-darwin
  65 +cp -v ./macos-arm64/libonnxruntime* sherpa-onnx-go-macos/lib/aarch64-apple-darwin
  66 +cp -v ./macos-arm64/libsherpa-onnx-c-api.dylib sherpa-onnx-go-macos/lib/aarch64-apple-darwin
  67 +cp -v ./macos-arm64/libsherpa-onnx-core.dylib sherpa-onnx-go-macos/lib/aarch64-apple-darwin
  68 +
  69 +echo "Copy sources for macOS"
  70 +cp sherpa-onnx/c-api/c-api.h sherpa-onnx-go-macos/
  71 +cp scripts/go/sherpa_onnx.go sherpa-onnx-go-macos/
  72 +
  73 +pushd sherpa-onnx-go-macos
  74 +tag=$(git describe --abbrev=0 --tags)
  75 +if [[ x"$VERSION" == x"auto" ]]; then
  76 + # this is a pre-release
  77 + if [[ $tag == ${SHERPA_ONNX_VERSION}* ]]; then
  78 + # we have already released a pre-release before, so just increment it
  79 + last=$(echo $tag | rev | cut -d'.' -f 1 | rev)
  80 + new_last=$((last+1))
  81 + new_tag=${SHERPA_ONNX_VERSION}-alpha.${new_last}
  82 + else
  83 + new_tag=${SHERPA_ONNX_VERSION}-alpha.1
  84 + fi
  85 +else
  86 + new_tag=$VERSION
  87 +fi
  88 +
  89 +echo "new_tag: $new_tag"
  90 +git add .
  91 +git status
  92 +git commit -m "Release $new_tag" && \
  93 +git tag $new_tag && \
  94 +git push origin $new_tag || true
  95 +
  96 +popd
  97 +echo "========================================================================="
  98 +
  99 +git clone git@github.com:k2-fsa/sherpa-onnx-go-windows.git
  100 +echo "Copy libs for Windows x86_64"
  101 +rm -fv sherpa-onnx-go-windows/lib/x86_64-pc-windows-gnu/*
  102 +cp -v ./windows-x64/kaldi-native-fbank-core.dll sherpa-onnx-go-windows/lib/x86_64-pc-windows-gnu
  103 +cp -v ./windows-x64/onnxruntime.dll sherpa-onnx-go-windows/lib/x86_64-pc-windows-gnu
  104 +cp -v ./windows-x64/sherpa-onnx-c-api.dll sherpa-onnx-go-windows/lib/x86_64-pc-windows-gnu
  105 +cp -v ./windows-x64/sherpa-onnx-core.dll sherpa-onnx-go-windows/lib/x86_64-pc-windows-gnu
  106 +
  107 +echo "Copy libs for Windows x86"
  108 +rm -fv sherpa-onnx-go-windows/lib/i686-pc-windows-gnu/*
  109 +cp -v ./windows-win32/kaldi-native-fbank-core.dll sherpa-onnx-go-windows/lib/i686-pc-windows-gnu
  110 +cp -v ./windows-win32/onnxruntime.dll sherpa-onnx-go-windows/lib/i686-pc-windows-gnu
  111 +cp -v ./windows-win32/sherpa-onnx-c-api.dll sherpa-onnx-go-windows/lib/i686-pc-windows-gnu
  112 +cp -v ./windows-win32/sherpa-onnx-core.dll sherpa-onnx-go-windows/lib/i686-pc-windows-gnu
  113 +
  114 +echo "Copy sources for Windows"
  115 +cp sherpa-onnx/c-api/c-api.h sherpa-onnx-go-windows/
  116 +cp scripts/go/sherpa_onnx.go sherpa-onnx-go-windows/
  117 +
  118 +pushd sherpa-onnx-go-windows
  119 +tag=$(git describe --abbrev=0 --tags)
  120 +if [[ x"$VERSION" == x"auto" ]]; then
  121 + # this is a pre-release
  122 + if [[ $tag == ${SHERPA_ONNX_VERSION}* ]]; then
  123 + # we have already released a pre-release before, so just increment it
  124 + last=$(echo $tag | rev | cut -d'.' -f 1 | rev)
  125 + new_last=$((last+1))
  126 + new_tag=${SHERPA_ONNX_VERSION}-alpha.${new_last}
  127 + else
  128 + new_tag=${SHERPA_ONNX_VERSION}-alpha.1
  129 + fi
  130 +else
  131 + new_tag=$VERSION
  132 +fi
  133 +
  134 +echo "new_tag: $new_tag"
  135 +git add .
  136 +git status
  137 +git commit -m "Release $new_tag" && \
  138 +git tag $new_tag && \
  139 +git push origin $new_tag || true
  140 +
  141 +popd
  142 +
  143 +echo "========================================================================="
  144 +
  145 +
  146 +rm -fv ~/.ssh/github
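
For concreteness, a hypothetical walk-through of the tag-increment branch that appears three times in the script above (the version and tag values here are purely illustrative):

```
# hypothetical values, for illustration only
SHERPA_ONNX_VERSION=v1.5.3
tag=v1.5.3-alpha.7   # latest existing tag reported by `git describe`

last=$(echo $tag | rev | cut -d'.' -f 1 | rev)    # -> 7
new_last=$((last+1))                              # -> 8
new_tag=${SHERPA_ONNX_VERSION}-alpha.${new_last}  # -> v1.5.3-alpha.8
echo "new_tag: $new_tag"
```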
  1 +/*
  2 +Speech recognition with [Next-gen Kaldi].
  3 +
  4 +[sherpa-onnx] is an open-source speech recognition framework for [Next-gen Kaldi].
  5 +It depends only on [onnxruntime], supporting both streaming and non-streaming
  6 +speech recognition.
  7 +
  8 +It does not need to access the network during recognition and everything
  9 +runs locally.
  10 +
  11 +It supports a variety of platforms, such as Linux (x86_64, aarch64, arm),
  12 +Windows (x86_64, x86), macOS (x86_64, arm64), etc.
  13 +
  14 +Usage examples:
  15 +
  16 + 1. Real-time speech recognition from a microphone
  17 +
  18 + Please see
  19 + https://github.com/k2-fsa/sherpa-onnx/tree/master/go-api-examples/real-time-speech-recognition-from-microphone
  20 +
  21 + 2. Decode files using a non-streaming model
  22 +
  23 + Please see
  24 + https://github.com/k2-fsa/sherpa-onnx/tree/master/go-api-examples/non-streaming-decode-files
  25 +
  26 + 3. Decode files using a streaming model
  27 +
  28 + Please see
  29 + https://github.com/k2-fsa/sherpa-onnx/tree/master/go-api-examples/streaming-decode-files
  30 +
  31 +[sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx
  32 +[onnxruntime]: https://github.com/microsoft/onnxruntime
  33 +[Next-gen Kaldi]: https://github.com/k2-fsa/
  34 +*/
  35 +package sherpa_onnx
  36 +
  37 +// #include <stdlib.h>
  38 +// #include "c-api.h"
  39 +import "C"
  40 +import "unsafe"
  41 +
  42 +// Configuration for online/streaming transducer models
  43 +//
  44 +// Please refer to
  45 +// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
  46 +// to download pre-trained models
  47 +type OnlineTransducerModelConfig struct {
  48 + Encoder string // Path to the encoder model, e.g., encoder.onnx or encoder.int8.onnx
  49 + Decoder string // Path to the decoder model.
  50 + Joiner string // Path to the joiner model.
  51 + Tokens string // Path to tokens.txt
  52 + NumThreads int // Number of threads to use for neural network computation
  53 + Provider string // Optional. Valid values are: cpu, cuda, coreml
  54 + Debug int // 1 to show model meta information while loading it.
  55 + ModelType string // Optional. You can specify it for faster model initialization
  56 +}
  57 +
  58 +// Configuration for the feature extractor
  59 +type FeatureConfig struct {
  60 + // Sample rate expected by the model. It is 16000 for all
  61 + // pre-trained models provided by us
  62 + SampleRate int
  63 + // Feature dimension expected by the model. It is 80 for all
  64 + // pre-trained models provided by us
  65 + FeatureDim int
  66 +}
  67 +
  68 +// Configuration for the online/streaming recognizer.
  69 +type OnlineRecognizerConfig struct {
  70 + FeatConfig FeatureConfig
  71 + ModelConfig OnlineTransducerModelConfig
  72 +
  73 + // Valid decoding methods: greedy_search, modified_beam_search
  74 + DecodingMethod string
  75 +
  76 + // Used only when DecodingMethod is modified_beam_search. It specifies
  77 + // the maximum number of paths to keep during the search
  78 + MaxActivePaths int
  79 +
  80 + EnableEndpoint int // 1 to enable endpoint detection.
  81 +
  82 + // Please see
  83 + // https://k2-fsa.github.io/sherpa/ncnn/endpoint.html
  84 + // for the meaning of Rule1MinTrailingSilence, Rule2MinTrailingSilence
  85 + // and Rule3MinUtteranceLength.
  86 + Rule1MinTrailingSilence float32
  87 + Rule2MinTrailingSilence float32
  88 + Rule3MinUtteranceLength float32
  89 +}
  90 +
  91 +// It contains the recognition result for an online stream.
  92 +type OnlineRecognizerResult struct {
  93 + Text string
  94 +}
  95 +
  96 +// The online recognizer class. It wraps a pointer from C.
  97 +type OnlineRecognizer struct {
  98 + impl *C.struct_SherpaOnnxOnlineRecognizer
  99 +}
  100 +
  101 +// The online stream class. It wraps a pointer from C.
  102 +type OnlineStream struct {
  103 + impl *C.struct_SherpaOnnxOnlineStream
  104 +}
  105 +
  106 +// Free the internal pointer inside the recognizer to avoid a memory leak.
  107 +func DeleteOnlineRecognizer(recognizer *OnlineRecognizer) {
  108 + C.DestroyOnlineRecognizer(recognizer.impl)
  109 + recognizer.impl = nil
  110 +}
  111 +
  112 +// The user is responsible for invoking [DeleteOnlineRecognizer]() to free
  113 +// the returned recognizer and avoid a memory leak
  114 +func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer {
  115 + c := C.struct_SherpaOnnxOnlineRecognizerConfig{}
  116 + c.feat_config.sample_rate = C.int(config.FeatConfig.SampleRate)
  117 + c.feat_config.feature_dim = C.int(config.FeatConfig.FeatureDim)
  118 +
  119 + c.model_config.encoder = C.CString(config.ModelConfig.Encoder)
  120 + defer C.free(unsafe.Pointer(c.model_config.encoder))
  121 +
  122 + c.model_config.decoder = C.CString(config.ModelConfig.Decoder)
  123 + defer C.free(unsafe.Pointer(c.model_config.decoder))
  124 +
  125 + c.model_config.joiner = C.CString(config.ModelConfig.Joiner)
  126 + defer C.free(unsafe.Pointer(c.model_config.joiner))
  127 +
  128 + c.model_config.tokens = C.CString(config.ModelConfig.Tokens)
  129 + defer C.free(unsafe.Pointer(c.model_config.tokens))
  130 +
  131 + c.model_config.num_threads = C.int(config.ModelConfig.NumThreads)
  132 +
  133 + c.model_config.provider = C.CString(config.ModelConfig.Provider)
  134 + defer C.free(unsafe.Pointer(c.model_config.provider))
  135 +
  136 + c.model_config.debug = C.int(config.ModelConfig.Debug)
  137 +
  138 + c.model_config.model_type = C.CString(config.ModelConfig.ModelType)
  139 + defer C.free(unsafe.Pointer(c.model_config.model_type))
  140 +
  141 + c.decoding_method = C.CString(config.DecodingMethod)
  142 + defer C.free(unsafe.Pointer(c.decoding_method))
  143 +
  144 + c.max_active_paths = C.int(config.MaxActivePaths)
  145 + c.enable_endpoint = C.int(config.EnableEndpoint)
  146 + c.rule1_min_trailing_silence = C.float(config.Rule1MinTrailingSilence)
  147 + c.rule2_min_trailing_silence = C.float(config.Rule2MinTrailingSilence)
  148 + c.rule3_min_utterance_length = C.float(config.Rule3MinUtteranceLength)
  149 +
  150 + recognizer := &OnlineRecognizer{}
  151 + recognizer.impl = C.CreateOnlineRecognizer(&c)
  152 +
  153 + return recognizer
  154 +}
  155 +
  156 +// Delete the internal pointer inside the stream to avoid a memory leak.
  157 +func DeleteOnlineStream(stream *OnlineStream) {
  158 + C.DestroyOnlineStream(stream.impl)
  159 + stream.impl = nil
  160 +}
  161 +
  162 +// The user is responsible for invoking [DeleteOnlineStream]() to free
  163 +// the returned stream to avoid a memory leak.
  164 +func NewOnlineStream(recognizer *OnlineRecognizer) *OnlineStream {
  165 + stream := &OnlineStream{}
  166 + stream.impl = C.CreateOnlineStream(recognizer.impl)
  167 + return stream
  168 +}
  169 +
  170 +// Input audio samples for the stream.
  171 +//
  172 +// sampleRate is the actual sample rate of the input audio samples. If it
  173 +// is different from the sample rate expected by the feature extractor, we will
  174 +// do resampling inside.
  175 +//
  176 +// samples contains audio samples. Each sample is in the range [-1, 1]
  177 +func (s *OnlineStream) AcceptWaveform(sampleRate int, samples []float32) {
  178 + C.AcceptWaveform(s.impl, C.int(sampleRate), (*C.float)(&samples[0]), C.int(len(samples)))
  179 +}
  180 +
  181 +// Signal that there will be no incoming audio samples.
  182 +// After calling this function, you cannot call [OnlineStream.AcceptWaveform] any longer.
  183 +//
  184 +// The main purpose of this function is to flush the remaining audio samples
  185 +// buffered inside for feature extraction.
  186 +func (s *OnlineStream) InputFinished() {
  187 + C.InputFinished(s.impl)
  188 +}
  189 +
  190 +// Check whether the stream has enough feature frames for decoding.
  191 +// Return true if this stream is ready for decoding. Return false otherwise.
  192 +//
  193 +// You will usually use it like below:
  194 +//
  195 +// for recognizer.IsReady(s) {
  196 +// recognizer.Decode(s)
  197 +// }
  198 +func (recognizer *OnlineRecognizer) IsReady(s *OnlineStream) bool {
  199 + return C.IsOnlineStreamReady(recognizer.impl, s.impl) == 1
  200 +}
  201 +
  202 +// Return true if an endpoint is detected.
  203 +//
  204 +// You usually use it like below:
  205 +//
  206 +// if recognizer.IsEndpoint(s) {
  207 +// // do your own stuff after detecting an endpoint
  208 +//
  209 +// recognizer.Reset(s)
  210 +// }
  211 +func (recognizer *OnlineRecognizer) IsEndpoint(s *OnlineStream) bool {
  212 + return C.IsEndpoint(recognizer.impl, s.impl) == 1
  213 +}
  214 +
  215 +// After calling this function, the internal neural network model states
  216 +// are reset, IsEndpoint(s) will return false, and GetResult(s) will
  217 +// return an empty string.
  218 +func (recognizer *OnlineRecognizer) Reset(s *OnlineStream) {
  219 + C.Reset(recognizer.impl, s.impl)
  220 +}
  221 +
  222 +// Decode the stream. Before calling this function, you have to ensure
  223 +// that recognizer.IsReady(s) returns true. Otherwise, you will be SAD.
  224 +//
  225 +// You usually use it like below:
  226 +//
  227 +// for recognizer.IsReady(s) {
  228 +// recognizer.Decode(s)
  229 +// }
  230 +func (recognizer *OnlineRecognizer) Decode(s *OnlineStream) {
  231 + C.DecodeOnlineStream(recognizer.impl, s.impl)
  232 +}
  233 +
  234 +// Decode multiple streams in parallel, i.e., in batch.
  235 +// You have to ensure that each stream is ready for decoding. Otherwise,
  236 +// you will be SAD.
  237 +func (recognizer *OnlineRecognizer) DecodeStreams(s []*OnlineStream) {
  238 + ss := make([]*C.struct_SherpaOnnxOnlineStream, len(s))
  239 + for i, v := range s {
  240 + ss[i] = v.impl
  241 + }
  242 +
  243 + C.DecodeMultipleOnlineStreams(recognizer.impl, &ss[0], C.int(len(s)))
  244 +}
  245 +
  246 +// Get the current result of the stream since the last invocation of Reset().
  247 +func (recognizer *OnlineRecognizer) GetResult(s *OnlineStream) *OnlineRecognizerResult {
  248 + p := C.GetOnlineStreamResult(recognizer.impl, s.impl)
  249 + defer C.DestroyOnlineRecognizerResult(p)
  250 + result := &OnlineRecognizerResult{}
  251 + result.Text = C.GoString(p.text)
  252 +
  253 + return result
  254 +}
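Putting the online API together, a hypothetical streaming loop might look like the sketch below. It is written as if it lived in this package; the chunked input and the 16 kHz sample rate are assumptions for illustration, not requirements stated in this PR.

	func exampleStreamingDecode(recognizer *OnlineRecognizer, chunks [][]float32) []string {
		stream := NewOnlineStream(recognizer)
		defer DeleteOnlineStream(stream)

		var segments []string

		for _, chunk := range chunks {
			// Feed audio as it arrives; each sample is a float32 in [-1, 1].
			stream.AcceptWaveform(16000, chunk)

			// Decode whenever enough feature frames have been accumulated.
			for recognizer.IsReady(stream) {
				recognizer.Decode(stream)
			}

			// On an endpoint, save the current segment and reset the stream.
			if recognizer.IsEndpoint(stream) {
				segments = append(segments, recognizer.GetResult(stream).Text)
				recognizer.Reset(stream)
			}
		}

		// No more audio: flush the buffered samples and decode what is left.
		stream.InputFinished()
		for recognizer.IsReady(stream) {
			recognizer.Decode(stream)
		}
		segments = append(segments, recognizer.GetResult(stream).Text)

		return segments
	}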
  255 +
  256 +// Configuration for offline/non-streaming transducer.
  257 +//
  258 +// Please refer to
  259 +// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/index.html
  260 +// to download pre-trained models
  261 +type OfflineTransducerModelConfig struct {
  262 + Encoder string // Path to the encoder model, i.e., encoder.onnx or encoder.int8.onnx
  263 + Decoder string // Path to the decoder model
  264 + Joiner string // Path to the joiner model
  265 +}
  266 +
  267 +// Configuration for offline/non-streaming paraformer.
  268 +//
  269 +// Please refer to
  270 +// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html
  271 +// to download pre-trained models
  272 +type OfflineParaformerModelConfig struct {
  273 + Model string // Path to the model, e.g., model.onnx or model.int8.onnx
  274 +}
  275 +
  276 +// Configuration for offline/non-streaming NeMo CTC models.
  277 +//
  278 +// Please refer to
  279 +// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/index.html
  280 +// to download pre-trained models
  281 +type OfflineNemoEncDecCtcModelConfig struct {
  282 + Model string // Path to the model, e.g., model.onnx or model.int8.onnx
  283 +}
  284 +
  285 +// Configuration for offline LM.
  286 +type OfflineLMConfig struct {
  287 + Model string // Path to the model
  288 + Scale float32 // scale for LM score
  289 +}
  290 +
  291 +type OfflineModelConfig struct {
  292 + Transducer OfflineTransducerModelConfig
  293 + Paraformer OfflineParaformerModelConfig
  294 + NemoCTC OfflineNemoEncDecCtcModelConfig
  295 + Tokens string // Path to tokens.txt
  296 +
  297 + // Number of threads to use for neural network computation
  298 + NumThreads int
  299 +
  300 + // 1 to print model meta information while loading
  301 + Debug int
  302 +
  303 + // Optional. Valid values: cpu, cuda, coreml
  304 + Provider string
  305 +
  306 + // Optional. Specify it for faster model initialization.
  307 + ModelType string
  308 +}
  309 +
  310 +// Configuration for the offline/non-streaming recognizer.
  311 +type OfflineRecognizerConfig struct {
  312 + FeatConfig FeatureConfig
  313 + ModelConfig OfflineModelConfig
  314 + LmConfig OfflineLMConfig
  315 +
  316 +// Valid decoding methods: greedy_search, modified_beam_search
  317 + DecodingMethod string
  318 +
  319 + // Used only when DecodingMethod is modified_beam_search.
  320 + MaxActivePaths int
  321 +}
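For comparison with the streaming case, here is a hedged sketch of an OfflineRecognizerConfig for a non-streaming paraformer model, again written as if inside a caller in this package. Only the Paraformer sub-config is filled in, the remaining model and LM fields are left at their zero values, and all file paths are placeholders.

	config := OfflineRecognizerConfig{
		FeatConfig: FeatureConfig{SampleRate: 16000, FeatureDim: 80},
		ModelConfig: OfflineModelConfig{
			Paraformer: OfflineParaformerModelConfig{Model: "./model.int8.onnx"}, // placeholder
			Tokens:     "./tokens.txt",                                           // placeholder
			NumThreads: 2,
			Provider:   "cpu",
		},
		DecodingMethod: "greedy_search",
	}
	recognizer := NewOfflineRecognizer(&config)
	defer DeleteOfflineRecognizer(recognizer)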
  322 +
  323 +// The offline recognizer class. It wraps a pointer from C.
  324 +type OfflineRecognizer struct {
  325 + impl *C.struct_SherpaOnnxOfflineRecognizer
  326 +}
  327 +
  328 +// The offline stream class. It wraps a pointer from C.
  329 +type OfflineStream struct {
  330 + impl *C.struct_SherpaOnnxOfflineStream
  331 +}
  332 +
  333 +// It contains the recognition result of an offline stream.
  334 +type OfflineRecognizerResult struct {
  335 + Text string
  336 +}
  337 +
  338 +// Frees the internal pointer of the recognizer to avoid a memory leak.
  339 +func DeleteOfflineRecognizer(recognizer *OfflineRecognizer) {
  340 + C.DestroyOfflineRecognizer(recognizer.impl)
  341 + recognizer.impl = nil
  342 +}
  343 +
  344 +// The user is responsible for invoking [DeleteOfflineRecognizer]() to free
  345 +// the returned recognizer to avoid a memory leak.
  346 +func NewOfflineRecognizer(config *OfflineRecognizerConfig) *OfflineRecognizer {
  347 + c := C.struct_SherpaOnnxOfflineRecognizerConfig{}
  348 + c.feat_config.sample_rate = C.int(config.FeatConfig.SampleRate)
  349 + c.feat_config.feature_dim = C.int(config.FeatConfig.FeatureDim)
  350 +
  351 + c.model_config.transducer.encoder = C.CString(config.ModelConfig.Transducer.Encoder)
  352 + defer C.free(unsafe.Pointer(c.model_config.transducer.encoder))
  353 +
  354 + c.model_config.transducer.decoder = C.CString(config.ModelConfig.Transducer.Decoder)
  355 + defer C.free(unsafe.Pointer(c.model_config.transducer.decoder))
  356 +
  357 + c.model_config.transducer.joiner = C.CString(config.ModelConfig.Transducer.Joiner)
  358 + defer C.free(unsafe.Pointer(c.model_config.transducer.joiner))
  359 +
  360 + c.model_config.paraformer.model = C.CString(config.ModelConfig.Paraformer.Model)
  361 + defer C.free(unsafe.Pointer(c.model_config.paraformer.model))
  362 +
  363 + c.model_config.nemo_ctc.model = C.CString(config.ModelConfig.NemoCTC.Model)
  364 + defer C.free(unsafe.Pointer(c.model_config.nemo_ctc.model))
  365 +
  366 + c.model_config.tokens = C.CString(config.ModelConfig.Tokens)
  367 + defer C.free(unsafe.Pointer(c.model_config.tokens))
  368 +
  369 + c.model_config.num_threads = C.int(config.ModelConfig.NumThreads)
  370 +
  371 + c.model_config.debug = C.int(config.ModelConfig.Debug)
  372 +
  373 + c.model_config.provider = C.CString(config.ModelConfig.Provider)
  374 + defer C.free(unsafe.Pointer(c.model_config.provider))
  375 +
  376 + c.model_config.model_type = C.CString(config.ModelConfig.ModelType)
  377 + defer C.free(unsafe.Pointer(c.model_config.model_type))
  378 +
  379 + c.lm_config.model = C.CString(config.LmConfig.Model)
  380 + defer C.free(unsafe.Pointer(c.lm_config.model))
  381 +
  382 + c.lm_config.scale = C.float(config.LmConfig.Scale)
  383 +
  384 + c.decoding_method = C.CString(config.DecodingMethod)
  385 + defer C.free(unsafe.Pointer(c.decoding_method))
  386 +
  387 + c.max_active_paths = C.int(config.MaxActivePaths)
  388 +
  389 + recognizer := &OfflineRecognizer{}
  390 + recognizer.impl = C.CreateOfflineRecognizer(&c)
  391 +
  392 + return recognizer
  393 +}
  394 +
  395 +// Frees the internal pointer of the stream to avoid a memory leak.
  396 +func DeleteOfflineStream(stream *OfflineStream) {
  397 + C.DestroyOfflineStream(stream.impl)
  398 + stream.impl = nil
  399 +}
  400 +
  401 +// The user is responsible for invoking [DeleteOfflineStream]() to free
  402 +// the returned stream to avoid a memory leak.
  403 +func NewOfflineStream(recognizer *OfflineRecognizer) *OfflineStream {
  404 + stream := &OfflineStream{}
  405 + stream.impl = C.CreateOfflineStream(recognizer.impl)
  406 + return stream
  407 +}
  408 +
  409 +// Input audio samples for the offline stream.
  410 +// Please call it only once, i.e., provide all samples in a single call.
  411 +//
  412 +// sampleRate is the sample rate of the input audio samples. If it is different
  413 +// from the value expected by the feature extractor, we will do resampling inside.
  414 +//
  415 +// samples contains the actual audio samples. Each sample is in the range [-1, 1].
  416 +func (s *OfflineStream) AcceptWaveform(sampleRate int, samples []float32) {
  417 + C.AcceptWaveformOffline(s.impl, C.int(sampleRate), (*C.float)(&samples[0]), C.int(len(samples)))
  418 +}
  419 +
  420 +// Decode the offline stream.
  421 +func (recognizer *OfflineRecognizer) Decode(s *OfflineStream) {
  422 + C.DecodeOfflineStream(recognizer.impl, s.impl)
  423 +}
  424 +
  425 +// Decode multiple streams in parallel, i.e., in batch.
  426 +func (recognizer *OfflineRecognizer) DecodeStreams(s []*OfflineStream) {
  427 + ss := make([]*C.struct_SherpaOnnxOfflineStream, len(s))
  428 + for i, v := range s {
  429 + ss[i] = v.impl
  430 + }
  431 +
  432 + C.DecodeMultipleOfflineStreams(recognizer.impl, &ss[0], C.int(len(s)))
  433 +}
  434 +
  435 +// Get the recognition result of the offline stream.
  436 +func (s *OfflineStream) GetResult() *OfflineRecognizerResult {
  437 + p := C.GetOfflineStreamResult(s.impl)
  438 + defer C.DestroyOfflineRecognizerResult(p)
  439 + result := &OfflineRecognizerResult{}
  440 + result.Text = C.GoString(p.text)
  441 +
  442 + return result
  443 +}
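To tie the offline API together, the following hypothetical helper decodes a single utterance; it assumes the caller already has the audio as mono float32 samples in [-1, 1] at 16 kHz, which is an assumption for this sketch rather than something fixed by the bindings.

	func exampleOfflineDecode(recognizer *OfflineRecognizer, samples []float32) string {
		stream := NewOfflineStream(recognizer)
		defer DeleteOfflineStream(stream)

		// The offline API expects the whole utterance in a single call.
		stream.AcceptWaveform(16000, samples)

		recognizer.Decode(stream)

		// Unlike the online API, the result is read from the stream itself.
		return stream.GetResult().Text
	}

To decode several utterances as a batch, create one OfflineStream per utterance, call AcceptWaveform on each, pass the slice to recognizer.DecodeStreams, and then read each stream's result individually.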
  1 +Host github.com
  2 + Hostname github.com
  3 + User git
  4 + IdentityFile ~/.ssh/github
  5 + StrictHostKeyChecking no