正在显示
28 个修改的文件
包含
1624 行增加
和
14 行删除
.github/workflows/go.yaml
0 → 100644
| 1 | +name: test-go | ||
| 2 | + | ||
| 3 | +on: | ||
| 4 | + push: | ||
| 5 | + branches: | ||
| 6 | + - master | ||
| 7 | + tags: | ||
| 8 | + - '*' | ||
| 9 | + pull_request: | ||
| 10 | + branches: | ||
| 11 | + - master | ||
| 12 | + | ||
| 13 | + workflow_dispatch: | ||
| 14 | + | ||
| 15 | +concurrency: | ||
| 16 | + group: go-${{ github.ref }} | ||
| 17 | + cancel-in-progress: true | ||
| 18 | + | ||
| 19 | +jobs: | ||
| 20 | + go: | ||
| 21 | + name: go ${{ matrix.os }} ${{matrix.arch }} | ||
| 22 | + runs-on: ${{ matrix.os }} | ||
| 23 | + strategy: | ||
| 24 | + fail-fast: false | ||
| 25 | + matrix: | ||
| 26 | + include: | ||
| 27 | + - os: ubuntu-latest | ||
| 28 | + arch: amd64 | ||
| 29 | + - os: macos-latest | ||
| 30 | + arch: amd64 | ||
| 31 | + - os: windows-latest | ||
| 32 | + arch: x64 | ||
| 33 | + - os: windows-latest | ||
| 34 | + arch: x86 # use 386 for GOARCH | ||
| 35 | + | ||
| 36 | + steps: | ||
| 37 | + - uses: actions/checkout@v2 | ||
| 38 | + with: | ||
| 39 | + fetch-depth: 0 | ||
| 40 | + - uses: actions/setup-go@v4 | ||
| 41 | + with: | ||
| 42 | + go-version: '>=1.20' | ||
| 43 | + | ||
| 44 | + - name: Display go version | ||
| 45 | + shell: bash | ||
| 46 | + run: | | ||
| 47 | + go version | ||
| 48 | + go env GOPATH | ||
| 49 | + go env GOARCH | ||
| 50 | + | ||
| 51 | + - name: Set up MinGW | ||
| 52 | + if: matrix.os == 'windows-latest' | ||
| 53 | + uses: egor-tensin/setup-mingw@v2 | ||
| 54 | + with: | ||
| 55 | + platform: ${{ matrix.arch }} | ||
| 56 | + | ||
| 57 | + - name: Show gcc | ||
| 58 | + if: matrix.os == 'windows-latest' | ||
| 59 | + run: | | ||
| 60 | + gcc --version | ||
| 61 | + | ||
| 62 | + - name: Test non-streaming decoding files (Linux/macOS) | ||
| 63 | + if: matrix.os != 'windows-latest' | ||
| 64 | + shell: bash | ||
| 65 | + run: | | ||
| 66 | + cd go-api-examples/non-streaming-decode-files | ||
| 67 | + ls -lh | ||
| 68 | + go mod tidy | ||
| 69 | + cat go.mod | ||
| 70 | + go build -x | ||
| 71 | + ls -lh | ||
| 72 | + | ||
| 73 | + git lfs install | ||
| 74 | + | ||
| 75 | + echo "Test transducer" | ||
| 76 | + git clone https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-en-2023-06-26 | ||
| 77 | + ./run-transducer.sh | ||
| 78 | + rm -rf sherpa-onnx-zipformer-en-2023-06-26 | ||
| 79 | + | ||
| 80 | + echo "Test paraformer" | ||
| 81 | + git clone https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28 | ||
| 82 | + ./run-paraformer.sh | ||
| 83 | + rm -rf sherpa-onnx-paraformer-zh-2023-03-28 | ||
| 84 | + | ||
| 85 | + echo "Test NeMo CTC" | ||
| 86 | + git clone https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-medium | ||
| 87 | + ./run-nemo-ctc.sh | ||
| 88 | + rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium | ||
| 89 | + | ||
| 90 | + - name: Test non-streaming decoding files (Win64) | ||
| 91 | + if: matrix.os == 'windows-latest' && matrix.arch == 'x64' | ||
| 92 | + shell: bash | ||
| 93 | + run: | | ||
| 94 | + cd go-api-examples/non-streaming-decode-files | ||
| 95 | + ls -lh | ||
| 96 | + go mod tidy | ||
| 97 | + cat go.mod | ||
| 98 | + go build | ||
| 99 | + ls -lh | ||
| 100 | + | ||
| 101 | + echo $PWD | ||
| 102 | + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/ | ||
| 103 | + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/* | ||
| 104 | + cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll . | ||
| 105 | + ls -lh | ||
| 106 | + | ||
| 107 | + git lfs install | ||
| 108 | + | ||
| 109 | + echo "Test transducer" | ||
| 110 | + git clone https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-en-2023-06-26 | ||
| 111 | + ./run-transducer.sh | ||
| 112 | + rm -rf sherpa-onnx-zipformer-en-2023-06-26 | ||
| 113 | + | ||
| 114 | + echo "Test paraformer" | ||
| 115 | + git clone https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28 | ||
| 116 | + ./run-paraformer.sh | ||
| 117 | + rm -rf sherpa-onnx-paraformer-zh-2023-03-28 | ||
| 118 | + | ||
| 119 | + echo "Test NeMo CTC" | ||
| 120 | + git clone https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-medium | ||
| 121 | + ./run-nemo-ctc.sh | ||
| 122 | + rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium | ||
| 123 | + | ||
| 124 | + - name: Test non-streaming decoding files (Win32) | ||
| 125 | + if: matrix.os == 'windows-latest' && matrix.arch == 'x86' | ||
| 126 | + shell: bash | ||
| 127 | + run: | | ||
| 128 | + cd go-api-examples/non-streaming-decode-files | ||
| 129 | + ls -lh | ||
| 130 | + go mod tidy | ||
| 131 | + cat go.mod | ||
| 132 | + ls -lh | ||
| 133 | + | ||
| 134 | + go env GOARCH | ||
| 135 | + go env | ||
| 136 | + echo "------------------------------" | ||
| 137 | + go env -w GOARCH=386 | ||
| 138 | + go env -w CGO_ENABLED=1 | ||
| 139 | + go env | ||
| 140 | + | ||
| 141 | + go clean | ||
| 142 | + go build -x | ||
| 143 | + | ||
| 144 | + echo $PWD | ||
| 145 | + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/ | ||
| 146 | + cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll . | ||
| 147 | + ls -lh | ||
| 148 | + | ||
| 149 | + git lfs install | ||
| 150 | + | ||
| 151 | + echo "Test transducer" | ||
| 152 | + git clone https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-en-2023-06-26 | ||
| 153 | + ./run-transducer.sh | ||
| 154 | + rm -rf sherpa-onnx-zipformer-en-2023-06-26 | ||
| 155 | + | ||
| 156 | + echo "Test paraformer" | ||
| 157 | + git clone https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28 | ||
| 158 | + ./run-paraformer.sh | ||
| 159 | + rm -rf sherpa-onnx-paraformer-zh-2023-03-28 | ||
| 160 | + | ||
| 161 | + echo "Test NeMo CTC" | ||
| 162 | + git clone https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-medium | ||
| 163 | + ./run-nemo-ctc.sh | ||
| 164 | + rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium | ||
| 165 | + | ||
| 166 | + - name: Test streaming decoding files (Linux/macOS) | ||
| 167 | + if: matrix.os != 'windows-latest' | ||
| 168 | + shell: bash | ||
| 169 | + run: | | ||
| 170 | + cd go-api-examples/streaming-decode-files | ||
| 171 | + ls -lh | ||
| 172 | + go mod tidy | ||
| 173 | + cat go.mod | ||
| 174 | + go build -x | ||
| 175 | + ls -lh | ||
| 176 | + | ||
| 177 | + git lfs install | ||
| 178 | + | ||
| 179 | + echo "Test transducer" | ||
| 180 | + git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26 | ||
| 181 | + ./run.sh | ||
| 182 | + rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26 | ||
| 183 | + | ||
| 184 | + - name: Test streaming decoding files (Win64) | ||
| 185 | + if: matrix.os == 'windows-latest' && matrix.arch == 'x64' | ||
| 186 | + shell: bash | ||
| 187 | + run: | | ||
| 188 | + cd go-api-examples/streaming-decode-files | ||
| 189 | + ls -lh | ||
| 190 | + go mod tidy | ||
| 191 | + cat go.mod | ||
| 192 | + go build | ||
| 193 | + ls -lh | ||
| 194 | + | ||
| 195 | + echo $PWD | ||
| 196 | + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/ | ||
| 197 | + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/* | ||
| 198 | + cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll . | ||
| 199 | + ls -lh | ||
| 200 | + | ||
| 201 | + git lfs install | ||
| 202 | + | ||
| 203 | + echo "Test transducer" | ||
| 204 | + git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26 | ||
| 205 | + ./run.sh | ||
| 206 | + rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26 | ||
| 207 | + | ||
| 208 | + - name: Test streaming decoding files (Win32) | ||
| 209 | + if: matrix.os == 'windows-latest' && matrix.arch == 'x86' | ||
| 210 | + shell: bash | ||
| 211 | + run: | | ||
| 212 | + cd go-api-examples/streaming-decode-files | ||
| 213 | + ls -lh | ||
| 214 | + go mod tidy | ||
| 215 | + cat go.mod | ||
| 216 | + ls -lh | ||
| 217 | + | ||
| 218 | + go env GOARCH | ||
| 219 | + go env | ||
| 220 | + echo "------------------------------" | ||
| 221 | + go env -w GOARCH=386 | ||
| 222 | + go env -w CGO_ENABLED=1 | ||
| 223 | + go env | ||
| 224 | + | ||
| 225 | + go clean | ||
| 226 | + go build -x | ||
| 227 | + | ||
| 228 | + echo $PWD | ||
| 229 | + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/ | ||
| 230 | + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/* | ||
| 231 | + cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll . | ||
| 232 | + ls -lh | ||
| 233 | + | ||
| 234 | + git lfs install | ||
| 235 | + | ||
| 236 | + echo "Test transducer" | ||
| 237 | + git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26 | ||
| 238 | + ./run.sh | ||
| 239 | + rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26 |
.github/workflows/release-go.yaml
0 → 100644
| 1 | +name: release-go | ||
| 2 | + | ||
| 3 | +on: | ||
| 4 | + push: | ||
| 5 | + branches: | ||
| 6 | + - master | ||
| 7 | + tags: | ||
| 8 | + - '*' | ||
| 9 | + | ||
| 10 | + workflow_dispatch: | ||
| 11 | + inputs: | ||
| 12 | + version: | ||
| 13 | + description: "Version information(e.g., 1.5.3) or auto" | ||
| 14 | + required: true | ||
| 15 | + | ||
| 16 | +env: | ||
| 17 | + VERSION: | ||
| 18 | + |- # Enter release tag name or version name in workflow_dispatch. Appropriate version if not specified | ||
| 19 | + ${{ github.event.release.tag_name || github.event.inputs.version }} | ||
| 20 | + | ||
| 21 | +concurrency: | ||
| 22 | + group: release-go-${{ github.ref }} | ||
| 23 | + cancel-in-progress: true | ||
| 24 | + | ||
| 25 | +jobs: | ||
| 26 | + linux-x86_64_wheel: | ||
| 27 | + if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj' | ||
| 28 | + name: Linux x86_64 | ||
| 29 | + runs-on: ${{ matrix.os }} | ||
| 30 | + strategy: | ||
| 31 | + fail-fast: false | ||
| 32 | + matrix: | ||
| 33 | + os: [ubuntu-latest] | ||
| 34 | + | ||
| 35 | + steps: | ||
| 36 | + - uses: actions/checkout@v2 | ||
| 37 | + | ||
| 38 | + - name: SSH to GitHub | ||
| 39 | + run: | | ||
| 40 | + mkdir -p ~/.ssh/ | ||
| 41 | + cp scripts/go/ssh_config ~/.ssh/config | ||
| 42 | + echo "${{ secrets.MY_GITHUB_SSH_KEY }}" > ~/.ssh/github && chmod 600 ~/.ssh/github | ||
| 43 | + ssh github.com || true | ||
| 44 | + rm ~/.ssh/github | ||
| 45 | + | ||
| 46 | + # see https://cibuildwheel.readthedocs.io/en/stable/changelog/ | ||
| 47 | + # for a list of versions | ||
| 48 | + - name: Build wheels | ||
| 49 | + uses: pypa/cibuildwheel@v2.11.4 | ||
| 50 | + env: | ||
| 51 | + CIBW_BEFORE_BUILD: "pip install -U cmake numpy" | ||
| 52 | + CIBW_BUILD: "cp38-*64" | ||
| 53 | + CIBW_SKIP: "cp27-* cp35-* cp36-* *-win32 pp* *-musllinux* *-manylinux_i686" | ||
| 54 | + CIBW_BUILD_VERBOSITY: 3 | ||
| 55 | + CIBW_ENVIRONMENT_LINUX: LD_LIBRARY_PATH='/project/build/bdist.linux-x86_64/wheel/sherpa_onnx/lib' | ||
| 56 | + | ||
| 57 | + - name: Display wheels | ||
| 58 | + shell: bash | ||
| 59 | + run: | | ||
| 60 | + ls -lh ./wheelhouse/*.whl | ||
| 61 | + unzip -l ./wheelhouse/*.whl | ||
| 62 | + | ||
| 63 | + - uses: actions/upload-artifact@v2 | ||
| 64 | + with: | ||
| 65 | + name: ${{ matrix.os }}-wheels-for-go | ||
| 66 | + path: ./wheelhouse/*.whl | ||
| 67 | + | ||
| 68 | + macOS: | ||
| 69 | + name: macOS ${{ matrix.arch }} | ||
| 70 | + runs-on: ${{ matrix.os }} | ||
| 71 | + strategy: | ||
| 72 | + fail-fast: false | ||
| 73 | + matrix: | ||
| 74 | + os: [macos-latest] | ||
| 75 | + arch: [x86_64, arm64] | ||
| 76 | + | ||
| 77 | + steps: | ||
| 78 | + - uses: actions/checkout@v2 | ||
| 79 | + - name: Configure CMake | ||
| 80 | + shell: bash | ||
| 81 | + run: | | ||
| 82 | + mkdir build | ||
| 83 | + cd build | ||
| 84 | + cmake -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} -D BUILD_SHARED_LIBS=ON -DCMAKE_OSX_ARCHITECTURES=${{ matrix.arch }} -DCMAKE_INSTALL_PREFIX=./install .. | ||
| 85 | + | ||
| 86 | + - name: Build sherpa-onnx for macOS ${{ matrix.arch }} | ||
| 87 | + shell: bash | ||
| 88 | + run: | | ||
| 89 | + cd build | ||
| 90 | + make -j2 | ||
| 91 | + make install | ||
| 92 | + | ||
| 93 | + ls -lh lib | ||
| 94 | + ls -lh bin | ||
| 95 | + | ||
| 96 | + file install/lib/lib* | ||
| 97 | + | ||
| 98 | + - uses: actions/upload-artifact@v2 | ||
| 99 | + with: | ||
| 100 | + name: ${{ matrix.os }}-for-${{ matrix.arch }} | ||
| 101 | + path: ./build/install/lib/ | ||
| 102 | + | ||
| 103 | + windows: | ||
| 104 | + name: Windows ${{ matrix.arch }} | ||
| 105 | + runs-on: ${{ matrix.os }} | ||
| 106 | + strategy: | ||
| 107 | + fail-fast: false | ||
| 108 | + matrix: | ||
| 109 | + os: [windows-latest] | ||
| 110 | + arch: [x64, Win32] | ||
| 111 | + steps: | ||
| 112 | + - uses: actions/checkout@v2 | ||
| 113 | + with: | ||
| 114 | + fetch-depth: 0 | ||
| 115 | + | ||
| 116 | + - name: Configure CMake | ||
| 117 | + shell: bash | ||
| 118 | + run: | | ||
| 119 | + mkdir build | ||
| 120 | + cd build | ||
| 121 | + cmake -A ${{ matrix.arch }} -D CMAKE_BUILD_TYPE=Release -D BUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=./install .. | ||
| 122 | + | ||
| 123 | + - name: Build sherpa-onnx for windows | ||
| 124 | + shell: bash | ||
| 125 | + run: | | ||
| 126 | + cd build | ||
| 127 | + cmake --build . --config Release -- -m:2 | ||
| 128 | + cmake --build . --config Release --target install -- -m:2 | ||
| 129 | + | ||
| 130 | + ls -lh install/* | ||
| 131 | + | ||
| 132 | + ls -lh install/lib | ||
| 133 | + ls -lh install/bin | ||
| 134 | + | ||
| 135 | + - name: Upload artifact | ||
| 136 | + uses: actions/upload-artifact@v2 | ||
| 137 | + with: | ||
| 138 | + name: sherpa-onnx-go-windows-${{ matrix.arch }} | ||
| 139 | + path: ./build/install/lib/ | ||
| 140 | + | ||
| 141 | + Release: | ||
| 142 | + name: Release | ||
| 143 | + runs-on: ubuntu-latest | ||
| 144 | + needs: [linux-x86_64_wheel, macOS, windows] | ||
| 145 | + | ||
| 146 | + steps: | ||
| 147 | + - uses: actions/checkout@v2 | ||
| 148 | + | ||
| 149 | + - name: Add SSH key | ||
| 150 | + run: | | ||
| 151 | + mkdir -p ~/.ssh/ | ||
| 152 | + cp scripts/go/ssh_config ~/.ssh/config | ||
| 153 | + echo "${{ secrets.MY_GITHUB_SSH_KEY }}" > ~/.ssh/github && chmod 600 ~/.ssh/github | ||
| 154 | + ssh github.com || true | ||
| 155 | + | ||
| 156 | + - name: Retrieve artifact from ubuntu-latest | ||
| 157 | + uses: actions/download-artifact@v2 | ||
| 158 | + with: | ||
| 159 | + name: ubuntu-latest-wheels-for-go | ||
| 160 | + path: ./linux | ||
| 161 | + | ||
| 162 | + - name: Retrieve artifact from macos-latest (x86_64) | ||
| 163 | + uses: actions/download-artifact@v2 | ||
| 164 | + with: | ||
| 165 | + name: macos-latest-for-x86_64 | ||
| 166 | + path: ./macos-x86_64 | ||
| 167 | + | ||
| 168 | + - name: Retrieve artifact from macos-latest (arm64) | ||
| 169 | + uses: actions/download-artifact@v2 | ||
| 170 | + with: | ||
| 171 | + name: macos-latest-for-arm64 | ||
| 172 | + path: ./macos-arm64 | ||
| 173 | + | ||
| 174 | + - name: Retrieve artifact from windows-latest (x64) | ||
| 175 | + uses: actions/download-artifact@v2 | ||
| 176 | + with: | ||
| 177 | + name: sherpa-onnx-go-windows-x64 | ||
| 178 | + path: ./windows-x64 | ||
| 179 | + | ||
| 180 | + - name: Retrieve artifact from windows-latest (Win32) | ||
| 181 | + uses: actions/download-artifact@v2 | ||
| 182 | + with: | ||
| 183 | + name: sherpa-onnx-go-windows-Win32 | ||
| 184 | + path: ./windows-win32 | ||
| 185 | + | ||
| 186 | + - name: Unzip Ubuntu wheels | ||
| 187 | + shell: bash | ||
| 188 | + run: | | ||
| 189 | + cd linux | ||
| 190 | + ls -lh | ||
| 191 | + unzip ./*.whl | ||
| 192 | + tree . | ||
| 193 | + | ||
| 194 | + - name: Release go | ||
| 195 | + if: env.VERSION != '' | ||
| 196 | + shell: bash | ||
| 197 | + run: | | ||
| 198 | + ./scripts/go/release.sh |
| @@ -28,7 +28,7 @@ jobs: | @@ -28,7 +28,7 @@ jobs: | ||
| 28 | fail-fast: false | 28 | fail-fast: false |
| 29 | matrix: | 29 | matrix: |
| 30 | os: [ubuntu-latest, windows-latest, macos-latest] | 30 | os: [ubuntu-latest, windows-latest, macos-latest] |
| 31 | - python-version: ["3.7", "3.8", "3.9", "3.10"] | 31 | + python-version: ["3.8", "3.9", "3.10"] |
| 32 | 32 | ||
| 33 | steps: | 33 | steps: |
| 34 | - uses: actions/checkout@v2 | 34 | - uses: actions/checkout@v2 |
| @@ -8,10 +8,6 @@ if(NOT CMAKE_SYSTEM_NAME STREQUAL Darwin) | @@ -8,10 +8,6 @@ if(NOT CMAKE_SYSTEM_NAME STREQUAL Darwin) | ||
| 8 | message(FATAL_ERROR "This file is for macOS only. Given: ${CMAKE_SYSTEM_NAME}") | 8 | message(FATAL_ERROR "This file is for macOS only. Given: ${CMAKE_SYSTEM_NAME}") |
| 9 | endif() | 9 | endif() |
| 10 | 10 | ||
| 11 | -if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL arm64) | ||
| 12 | - message(FATAL_ERROR "This file is for arm64 only. Given: ${CMAKE_SYSTEM_PROCESSOR}") | ||
| 13 | -endif() | ||
| 14 | - | ||
| 15 | set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.15.1/onnxruntime-osx-arm64-1.15.1.tgz") | 11 | set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.15.1/onnxruntime-osx-arm64-1.15.1.tgz") |
| 16 | set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/onnxruntime-osx-arm64-1.15.1.tgz") | 12 | set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/onnxruntime-osx-arm64-1.15.1.tgz") |
| 17 | set(onnxruntime_HASH "SHA256=df97832fc7907c6677a6da437f92339d84a462becb74b1d65217fcb859ee9460") | 13 | set(onnxruntime_HASH "SHA256=df97832fc7907c6677a6da437f92339d84a462becb74b1d65217fcb859ee9460") |
| @@ -8,10 +8,6 @@ if(NOT CMAKE_SYSTEM_NAME STREQUAL Darwin) | @@ -8,10 +8,6 @@ if(NOT CMAKE_SYSTEM_NAME STREQUAL Darwin) | ||
| 8 | message(FATAL_ERROR "This file is for macOS only. Given: ${CMAKE_SYSTEM_NAME}") | 8 | message(FATAL_ERROR "This file is for macOS only. Given: ${CMAKE_SYSTEM_NAME}") |
| 9 | endif() | 9 | endif() |
| 10 | 10 | ||
| 11 | -if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64) | ||
| 12 | - message(FATAL_ERROR "This file is for x86_64 only. Given: ${CMAKE_SYSTEM_PROCESSOR}") | ||
| 13 | -endif() | ||
| 14 | - | ||
| 15 | set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.15.1/onnxruntime-osx-x86_64-1.15.1.tgz") | 11 | set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.15.1/onnxruntime-osx-x86_64-1.15.1.tgz") |
| 16 | set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/onnxruntime-osx-x86_64-1.15.1.tgz") | 12 | set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/onnxruntime-osx-x86_64-1.15.1.tgz") |
| 17 | set(onnxruntime_HASH "SHA256=4b66ebbca24b8b96f6b74655fee3610a7e529b4e01f6790632f24ee82b778e5a") | 13 | set(onnxruntime_HASH "SHA256=4b66ebbca24b8b96f6b74655fee3610a7e529b4e01f6790632f24ee82b778e5a") |
| @@ -16,12 +16,18 @@ function(download_onnxruntime) | @@ -16,12 +16,18 @@ function(download_onnxruntime) | ||
| 16 | include(onnxruntime-linux-x86_64) | 16 | include(onnxruntime-linux-x86_64) |
| 17 | endif() | 17 | endif() |
| 18 | elseif(CMAKE_SYSTEM_NAME STREQUAL Darwin) | 18 | elseif(CMAKE_SYSTEM_NAME STREQUAL Darwin) |
| 19 | - if (arm64 IN_LIST CMAKE_OSX_ARCHITECTURES OR x86_64 IN_LIST CMAKE_OSX_ARCHITECTURES) | ||
| 20 | - include(onnxruntime-darwin-universal) | ||
| 21 | - elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64) | ||
| 22 | - include(onnxruntime-darwin-x86_64) | 19 | + if (arm64 IN_LIST CMAKE_OSX_ARCHITECTURES AND x86_64 IN_LIST CMAKE_OSX_ARCHITECTURES) |
| 20 | + include(onnxruntime-osx-universal) | ||
| 21 | + elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64 AND CMAKE_OSX_ARCHITECTURES STREQUAL "arm64") | ||
| 22 | + # cross compiling | ||
| 23 | + include(onnxruntime-osx-arm64) | ||
| 24 | + elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL arm64 AND CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64") | ||
| 25 | + # cross compiling | ||
| 26 | + include(onnxruntime-osx-x86_64) | ||
| 23 | elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL arm64) | 27 | elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL arm64) |
| 24 | - include(onnxruntime-darwin-arm64) | 28 | + include(onnxruntime-osx-arm64) |
| 29 | + elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64) | ||
| 30 | + include(onnxruntime-osx-x86_64) | ||
| 25 | else() | 31 | else() |
| 26 | message(FATAL_ERROR "Unsupport processor {CMAKE_SYSTEM_PROCESSOR} for Darwin") | 32 | message(FATAL_ERROR "Unsupport processor {CMAKE_SYSTEM_PROCESSOR} for Darwin") |
| 27 | endif() | 33 | endif() |
| 1 | +module non-streaming-decode-files | ||
| 2 | + | ||
| 3 | +go 1.20 | ||
| 4 | + | ||
| 5 | +require ( | ||
| 6 | + github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8 | ||
| 7 | + github.com/spf13/pflag v1.0.5 | ||
| 8 | + github.com/youpy/go-wav v0.3.2 | ||
| 9 | +) | ||
| 10 | + | ||
| 11 | +require ( | ||
| 12 | + github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6 // indirect | ||
| 13 | + github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4 // indirect | ||
| 14 | + github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5 // indirect | ||
| 15 | + github.com/youpy/go-riff v0.1.0 // indirect | ||
| 16 | + github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b // indirect | ||
| 17 | +) |
| 1 | +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= | ||
| 2 | +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= | ||
| 3 | +github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ= | ||
| 4 | +github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= | ||
| 5 | +github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8 h1:BXc31pWwd7CJLM6m9HavxqiyYJdN3Jc9I26pd4x+JHE= | ||
| 6 | +github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8/go.mod h1:kszL/pwg9XTpRGi1AYW/aSwdhRBqb6LN0SjXR0jnsBo= | ||
| 7 | +github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6 h1:o0+l4Wr3IWkWH+kdt8ZZP55L8mRSpW7h1KzvXcfx9FM= | ||
| 8 | +github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6/go.mod h1:9kU02PSdDdzBApwIDBmE2jWS54WvZbRafOEvW/PVLxE= | ||
| 9 | +github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4 h1:Xri23R3+tQFkNwO6zVxqZbjMRJP40z7JtoSqW6UP6sM= | ||
| 10 | +github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4/go.mod h1:a+AJZKNQkFO+JyzGkHySysYfBzzdcoJI5ITFsnhVcmo= | ||
| 11 | +github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5 h1:OR1LLoptR8W35j5u06KrYUblG35B83HHCPWrVK31uHY= | ||
| 12 | +github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5/go.mod h1:ermMOETZUv0nM7MmnXWqeHREMR6zQXBhJpEP2fbHIZo= | ||
| 13 | +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= | ||
| 14 | +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= | ||
| 15 | +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= | ||
| 16 | +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= | ||
| 17 | +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= | ||
| 18 | +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= | ||
| 19 | +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= | ||
| 20 | +github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= | ||
| 21 | +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= | ||
| 22 | +github.com/youpy/go-riff v0.1.0 h1:vZO/37nI4tIET8tQI0Qn0Y79qQh99aEpponTPiPut7k= | ||
| 23 | +github.com/youpy/go-riff v0.1.0/go.mod h1:83nxdDV4Z9RzrTut9losK7ve4hUnxUR8ASSz4BsKXwQ= | ||
| 24 | +github.com/youpy/go-wav v0.3.2 h1:NLM8L/7yZ0Bntadw/0h95OyUsen+DQIVf9gay+SUsMU= | ||
| 25 | +github.com/youpy/go-wav v0.3.2/go.mod h1:0FCieAXAeSdcxFfwLpRuEo0PFmAoc+8NU34h7TUvk50= | ||
| 26 | +github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b h1:QqixIpc5WFIqTLxB3Hq8qs0qImAgBdq0p6rq2Qdl634= | ||
| 27 | +github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b/go.mod h1:T2h1zV50R/q0CVYnsQOQ6L7P4a2ZxH47ixWcMXFGyx8= | ||
| 28 | +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= | ||
| 29 | +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= | ||
| 30 | +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= | ||
| 31 | +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= | ||
| 32 | +gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo= | ||
| 33 | +gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw= |
| 1 | +package main | ||
| 2 | + | ||
| 3 | +import ( | ||
| 4 | + "bytes" | ||
| 5 | + "encoding/binary" | ||
| 6 | + sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx" | ||
| 7 | + flag "github.com/spf13/pflag" | ||
| 8 | + "github.com/youpy/go-wav" | ||
| 9 | + "os" | ||
| 10 | + "strings" | ||
| 11 | + | ||
| 12 | + "log" | ||
| 13 | +) | ||
| 14 | + | ||
| 15 | +func main() { | ||
| 16 | + | ||
| 17 | + log.SetFlags(log.LstdFlags | log.Lmicroseconds) | ||
| 18 | + | ||
| 19 | + config := sherpa.OfflineRecognizerConfig{} | ||
| 20 | + config.FeatConfig = sherpa.FeatureConfig{SampleRate: 16000, FeatureDim: 80} | ||
| 21 | + | ||
| 22 | + flag.StringVar(&config.ModelConfig.Transducer.Encoder, "encoder", "", "Path to the encoder model") | ||
| 23 | + flag.StringVar(&config.ModelConfig.Transducer.Decoder, "decoder", "", "Path to the decoder model") | ||
| 24 | + flag.StringVar(&config.ModelConfig.Transducer.Joiner, "joiner", "", "Path to the joiner model") | ||
| 25 | + flag.StringVar(&config.ModelConfig.Paraformer.Model, "paraformer", "", "Path to the paraformer model") | ||
| 26 | + flag.StringVar(&config.ModelConfig.NemoCTC.Model, "nemo-ctc", "", "Path to the NeMo CTC model") | ||
| 27 | + flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file") | ||
| 28 | + flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing") | ||
| 29 | + flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message") | ||
| 30 | + flag.StringVar(&config.ModelConfig.ModelType, "model-type", "", "Optional. Used for loading the model in a faster way") | ||
| 31 | + flag.StringVar(&config.ModelConfig.Provider, "provider", "cpu", "Provider to use") | ||
| 32 | + flag.StringVar(&config.LmConfig.Model, "lm-model", "", "Optional. Path to the LM model") | ||
| 33 | + flag.Float32Var(&config.LmConfig.Scale, "lm-scale", 1.0, "Optional. Scale for the LM model") | ||
| 34 | + | ||
| 35 | + flag.StringVar(&config.DecodingMethod, "decoding-method", "greedy_search", "Decoding method. Possible values: greedy_search, modified_beam_search") | ||
| 36 | + flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search") | ||
| 37 | + | ||
| 38 | + flag.Parse() | ||
| 39 | + | ||
| 40 | + if len(flag.Args()) != 1 { | ||
| 41 | + log.Fatalf("Please provide one wave file") | ||
| 42 | + } | ||
| 43 | + | ||
| 44 | + log.Println("Reading", flag.Arg(0)) | ||
| 45 | + | ||
| 46 | + samples, sampleRate := readWave(flag.Arg(0)) | ||
| 47 | + | ||
| 48 | + log.Println("Initializing recognizer (may take several seconds)") | ||
| 49 | + recognizer := sherpa.NewOfflineRecognizer(&config) | ||
| 50 | + log.Println("Recognizer created!") | ||
| 51 | + defer sherpa.DeleteOfflineRecognizer(recognizer) | ||
| 52 | + | ||
| 53 | + log.Println("Start decoding!") | ||
| 54 | + stream := sherpa.NewOfflineStream(recognizer) | ||
| 55 | + defer sherpa.DeleteOfflineStream(stream) | ||
| 56 | + | ||
| 57 | + stream.AcceptWaveform(sampleRate, samples) | ||
| 58 | + | ||
| 59 | + recognizer.Decode(stream) | ||
| 60 | + log.Println("Decoding done!") | ||
| 61 | + result := stream.GetResult() | ||
| 62 | + | ||
| 63 | + log.Println(strings.ToLower(result.Text)) | ||
| 64 | + log.Printf("Wave duration: %v seconds", float32(len(samples))/float32(sampleRate)) | ||
| 65 | +} | ||
| 66 | + | ||
| 67 | +func readWave(filename string) (samples []float32, sampleRate int) { | ||
| 68 | + file, _ := os.Open(filename) | ||
| 69 | + defer file.Close() | ||
| 70 | + | ||
| 71 | + reader := wav.NewReader(file) | ||
| 72 | + format, err := reader.Format() | ||
| 73 | + if err != nil { | ||
| 74 | + log.Fatalf("Failed to read wave format") | ||
| 75 | + } | ||
| 76 | + | ||
| 77 | + if format.AudioFormat != 1 { | ||
| 78 | + log.Fatalf("Support only PCM format. Given: %v\n", format.AudioFormat) | ||
| 79 | + } | ||
| 80 | + | ||
| 81 | + if format.NumChannels != 1 { | ||
| 82 | + log.Fatalf("Support only 1 channel wave file. Given: %v\n", format.NumChannels) | ||
| 83 | + } | ||
| 84 | + | ||
| 85 | + if format.BitsPerSample != 16 { | ||
| 86 | + log.Fatalf("Support only 16-bit per sample. Given: %v\n", format.BitsPerSample) | ||
| 87 | + } | ||
| 88 | + | ||
| 89 | + reader.Duration() // so that it initializes reader.Size | ||
| 90 | + | ||
| 91 | + buf := make([]byte, reader.Size) | ||
| 92 | + n, err := reader.Read(buf) | ||
| 93 | + if n != int(reader.Size) { | ||
| 94 | + log.Fatalf("Failed to read %v bytes. Returned %v bytes\n", reader.Size, n) | ||
| 95 | + } | ||
| 96 | + | ||
| 97 | + samples = samplesInt16ToFloat(buf) | ||
| 98 | + sampleRate = int(format.SampleRate) | ||
| 99 | + | ||
| 100 | + return | ||
| 101 | +} | ||
| 102 | + | ||
// samplesInt16ToFloat converts little-endian signed 16-bit samples into
// float32 values by dividing each sample by 32768.
//
// Every two input bytes form one sample; a trailing odd byte is ignored. The
// returned slice has len(inSamples)/2 elements and is never nil.
func samplesInt16ToFloat(inSamples []byte) []float32 {
	numSamples := len(inSamples) / 2
	outSamples := make([]float32, numSamples)
	if numSamples == 0 {
		return outSamples
	}

	// Decode all samples with one binary.Read call instead of creating a
	// reader and decoding per sample.
	pcm := make([]int16, numSamples)
	src := bytes.NewReader(inSamples[:numSamples*2])
	if err := binary.Read(src, binary.LittleEndian, pcm); err != nil {
		log.Fatal("Failed to parse 16-bit sample")
	}

	for i, s := range pcm {
		outSamples[i] = float32(s) / 32768
	}

	return outSamples
}
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +# Please refer to | ||
| 4 | +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/nemo/english.html#stt-en-conformer-ctc-medium | ||
| 5 | +# to download the model | ||
| 6 | +# before you run this script. | ||
| 7 | +# | ||
| 8 | +# You can switch to a different non-streaming model if needed | ||
| 9 | + | ||
| 10 | +./non-streaming-decode-files \ | ||
| 11 | + --nemo-ctc ./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \ | ||
| 12 | + --tokens ./sherpa-onnx-nemo-ctc-en-conformer-medium/tokens.txt \ | ||
| 13 | + --model-type nemo_ctc \ | ||
| 14 | + --debug 0 \ | ||
| 15 | + ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/0.wav |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +# Please refer to | ||
| 4 | +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-03-28-chinese | ||
| 5 | +# to download the model | ||
| 6 | +# before you run this script. | ||
| 7 | +# | ||
| 8 | +# You can switch to a different non-streaming model if needed | ||
| 9 | + | ||
| 10 | +./non-streaming-decode-files \ | ||
| 11 | + --paraformer ./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx \ | ||
| 12 | + --tokens ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \ | ||
| 13 | + --model-type paraformer \ | ||
| 14 | + --debug 0 \ | ||
| 15 | + ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +# Please refer to | ||
| 4 | +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-zipformer-en-2023-06-26-english | ||
| 5 | +# to download the model | ||
| 6 | +# before you run this script. | ||
| 7 | +# | ||
| 8 | +# You can switch to a different offline (non-streaming) model if you need | ||
| 9 | + | ||
| 10 | +./non-streaming-decode-files \ | ||
| 11 | + --encoder ./sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx \ | ||
| 12 | + --decoder ./sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx \ | ||
| 13 | + --joiner ./sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.onnx \ | ||
| 14 | + --tokens ./sherpa-onnx-zipformer-en-2023-06-26/tokens.txt \ | ||
| 15 | + --model-type transducer \ | ||
| 16 | + --debug 0 \ | ||
| 17 | + ./sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav |
| 1 | +real-time-speech-recognition-from-microphone |
| 1 | +# Introduction | ||
| 2 | + | ||
| 3 | +This example shows how to use the golang package of [sherpa-onnx][sherpa-onnx] | ||
| 4 | +for real-time speech recognition from microphone. | ||
| 5 | + | ||
| 6 | +It uses <https://github.com/gordonklaus/portaudio> | ||
| 7 | +to read the microphone and you have to install `portaudio` first. | ||
| 8 | + | ||
| 9 | +On macOS, you can use | ||
| 10 | + | ||
| 11 | +``` | ||
| 12 | +brew install portaudio | ||
| 13 | +``` | ||
| 14 | + | ||
| 15 | +and it will install `portaudio` into `/usr/local/Cellar/portaudio/19.7.0`. | ||
| 16 | +You need to set the following environment variable | ||
| 17 | +``` | ||
| 18 | +export PKG_CONFIG_PATH=/usr/local/Cellar/portaudio/19.7.0 | ||
| 19 | +``` | ||
| 20 | + | ||
| 21 | +so that `pkg-config --cflags --libs portaudio-2.0` can run successfully. | ||
| 22 | + | ||
| 23 | +[sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx |
| 1 | +module real-time-speech-recognition-from-microphone | ||
| 2 | + | ||
| 3 | +go 1.20 | ||
| 4 | + | ||
| 5 | +require ( | ||
| 6 | + github.com/gordonklaus/portaudio v0.0.0-20230709114228-aafa478834f5 | ||
| 7 | + github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8 | ||
| 8 | + github.com/spf13/pflag v1.0.5 | ||
| 9 | +) | ||
| 10 | + | ||
| 11 | +require ( | ||
| 12 | + github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6 // indirect | ||
| 13 | + github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4 // indirect | ||
| 14 | + github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5 // indirect | ||
| 15 | +) |
| 1 | +github.com/gordonklaus/portaudio v0.0.0-20230709114228-aafa478834f5 h1:5AlozfqaVjGYGhms2OsdUyfdJME76E6rx5MdGpjzZpc= | ||
| 2 | +github.com/gordonklaus/portaudio v0.0.0-20230709114228-aafa478834f5/go.mod h1:WY8R6YKlI2ZI3UyzFk7P6yGSuS+hFwNtEzrexRyD7Es= | ||
| 3 | +github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8 h1:BXc31pWwd7CJLM6m9HavxqiyYJdN3Jc9I26pd4x+JHE= | ||
| 4 | +github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8/go.mod h1:kszL/pwg9XTpRGi1AYW/aSwdhRBqb6LN0SjXR0jnsBo= | ||
| 5 | +github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6 h1:o0+l4Wr3IWkWH+kdt8ZZP55L8mRSpW7h1KzvXcfx9FM= | ||
| 6 | +github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6/go.mod h1:9kU02PSdDdzBApwIDBmE2jWS54WvZbRafOEvW/PVLxE= | ||
| 7 | +github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4 h1:Xri23R3+tQFkNwO6zVxqZbjMRJP40z7JtoSqW6UP6sM= | ||
| 8 | +github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4/go.mod h1:a+AJZKNQkFO+JyzGkHySysYfBzzdcoJI5ITFsnhVcmo= | ||
| 9 | +github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5 h1:OR1LLoptR8W35j5u06KrYUblG35B83HHCPWrVK31uHY= | ||
| 10 | +github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5/go.mod h1:ermMOETZUv0nM7MmnXWqeHREMR6zQXBhJpEP2fbHIZo= | ||
| 11 | +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= | ||
| 12 | +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= |
| 1 | +package main | ||
| 2 | + | ||
| 3 | +import ( | ||
| 4 | + "github.com/gordonklaus/portaudio" | ||
| 5 | + sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx" | ||
| 6 | + flag "github.com/spf13/pflag" | ||
| 7 | + "strings" | ||
| 8 | + | ||
| 9 | + "fmt" | ||
| 10 | + "log" | ||
| 11 | +) | ||
| 12 | + | ||
| 13 | +func main() { | ||
| 14 | + err := portaudio.Initialize() | ||
| 15 | + if err != nil { | ||
| 16 | + log.Fatalf("Unable to initialize portaudio: %v\n", err) | ||
| 17 | + } | ||
| 18 | + defer portaudio.Terminate() | ||
| 19 | + | ||
| 20 | + default_device, err := portaudio.DefaultInputDevice() | ||
| 21 | + if err != nil { | ||
| 22 | + log.Fatal("Failed to get default input device: %v\n", err) | ||
| 23 | + } | ||
| 24 | + fmt.Printf("Select default input device: %s\n", default_device.Name) | ||
| 25 | + param := portaudio.StreamParameters{} | ||
| 26 | + param.Input.Device = default_device | ||
| 27 | + param.Input.Channels = 1 | ||
| 28 | + param.Input.Latency = default_device.DefaultLowInputLatency | ||
| 29 | + | ||
| 30 | + param.SampleRate = 16000 | ||
| 31 | + param.FramesPerBuffer = 0 | ||
| 32 | + param.Flags = portaudio.ClipOff | ||
| 33 | + | ||
| 34 | + config := sherpa.OnlineRecognizerConfig{} | ||
| 35 | + config.FeatConfig = sherpa.FeatureConfig{SampleRate: 16000, FeatureDim: 80} | ||
| 36 | + | ||
| 37 | + flag.StringVar(&config.ModelConfig.Encoder, "encoder", "", "Path to the encoder model") | ||
| 38 | + flag.StringVar(&config.ModelConfig.Decoder, "decoder", "", "Path to the decoder model") | ||
| 39 | + flag.StringVar(&config.ModelConfig.Joiner, "joiner", "", "Path to the joiner model") | ||
| 40 | + flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file") | ||
| 41 | + flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing") | ||
| 42 | + flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message") | ||
| 43 | + flag.StringVar(&config.ModelConfig.ModelType, "model-type", "", "Optional. Used for loading the model in a faster way") | ||
| 44 | + flag.StringVar(&config.ModelConfig.Provider, "provider", "cpu", "Provider to use") | ||
| 45 | + flag.StringVar(&config.DecodingMethod, "decoding-method", "greedy_search", "Decoding method. Possible values: greedy_search, modified_beam_search") | ||
| 46 | + flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search") | ||
| 47 | + flag.IntVar(&config.EnableEndpoint, "enable-endpoint", 1, "Whether to enable endpoint") | ||
| 48 | + flag.Float32Var(&config.Rule1MinTrailingSilence, "rule1-min-trailing-silence", 2.4, "Threshold for rule1") | ||
| 49 | + flag.Float32Var(&config.Rule2MinTrailingSilence, "rule2-min-trailing-silence", 1.2, "Threshold for rule2") | ||
| 50 | + flag.Float32Var(&config.Rule3MinUtteranceLength, "rule3-min-utterance-length", 20, "Threshold for rule3") | ||
| 51 | + | ||
| 52 | + flag.Parse() | ||
| 53 | + | ||
| 54 | + log.Println("Initializing recognizer (may take several seconds)") | ||
| 55 | + recognizer := sherpa.NewOnlineRecognizer(&config) | ||
| 56 | + log.Println("Recognizer created!") | ||
| 57 | + defer sherpa.DeleteOnlineRecognizer(recognizer) | ||
| 58 | + | ||
| 59 | + stream := sherpa.NewOnlineStream(recognizer) | ||
| 60 | + | ||
| 61 | + // you can choose another value for 0.1 if you want | ||
| 62 | + samplesPerCall := int32(param.SampleRate * 0.1) // 0.1 second | ||
| 63 | + | ||
| 64 | + samples := make([]float32, samplesPerCall) | ||
| 65 | + s, err := portaudio.OpenStream(param, samples) | ||
| 66 | + if err != nil { | ||
| 67 | + log.Fatalf("Failed to open the stream") | ||
| 68 | + } | ||
| 69 | + defer s.Close() | ||
| 70 | + chk(s.Start()) | ||
| 71 | + | ||
| 72 | + var last_text string | ||
| 73 | + | ||
| 74 | + segment_idx := 0 | ||
| 75 | + | ||
| 76 | + fmt.Println("Started! Please speak") | ||
| 77 | + | ||
| 78 | + for { | ||
| 79 | + chk(s.Read()) | ||
| 80 | + stream.AcceptWaveform(int(param.SampleRate), samples) | ||
| 81 | + | ||
| 82 | + for recognizer.IsReady(stream) { | ||
| 83 | + recognizer.Decode(stream) | ||
| 84 | + } | ||
| 85 | + | ||
| 86 | + text := recognizer.GetResult(stream).Text | ||
| 87 | + if len(text) != 0 && last_text != text { | ||
| 88 | + last_text = strings.ToLower(text) | ||
| 89 | + fmt.Printf("\r%d: %s", segment_idx, last_text) | ||
| 90 | + } | ||
| 91 | + | ||
| 92 | + if recognizer.IsEndpoint(stream) { | ||
| 93 | + if len(text) != 0 { | ||
| 94 | + segment_idx++ | ||
| 95 | + fmt.Println() | ||
| 96 | + } | ||
| 97 | + recognizer.Reset(stream) | ||
| 98 | + } | ||
| 99 | + } | ||
| 100 | + | ||
| 101 | + chk(s.Stop()) | ||
| 102 | + return | ||
| 103 | + | ||
| 104 | +} | ||
| 105 | + | ||
// chk panics with err when err is non-nil and does nothing otherwise.
func chk(err error) {
	if err == nil {
		return
	}
	panic(err)
}
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +# Please refer to | ||
| 4 | +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#pkufool-icefall-asr-zipformer-streaming-wenetspeech-20230615-chinese | ||
| 5 | +# to download the model | ||
| 6 | +# before you run this script. | ||
| 7 | +# | ||
| 8 | +# You can switch to different online models if you need | ||
| 9 | + | ||
| 10 | +./real-time-speech-recognition-from-microphone \ | ||
| 11 | + --encoder ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx \ | ||
| 12 | + --decoder ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx \ | ||
| 13 | + --joiner ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx \ | ||
| 14 | + --tokens ./icefall-asr-zipformer-streaming-wenetspeech-20230615/data/lang_char/tokens.txt \ | ||
| 15 | + --model-type zipformer2 |
| 1 | +streaming-decode-files |
| 1 | +module streaming-decode-files | ||
| 2 | + | ||
| 3 | +go 1.20 | ||
| 4 | + | ||
| 5 | +require ( | ||
| 6 | + github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8 | ||
| 7 | + github.com/spf13/pflag v1.0.5 | ||
| 8 | + github.com/youpy/go-wav v0.3.2 | ||
| 9 | +) | ||
| 10 | + | ||
| 11 | +require ( | ||
| 12 | + github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6 // indirect | ||
| 13 | + github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4 // indirect | ||
| 14 | + github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5 // indirect | ||
| 15 | + github.com/youpy/go-riff v0.1.0 // indirect | ||
| 16 | + github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b // indirect | ||
| 17 | +) |
| 1 | +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= | ||
| 2 | +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= | ||
| 3 | +github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ= | ||
| 4 | +github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= | ||
| 5 | +github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8 h1:BXc31pWwd7CJLM6m9HavxqiyYJdN3Jc9I26pd4x+JHE= | ||
| 6 | +github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8/go.mod h1:kszL/pwg9XTpRGi1AYW/aSwdhRBqb6LN0SjXR0jnsBo= | ||
| 7 | +github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6 h1:o0+l4Wr3IWkWH+kdt8ZZP55L8mRSpW7h1KzvXcfx9FM= | ||
| 8 | +github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6/go.mod h1:9kU02PSdDdzBApwIDBmE2jWS54WvZbRafOEvW/PVLxE= | ||
| 9 | +github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4 h1:Xri23R3+tQFkNwO6zVxqZbjMRJP40z7JtoSqW6UP6sM= | ||
| 10 | +github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4/go.mod h1:a+AJZKNQkFO+JyzGkHySysYfBzzdcoJI5ITFsnhVcmo= | ||
| 11 | +github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5 h1:OR1LLoptR8W35j5u06KrYUblG35B83HHCPWrVK31uHY= | ||
| 12 | +github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5/go.mod h1:ermMOETZUv0nM7MmnXWqeHREMR6zQXBhJpEP2fbHIZo= | ||
| 13 | +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= | ||
| 14 | +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= | ||
| 15 | +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= | ||
| 16 | +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= | ||
| 17 | +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= | ||
| 18 | +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= | ||
| 19 | +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= | ||
| 20 | +github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= | ||
| 21 | +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= | ||
| 22 | +github.com/youpy/go-riff v0.1.0 h1:vZO/37nI4tIET8tQI0Qn0Y79qQh99aEpponTPiPut7k= | ||
| 23 | +github.com/youpy/go-riff v0.1.0/go.mod h1:83nxdDV4Z9RzrTut9losK7ve4hUnxUR8ASSz4BsKXwQ= | ||
| 24 | +github.com/youpy/go-wav v0.3.2 h1:NLM8L/7yZ0Bntadw/0h95OyUsen+DQIVf9gay+SUsMU= | ||
| 25 | +github.com/youpy/go-wav v0.3.2/go.mod h1:0FCieAXAeSdcxFfwLpRuEo0PFmAoc+8NU34h7TUvk50= | ||
| 26 | +github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b h1:QqixIpc5WFIqTLxB3Hq8qs0qImAgBdq0p6rq2Qdl634= | ||
| 27 | +github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b/go.mod h1:T2h1zV50R/q0CVYnsQOQ6L7P4a2ZxH47ixWcMXFGyx8= | ||
| 28 | +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= | ||
| 29 | +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= | ||
| 30 | +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= | ||
| 31 | +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= | ||
| 32 | +gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo= | ||
| 33 | +gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw= |
| 1 | +package main | ||
| 2 | + | ||
| 3 | +import ( | ||
| 4 | + "bytes" | ||
| 5 | + "encoding/binary" | ||
| 6 | + sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx" | ||
| 7 | + flag "github.com/spf13/pflag" | ||
| 8 | + "github.com/youpy/go-wav" | ||
| 9 | + "os" | ||
| 10 | + "strings" | ||
| 11 | + | ||
| 12 | + "log" | ||
| 13 | +) | ||
| 14 | + | ||
| 15 | +func main() { | ||
| 16 | + log.SetFlags(log.LstdFlags | log.Lmicroseconds) | ||
| 17 | + | ||
| 18 | + config := sherpa.OnlineRecognizerConfig{} | ||
| 19 | + config.FeatConfig = sherpa.FeatureConfig{SampleRate: 16000, FeatureDim: 80} | ||
| 20 | + | ||
| 21 | + flag.StringVar(&config.ModelConfig.Encoder, "encoder", "", "Path to the encoder model") | ||
| 22 | + flag.StringVar(&config.ModelConfig.Decoder, "decoder", "", "Path to the decoder model") | ||
| 23 | + flag.StringVar(&config.ModelConfig.Joiner, "joiner", "", "Path to the joiner model") | ||
| 24 | + flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file") | ||
| 25 | + flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing") | ||
| 26 | + flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message") | ||
| 27 | + flag.StringVar(&config.ModelConfig.ModelType, "model-type", "", "Optional. Used for loading the model in a faster way") | ||
| 28 | + flag.StringVar(&config.ModelConfig.Provider, "provider", "cpu", "Provider to use") | ||
| 29 | + flag.StringVar(&config.DecodingMethod, "decoding-method", "greedy_search", "Decoding method. Possible values: greedy_search, modified_beam_search") | ||
| 30 | + flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search") | ||
| 31 | + | ||
| 32 | + flag.Parse() | ||
| 33 | + | ||
| 34 | + if len(flag.Args()) != 1 { | ||
| 35 | + log.Fatalf("Please provide one wave file") | ||
| 36 | + } | ||
| 37 | + | ||
| 38 | + log.Println("Reading", flag.Arg(0)) | ||
| 39 | + | ||
| 40 | + samples, sampleRate := readWave(flag.Arg(0)) | ||
| 41 | + | ||
| 42 | + log.Println("Initializing recognizer (may take several seconds)") | ||
| 43 | + recognizer := sherpa.NewOnlineRecognizer(&config) | ||
| 44 | + log.Println("Recognizer created!") | ||
| 45 | + defer sherpa.DeleteOnlineRecognizer(recognizer) | ||
| 46 | + | ||
| 47 | + log.Println("Start decoding!") | ||
| 48 | + stream := sherpa.NewOnlineStream(recognizer) | ||
| 49 | + defer sherpa.DeleteOnlineStream(stream) | ||
| 50 | + | ||
| 51 | + stream.AcceptWaveform(sampleRate, samples) | ||
| 52 | + | ||
| 53 | + tailPadding := make([]float32, int(float32(sampleRate)*0.3)) | ||
| 54 | + stream.AcceptWaveform(sampleRate, tailPadding) | ||
| 55 | + | ||
| 56 | + for recognizer.IsReady(stream) { | ||
| 57 | + recognizer.Decode(stream) | ||
| 58 | + } | ||
| 59 | + log.Println("Decoding done!") | ||
| 60 | + result := recognizer.GetResult(stream) | ||
| 61 | + log.Println(strings.ToLower(result.Text)) | ||
| 62 | + log.Printf("Wave duration: %v seconds", float32(len(samples))/float32(sampleRate)) | ||
| 63 | +} | ||
| 64 | + | ||
| 65 | +func readWave(filename string) (samples []float32, sampleRate int) { | ||
| 66 | + file, _ := os.Open(filename) | ||
| 67 | + defer file.Close() | ||
| 68 | + | ||
| 69 | + reader := wav.NewReader(file) | ||
| 70 | + format, err := reader.Format() | ||
| 71 | + if err != nil { | ||
| 72 | + log.Fatalf("Failed to read wave format") | ||
| 73 | + } | ||
| 74 | + | ||
| 75 | + if format.AudioFormat != 1 { | ||
| 76 | + log.Fatalf("Support only PCM format. Given: %v\n", format.AudioFormat) | ||
| 77 | + } | ||
| 78 | + | ||
| 79 | + if format.NumChannels != 1 { | ||
| 80 | + log.Fatalf("Support only 1 channel wave file. Given: %v\n", format.NumChannels) | ||
| 81 | + } | ||
| 82 | + | ||
| 83 | + if format.BitsPerSample != 16 { | ||
| 84 | + log.Fatalf("Support only 16-bit per sample. Given: %v\n", format.BitsPerSample) | ||
| 85 | + } | ||
| 86 | + | ||
| 87 | + reader.Duration() // so that it initializes reader.Size | ||
| 88 | + | ||
| 89 | + buf := make([]byte, reader.Size) | ||
| 90 | + n, err := reader.Read(buf) | ||
| 91 | + if n != int(reader.Size) { | ||
| 92 | + log.Fatalf("Failed to read %v bytes. Returned %v bytes\n", reader.Size, n) | ||
| 93 | + } | ||
| 94 | + | ||
| 95 | + samples = samplesInt16ToFloat(buf) | ||
| 96 | + sampleRate = int(format.SampleRate) | ||
| 97 | + | ||
| 98 | + return | ||
| 99 | +} | ||
| 100 | + | ||
// samplesInt16ToFloat converts little-endian signed 16-bit samples to float32
// values by dividing each sample by 32768, so the results lie in [-1, 1).
//
// inSamples holds two bytes per sample; a trailing odd byte is ignored. The
// returned slice is never nil and has len(inSamples)/2 elements.
func samplesInt16ToFloat(inSamples []byte) []float32 {
	numSamples := len(inSamples) / 2
	outSamples := make([]float32, numSamples)
	if numSamples == 0 {
		return outSamples
	}

	// Decode all samples with a single binary.Read call instead of creating
	// a reader and calling binary.Read once per sample.
	pcm := make([]int16, numSamples)
	buf := bytes.NewReader(inSamples[:numSamples*2])
	if err := binary.Read(buf, binary.LittleEndian, pcm); err != nil {
		log.Fatal("Failed to parse 16-bit sample")
	}

	for i, s16 := range pcm {
		outSamples[i] = float32(s16) / 32768
	}

	return outSamples
}
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +# Please refer to | ||
| 4 | +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-2023-06-26-english | ||
| 5 | +# to download the model | ||
| 6 | +# before you run this script. | ||
| 7 | +# | ||
| 8 | +# You can switch to a different online model if you need | ||
| 9 | + | ||
| 10 | +./streaming-decode-files \ | ||
| 11 | + --encoder ./sherpa-onnx-streaming-zipformer-en-2023-06-26/encoder-epoch-99-avg-1-chunk-16-left-128.onnx \ | ||
| 12 | + --decoder ./sherpa-onnx-streaming-zipformer-en-2023-06-26/decoder-epoch-99-avg-1-chunk-16-left-128.onnx \ | ||
| 13 | + --joiner ./sherpa-onnx-streaming-zipformer-en-2023-06-26/joiner-epoch-99-avg-1-chunk-16-left-128.onnx \ | ||
| 14 | + --tokens ./sherpa-onnx-streaming-zipformer-en-2023-06-26/tokens.txt \ | ||
| 15 | + --model-type zipformer2 \ | ||
| 16 | + --debug 0 \ | ||
| 17 | + ./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/0.wav |
scripts/go/release.sh
0 → 100755
#!/usr/bin/env bash
#
# Copy the prebuilt sherpa-onnx libraries, c-api.h and sherpa_onnx.go into the
# sherpa-onnx-go-linux, sherpa-onnx-go-macos and sherpa-onnx-go-windows
# repositories, then commit, tag and push a tag in each of them.
#
# Inputs read by this script:
#   VERSION          "auto" creates a pre-release tag
#                    v<SHERPA_ONNX_VERSION>-alpha.N; any other value is used
#                    verbatim as the new tag.
#   ./CMakeLists.txt provides SHERPA_ONNX_VERSION.
#   ./linux, ./macos-x86_64, ./macos-arm64, ./windows-x64, ./windows-win32
#                    contain the prebuilt libraries to publish.
#
# It must be run from the root of the sherpa-onnx repository.

set -ex

git config --global user.email "csukuangfj@gmail.com"
git config --global user.name "Fangjun Kuang"

SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)

# Commit everything inside the cloned repository $1, tag it and push the tag.
#
# With VERSION=auto the tag is ${SHERPA_ONNX_VERSION}-alpha.N, where N is one
# more than the counter of the latest tag if that tag is already a pre-release
# of exactly this version, and 1 otherwise.
release_repo() {
  local repo=$1
  local tag last new_tag

  pushd "$repo"
  # Declared separately from the assignment so that a failing `git describe`
  # is not masked by `local` and still aborts the script under `set -e`.
  tag=$(git describe --abbrev=0 --tags)
  if [[ x"$VERSION" == x"auto" ]]; then
    # this is a pre-release
    #
    # Match the literal "<version>-alpha." prefix. A bare "<version>*" match
    # would also accept a plain release tag or a longer version such as
    # v1.5.30 for v1.5.3 and derive a wrong counter from it.
    if [[ $tag == "${SHERPA_ONNX_VERSION}-alpha."* ]]; then
      # we have already released a pre-release before, so just increment it
      last=${tag##*.}
      new_tag=${SHERPA_ONNX_VERSION}-alpha.$((last + 1))
    else
      new_tag=${SHERPA_ONNX_VERSION}-alpha.1
    fi
  else
    new_tag=$VERSION
  fi

  echo "new_tag: $new_tag"
  git add .
  git status
  # Best effort on purpose: a failing commit, tag or push (e.g. when there is
  # nothing to commit) must not abort the remaining platforms.
  git commit -m "Release $new_tag" && \
    git tag "$new_tag" && \
    git push origin "$new_tag" || true

  popd
}

echo "========================================================================="

git clone git@github.com:k2-fsa/sherpa-onnx-go-linux.git

echo "Copy libs for Linux x86_64"

rm -rf sherpa-onnx-go-linux/lib/x86_64-unknown-linux-gnu/lib*

cp -v ./linux/sherpa_onnx/lib/libkaldi-native-fbank-core.so sherpa-onnx-go-linux/lib/x86_64-unknown-linux-gnu/
cp -v ./linux/sherpa_onnx/lib/libonnxruntime* sherpa-onnx-go-linux/lib/x86_64-unknown-linux-gnu/
cp -v ./linux/sherpa_onnx/lib/libsherpa-onnx-c-api.so sherpa-onnx-go-linux/lib/x86_64-unknown-linux-gnu/
cp -v ./linux/sherpa_onnx/lib/libsherpa-onnx-core.so sherpa-onnx-go-linux/lib/x86_64-unknown-linux-gnu/

echo "Copy sources for Linux x86_64"
cp sherpa-onnx/c-api/c-api.h sherpa-onnx-go-linux/
cp scripts/go/sherpa_onnx.go sherpa-onnx-go-linux/

release_repo sherpa-onnx-go-linux

echo "========================================================================="

git clone git@github.com:k2-fsa/sherpa-onnx-go-macos.git

echo "Copy libs for macOS x86_64"
rm -rf sherpa-onnx-go-macos/lib/x86_64-apple-darwin/lib*
cp -v ./macos-x86_64/libkaldi-native-fbank-core.dylib sherpa-onnx-go-macos/lib/x86_64-apple-darwin
cp -v ./macos-x86_64/libonnxruntime* sherpa-onnx-go-macos/lib/x86_64-apple-darwin
cp -v ./macos-x86_64/libsherpa-onnx-c-api.dylib sherpa-onnx-go-macos/lib/x86_64-apple-darwin
cp -v ./macos-x86_64/libsherpa-onnx-core.dylib sherpa-onnx-go-macos/lib/x86_64-apple-darwin

echo "Copy libs for macOS arm64"
rm -rf sherpa-onnx-go-macos/lib/aarch64-apple-darwin/lib*
cp -v ./macos-arm64/libkaldi-native-fbank-core.dylib sherpa-onnx-go-macos/lib/aarch64-apple-darwin
cp -v ./macos-arm64/libonnxruntime* sherpa-onnx-go-macos/lib/aarch64-apple-darwin
cp -v ./macos-arm64/libsherpa-onnx-c-api.dylib sherpa-onnx-go-macos/lib/aarch64-apple-darwin
cp -v ./macos-arm64/libsherpa-onnx-core.dylib sherpa-onnx-go-macos/lib/aarch64-apple-darwin

echo "Copy sources for macOS"
cp sherpa-onnx/c-api/c-api.h sherpa-onnx-go-macos/
cp scripts/go/sherpa_onnx.go sherpa-onnx-go-macos/

release_repo sherpa-onnx-go-macos

echo "========================================================================="

git clone git@github.com:k2-fsa/sherpa-onnx-go-windows.git
echo "Copy libs for Windows x86_64"
rm -fv sherpa-onnx-go-windows/lib/x86_64-pc-windows-gnu/*
cp -v ./windows-x64/kaldi-native-fbank-core.dll sherpa-onnx-go-windows/lib/x86_64-pc-windows-gnu
cp -v ./windows-x64/onnxruntime.dll sherpa-onnx-go-windows/lib/x86_64-pc-windows-gnu
cp -v ./windows-x64/sherpa-onnx-c-api.dll sherpa-onnx-go-windows/lib/x86_64-pc-windows-gnu
cp -v ./windows-x64/sherpa-onnx-core.dll sherpa-onnx-go-windows/lib/x86_64-pc-windows-gnu

echo "Copy libs for Windows x86"
rm -fv sherpa-onnx-go-windows/lib/i686-pc-windows-gnu/*
cp -v ./windows-win32/kaldi-native-fbank-core.dll sherpa-onnx-go-windows/lib/i686-pc-windows-gnu
cp -v ./windows-win32/onnxruntime.dll sherpa-onnx-go-windows/lib/i686-pc-windows-gnu
cp -v ./windows-win32/sherpa-onnx-c-api.dll sherpa-onnx-go-windows/lib/i686-pc-windows-gnu
cp -v ./windows-win32/sherpa-onnx-core.dll sherpa-onnx-go-windows/lib/i686-pc-windows-gnu

echo "Copy sources for Windows"
cp sherpa-onnx/c-api/c-api.h sherpa-onnx-go-windows/
cp scripts/go/sherpa_onnx.go sherpa-onnx-go-windows/

release_repo sherpa-onnx-go-windows

echo "========================================================================="


rm -fv ~/.ssh/github
scripts/go/sherpa_onnx.go
0 → 100644
| 1 | +/* | ||
| 2 | +Speech recognition with [Next-gen Kaldi]. | ||
| 3 | + | ||
| 4 | +[sherpa-onnx] is an open-source speech recognition framework for [Next-gen Kaldi]. | ||
| 5 | +It depends only on [onnxruntime], supporting both streaming and non-streaming | ||
| 6 | +speech recognition. | ||
| 7 | + | ||
| 8 | +It does not need to access the network during recognition and everything | ||
| 9 | +runs locally. | ||
| 10 | + | ||
| 11 | +It supports a variety of platforms, such as Linux (x86_64, aarch64, arm), | ||
| 12 | +Windows (x86_64, x86), macOS (x86_64, arm64), etc. | ||
| 13 | + | ||
| 14 | +Usage examples: | ||
| 15 | + | ||
| 16 | + 1. Real-time speech recognition from a microphone | ||
| 17 | + | ||
| 18 | + Please see | ||
| 19 | + https://github.com/k2-fsa/sherpa-onnx/tree/master/go-api-examples/real-time-speech-recognition-from-microphone | ||
| 20 | + | ||
| 21 | + 2. Decode files using a non-streaming model | ||
| 22 | + | ||
| 23 | + Please see | ||
| 24 | + https://github.com/k2-fsa/sherpa-onnx/tree/master/go-api-examples/non-streaming-decode-files | ||
| 25 | + | ||
| 26 | + 3. Decode files using a streaming model | ||
| 27 | + | ||
| 28 | + Please see | ||
| 29 | + https://github.com/k2-fsa/sherpa-onnx/tree/master/go-api-examples/streaming-decode-files | ||
| 30 | + | ||
| 31 | +[sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx | ||
| 32 | +[onnxruntime]: https://github.com/microsoft/onnxruntime | ||
| 33 | +[Next-gen Kaldi]: https://github.com/k2-fsa/ | ||
| 34 | +*/ | ||
| 35 | +package sherpa_onnx | ||
| 36 | + | ||
| 37 | +// #include <stdlib.h> | ||
| 38 | +// #include "c-api.h" | ||
| 39 | +import "C" | ||
| 40 | +import "unsafe" | ||
| 41 | + | ||
// OnlineTransducerModelConfig is the configuration for online/streaming
// transducer models.
//
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
// to download pre-trained models.
type OnlineTransducerModelConfig struct {
	Encoder    string // Path to the encoder model, e.g., encoder.onnx or encoder.int8.onnx
	Decoder    string // Path to the decoder model.
	Joiner     string // Path to the joiner model.
	Tokens     string // Path to tokens.txt
	NumThreads int    // Number of threads to use for neural network computation
	Provider   string // Optional. Valid values are: cpu, cuda, coreml
	Debug      int    // 1 to show model meta information while loading it.
	ModelType  string // Optional. You can specify it for faster model initialization
}
| 57 | + | ||
// FeatureConfig is the configuration for the feature extractor.
type FeatureConfig struct {
	// Sample rate expected by the model. It is 16000 for all
	// pre-trained models provided by us
	SampleRate int
	// Feature dimension expected by the model. It is 80 for all
	// pre-trained models provided by us
	FeatureDim int
}
| 67 | + | ||
// OnlineRecognizerConfig is the configuration for the online/streaming
// recognizer. It bundles the feature extractor and model configurations with
// the decoding and endpoint-detection settings.
type OnlineRecognizerConfig struct {
	FeatConfig  FeatureConfig
	ModelConfig OnlineTransducerModelConfig

	// Valid decoding methods: greedy_search, modified_beam_search
	DecodingMethod string

	// Used only when DecodingMethod is modified_beam_search. It specifies
	// the maximum number of paths to keep during the search
	MaxActivePaths int

	EnableEndpoint int // 1 to enable endpoint detection.

	// Please see
	// https://k2-fsa.github.io/sherpa/ncnn/endpoint.html
	// for the meaning of Rule1MinTrailingSilence, Rule2MinTrailingSilence
	// and Rule3MinUtteranceLength.
	Rule1MinTrailingSilence float32
	Rule2MinTrailingSilence float32
	Rule3MinUtteranceLength float32
}
| 90 | + | ||
// OnlineRecognizerResult contains the recognition result for an online
// stream.
type OnlineRecognizerResult struct {
	Text string // The recognized text.
}
| 95 | + | ||
// OnlineRecognizer is the online recognizer class. It wraps a pointer to the
// C struct SherpaOnnxOnlineRecognizer.
type OnlineRecognizer struct {
	impl *C.struct_SherpaOnnxOnlineRecognizer
}
| 100 | + | ||
// OnlineStream is the online stream class. It wraps a pointer to the C struct
// SherpaOnnxOnlineStream.
type OnlineStream struct {
	impl *C.struct_SherpaOnnxOnlineStream
}
| 105 | + | ||
| 106 | +// Free the internal pointer inside the recognizer to avoid memory leak. | ||
| 107 | +func DeleteOnlineRecognizer(recognizer *OnlineRecognizer) { | ||
| 108 | + C.DestroyOnlineRecognizer(recognizer.impl) | ||
| 109 | + recognizer.impl = nil | ||
| 110 | +} | ||
| 111 | + | ||
| 112 | +// The user is responsible to invoke [DeleteOnlineRecognizer]() to free | ||
| 113 | +// the returned recognizer to avoid memory leak | ||
| 114 | +func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer { | ||
| 115 | + c := C.struct_SherpaOnnxOnlineRecognizerConfig{} | ||
| 116 | + c.feat_config.sample_rate = C.int(config.FeatConfig.SampleRate) | ||
| 117 | + c.feat_config.feature_dim = C.int(config.FeatConfig.FeatureDim) | ||
| 118 | + | ||
| 119 | + c.model_config.encoder = C.CString(config.ModelConfig.Encoder) | ||
| 120 | + defer C.free(unsafe.Pointer(c.model_config.encoder)) | ||
| 121 | + | ||
| 122 | + c.model_config.decoder = C.CString(config.ModelConfig.Decoder) | ||
| 123 | + defer C.free(unsafe.Pointer(c.model_config.decoder)) | ||
| 124 | + | ||
| 125 | + c.model_config.joiner = C.CString(config.ModelConfig.Joiner) | ||
| 126 | + defer C.free(unsafe.Pointer(c.model_config.joiner)) | ||
| 127 | + | ||
| 128 | + c.model_config.tokens = C.CString(config.ModelConfig.Tokens) | ||
| 129 | + defer C.free(unsafe.Pointer(c.model_config.tokens)) | ||
| 130 | + | ||
| 131 | + c.model_config.num_threads = C.int(config.ModelConfig.NumThreads) | ||
| 132 | + | ||
| 133 | + c.model_config.provider = C.CString(config.ModelConfig.Provider) | ||
| 134 | + defer C.free(unsafe.Pointer(c.model_config.provider)) | ||
| 135 | + | ||
| 136 | + c.model_config.debug = C.int(config.ModelConfig.Debug) | ||
| 137 | + | ||
| 138 | + c.model_config.model_type = C.CString(config.ModelConfig.ModelType) | ||
| 139 | + defer C.free(unsafe.Pointer(c.model_config.model_type)) | ||
| 140 | + | ||
| 141 | + c.decoding_method = C.CString(config.DecodingMethod) | ||
| 142 | + defer C.free(unsafe.Pointer(c.decoding_method)) | ||
| 143 | + | ||
| 144 | + c.max_active_paths = C.int(config.MaxActivePaths) | ||
| 145 | + c.enable_endpoint = C.int(config.EnableEndpoint) | ||
| 146 | + c.rule1_min_trailing_silence = C.float(config.Rule1MinTrailingSilence) | ||
| 147 | + c.rule2_min_trailing_silence = C.float(config.Rule2MinTrailingSilence) | ||
| 148 | + c.rule3_min_utterance_length = C.float(config.Rule3MinUtteranceLength) | ||
| 149 | + | ||
| 150 | + recognizer := &OnlineRecognizer{} | ||
| 151 | + recognizer.impl = C.CreateOnlineRecognizer(&c) | ||
| 152 | + | ||
| 153 | + return recognizer | ||
| 154 | +} | ||
| 155 | + | ||
| 156 | +// Delete the internal pointer inside the stream to avoid memory leak. | ||
| 157 | +func DeleteOnlineStream(stream *OnlineStream) { | ||
| 158 | + C.DestroyOnlineStream(stream.impl) | ||
| 159 | + stream.impl = nil | ||
| 160 | +} | ||
| 161 | + | ||
| 162 | +// The user is responsible to invoke [DeleteOnlineStream]() to free | ||
| 163 | +// the returned stream to avoid memory leak | ||
| 164 | +func NewOnlineStream(recognizer *OnlineRecognizer) *OnlineStream { | ||
| 165 | + stream := &OnlineStream{} | ||
| 166 | + stream.impl = C.CreateOnlineStream(recognizer.impl) | ||
| 167 | + return stream | ||
| 168 | +} | ||
| 169 | + | ||
| 170 | +// Input audio samples for the stream. | ||
| 171 | +// | ||
| 172 | +// sampleRate is the actual sample rate of the input audio samples. If it | ||
| 173 | +// is different from the sample rate expected by the feature extractor, we will | ||
| 174 | +// do resampling inside. | ||
| 175 | +// | ||
| 176 | +// samples contains audio samples. Each sample is in the range [-1, 1] | ||
| 177 | +func (s *OnlineStream) AcceptWaveform(sampleRate int, samples []float32) { | ||
| 178 | + C.AcceptWaveform(s.impl, C.int(sampleRate), (*C.float)(&samples[0]), C.int(len(samples))) | ||
| 179 | +} | ||
| 180 | + | ||
| 181 | +// Signal that there will be no incoming audio samples. | ||
| 182 | +// After calling this function, you cannot call [OnlineStream.AcceptWaveform] any longer. | ||
| 183 | +// | ||
| 184 | +// The main purpose of this function is to flush the remaining audio samples | ||
| 185 | +// buffered inside for feature extraction. | ||
| 186 | +func (s *OnlineStream) InputFinished() { | ||
| 187 | + C.InputFinished(s.impl) | ||
| 188 | +} | ||
| 189 | + | ||
| 190 | +// Check whether the stream has enough feature frames for decoding. | ||
| 191 | +// Return true if this stream is ready for decoding. Return false otherwise. | ||
| 192 | +// | ||
| 193 | +// You will usually use it like below: | ||
| 194 | +// | ||
| 195 | +// for recognizer.IsReady(s) { | ||
| 196 | +// recognizer.Decode(s) | ||
| 197 | +// } | ||
| 198 | +func (recognizer *OnlineRecognizer) IsReady(s *OnlineStream) bool { | ||
| 199 | + return C.IsOnlineStreamReady(recognizer.impl, s.impl) == 1 | ||
| 200 | +} | ||
| 201 | + | ||
| 202 | +// Return true if an endpoint is detected. | ||
| 203 | +// | ||
| 204 | +// You usually use it like below: | ||
| 205 | +// | ||
| 206 | +// if recognizer.IsEndpoint(s) { | ||
| 207 | +// // do your own stuff after detecting an endpoint | ||
| 208 | +// | ||
| 209 | +// recognizer.Reset(s) | ||
| 210 | +// } | ||
| 211 | +func (recognizer *OnlineRecognizer) IsEndpoint(s *OnlineStream) bool { | ||
| 212 | + return C.IsEndpoint(recognizer.impl, s.impl) == 1 | ||
| 213 | +} | ||
| 214 | + | ||
| 215 | +// After calling this function, the internal neural network model states | ||
| 216 | +// are reset and IsEndpoint(s) would return false. GetResult(s) would also | ||
| 217 | +// return an empty string. | ||
| 218 | +func (recognizer *OnlineRecognizer) Reset(s *OnlineStream) { | ||
| 219 | + C.Reset(recognizer.impl, s.impl) | ||
| 220 | +} | ||
| 221 | + | ||
| 222 | +// Decode the stream. Before calling this function, you have to ensure | ||
| 223 | +// that recognizer.IsReady(s) returns true. Otherwise, you will be SAD. | ||
| 224 | +// | ||
| 225 | +// You usually use it like below: | ||
| 226 | +// | ||
| 227 | +// for recognizer.IsReady(s) { | ||
| 228 | +// recognizer.Decode(s) | ||
| 229 | +// } | ||
| 230 | +func (recognizer *OnlineRecognizer) Decode(s *OnlineStream) { | ||
| 231 | + C.DecodeOnlineStream(recognizer.impl, s.impl) | ||
| 232 | +} | ||
| 233 | + | ||
| 234 | +// Decode multiple streams in parallel, i.e., in batch. | ||
| 235 | +// You have to ensure that each stream is ready for decoding. Otherwise, | ||
| 236 | +// you will be SAD. | ||
| 237 | +func (recognizer *OnlineRecognizer) DecodeStreams(s []*OnlineStream) { | ||
| 238 | + ss := make([]*C.struct_SherpaOnnxOnlineStream, len(s)) | ||
| 239 | + for i, v := range s { | ||
| 240 | + ss[i] = v.impl | ||
| 241 | + } | ||
| 242 | + | ||
| 243 | + C.DecodeMultipleOnlineStreams(recognizer.impl, &ss[0], C.int(len(s))) | ||
| 244 | +} | ||
| 245 | + | ||
| 246 | +// Get the current result of stream since the last invoke of Reset() | ||
| 247 | +func (recognizer *OnlineRecognizer) GetResult(s *OnlineStream) *OnlineRecognizerResult { | ||
| 248 | + p := C.GetOnlineStreamResult(recognizer.impl, s.impl) | ||
| 249 | + defer C.DestroyOnlineRecognizerResult(p) | ||
| 250 | + result := &OnlineRecognizerResult{} | ||
| 251 | + result.Text = C.GoString(p.text) | ||
| 252 | + | ||
| 253 | + return result | ||
| 254 | +} | ||
| 255 | + | ||
// OfflineTransducerModelConfig is the configuration for an
// offline/non-streaming transducer.
//
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/index.html
// to download pre-trained models.
type OfflineTransducerModelConfig struct {
	// Encoder is the path to the encoder model, i.e., encoder.onnx or
	// encoder.int8.onnx.
	Encoder string
	// Decoder is the path to the decoder model.
	Decoder string
	// Joiner is the path to the joiner model.
	Joiner string
}
| 266 | + | ||
// OfflineParaformerModelConfig is the configuration for an
// offline/non-streaming paraformer.
//
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html
// to download pre-trained models.
type OfflineParaformerModelConfig struct {
	// Model is the path to the model, e.g., model.onnx or model.int8.onnx.
	Model string
}
| 275 | + | ||
// OfflineNemoEncDecCtcModelConfig is the configuration for
// offline/non-streaming NeMo CTC models.
//
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/index.html
// to download pre-trained models.
type OfflineNemoEncDecCtcModelConfig struct {
	// Model is the path to the model, e.g., model.onnx or model.int8.onnx.
	Model string
}
| 284 | + | ||
// OfflineLMConfig is the configuration for the offline LM.
type OfflineLMConfig struct {
	// Model is the path to the model.
	Model string
	// Scale is the scale for the LM score.
	Scale float32
}
| 290 | + | ||
| 291 | +type OfflineModelConfig struct { | ||
| 292 | + Transducer OfflineTransducerModelConfig | ||
| 293 | + Paraformer OfflineParaformerModelConfig | ||
| 294 | + NemoCTC OfflineNemoEncDecCtcModelConfig | ||
| 295 | + Tokens string // Path to tokens.txt | ||
| 296 | + | ||
| 297 | + // Number of threads to use for neural network computation | ||
| 298 | + NumThreads int | ||
| 299 | + | ||
| 300 | + // 1 to print model meta information while loading | ||
| 301 | + Debug int | ||
| 302 | + | ||
| 303 | + // Optional. Valid values: cpu, cuda, coreml | ||
| 304 | + Provider string | ||
| 305 | + | ||
| 306 | + // Optional. Specify it for faster model initialization. | ||
| 307 | + ModelType string | ||
| 308 | +} | ||
| 309 | + | ||
| 310 | +// Configuration for the offline/non-streaming recognizer. | ||
| 311 | +type OfflineRecognizerConfig struct { | ||
| 312 | + FeatConfig FeatureConfig | ||
| 313 | + ModelConfig OfflineModelConfig | ||
| 314 | + LmConfig OfflineLMConfig | ||
| 315 | + | ||
| 316 | + // Valid decoding method: greedy_search, modified_beam_search | ||
| 317 | + DecodingMethod string | ||
| 318 | + | ||
| 319 | + // Used only when DecodingMethod is modified_beam_search. | ||
| 320 | + MaxActivePaths int | ||
| 321 | +} | ||
| 322 | + | ||
| 323 | +// It wraps a pointer from C | ||
| 324 | +type OfflineRecognizer struct { | ||
| 325 | + impl *C.struct_SherpaOnnxOfflineRecognizer | ||
| 326 | +} | ||
| 327 | + | ||
| 328 | +// It wraps a pointer from C | ||
| 329 | +type OfflineStream struct { | ||
| 330 | + impl *C.struct_SherpaOnnxOfflineStream | ||
| 331 | +} | ||
| 332 | + | ||
// OfflineRecognizerResult contains the recognition result of an offline
// stream.
type OfflineRecognizerResult struct {
	// Text is the recognized text.
	Text string
}
| 337 | + | ||
| 338 | +// Frees the internal pointer of the recognition to avoid memory leak. | ||
| 339 | +func DeleteOfflineRecognizer(recognizer *OfflineRecognizer) { | ||
| 340 | + C.DestroyOfflineRecognizer(recognizer.impl) | ||
| 341 | + recognizer.impl = nil | ||
| 342 | +} | ||
| 343 | + | ||
| 344 | +// The user is responsible to invoke [DeleteOfflineRecognizer]() to free | ||
| 345 | +// the returned recognizer to avoid memory leak | ||
| 346 | +func NewOfflineRecognizer(config *OfflineRecognizerConfig) *OfflineRecognizer { | ||
| 347 | + c := C.struct_SherpaOnnxOfflineRecognizerConfig{} | ||
| 348 | + c.feat_config.sample_rate = C.int(config.FeatConfig.SampleRate) | ||
| 349 | + c.feat_config.feature_dim = C.int(config.FeatConfig.FeatureDim) | ||
| 350 | + | ||
| 351 | + c.model_config.transducer.encoder = C.CString(config.ModelConfig.Transducer.Encoder) | ||
| 352 | + defer C.free(unsafe.Pointer(c.model_config.transducer.encoder)) | ||
| 353 | + | ||
| 354 | + c.model_config.transducer.decoder = C.CString(config.ModelConfig.Transducer.Decoder) | ||
| 355 | + defer C.free(unsafe.Pointer(c.model_config.transducer.decoder)) | ||
| 356 | + | ||
| 357 | + c.model_config.transducer.joiner = C.CString(config.ModelConfig.Transducer.Joiner) | ||
| 358 | + defer C.free(unsafe.Pointer(c.model_config.transducer.joiner)) | ||
| 359 | + | ||
| 360 | + c.model_config.paraformer.model = C.CString(config.ModelConfig.Paraformer.Model) | ||
| 361 | + defer C.free(unsafe.Pointer(c.model_config.paraformer.model)) | ||
| 362 | + | ||
| 363 | + c.model_config.nemo_ctc.model = C.CString(config.ModelConfig.NemoCTC.Model) | ||
| 364 | + defer C.free(unsafe.Pointer(c.model_config.nemo_ctc.model)) | ||
| 365 | + | ||
| 366 | + c.model_config.tokens = C.CString(config.ModelConfig.Tokens) | ||
| 367 | + defer C.free(unsafe.Pointer(c.model_config.tokens)) | ||
| 368 | + | ||
| 369 | + c.model_config.num_threads = C.int(config.ModelConfig.NumThreads) | ||
| 370 | + | ||
| 371 | + c.model_config.debug = C.int(config.ModelConfig.Debug) | ||
| 372 | + | ||
| 373 | + c.model_config.provider = C.CString(config.ModelConfig.Provider) | ||
| 374 | + defer C.free(unsafe.Pointer(c.model_config.provider)) | ||
| 375 | + | ||
| 376 | + c.model_config.model_type = C.CString(config.ModelConfig.ModelType) | ||
| 377 | + defer C.free(unsafe.Pointer(c.model_config.model_type)) | ||
| 378 | + | ||
| 379 | + c.lm_config.model = C.CString(config.LmConfig.Model) | ||
| 380 | + defer C.free(unsafe.Pointer(c.lm_config.model)) | ||
| 381 | + | ||
| 382 | + c.lm_config.scale = C.float(config.LmConfig.Scale) | ||
| 383 | + | ||
| 384 | + c.decoding_method = C.CString(config.DecodingMethod) | ||
| 385 | + defer C.free(unsafe.Pointer(c.decoding_method)) | ||
| 386 | + | ||
| 387 | + c.max_active_paths = C.int(config.MaxActivePaths) | ||
| 388 | + | ||
| 389 | + recognizer := &OfflineRecognizer{} | ||
| 390 | + recognizer.impl = C.CreateOfflineRecognizer(&c) | ||
| 391 | + | ||
| 392 | + return recognizer | ||
| 393 | +} | ||
| 394 | + | ||
| 395 | +// Frees the internal pointer of the stream to avoid memory leak. | ||
| 396 | +func DeleteOfflineStream(stream *OfflineStream) { | ||
| 397 | + C.DestroyOfflineStream(stream.impl) | ||
| 398 | + stream.impl = nil | ||
| 399 | +} | ||
| 400 | + | ||
| 401 | +// The user is responsible to invoke [DeleteOfflineStream]() to free | ||
| 402 | +// the returned stream to avoid memory leak | ||
| 403 | +func NewOfflineStream(recognizer *OfflineRecognizer) *OfflineStream { | ||
| 404 | + stream := &OfflineStream{} | ||
| 405 | + stream.impl = C.CreateOfflineStream(recognizer.impl) | ||
| 406 | + return stream | ||
| 407 | +} | ||
| 408 | + | ||
| 409 | +// Input audio samples for the offline stream. | ||
| 410 | +// Please only call it once. That is, input all samples at once. | ||
| 411 | +// | ||
| 412 | +// sampleRate is the sample rate of the input audio samples. If it is different | ||
| 413 | +// from the value expected by the feature extractor, we will do resampling inside. | ||
| 414 | +// | ||
| 415 | +// samples contains the actual audio samples. Each sample is in the range [-1, 1]. | ||
| 416 | +func (s *OfflineStream) AcceptWaveform(sampleRate int, samples []float32) { | ||
| 417 | + C.AcceptWaveformOffline(s.impl, C.int(sampleRate), (*C.float)(&samples[0]), C.int(len(samples))) | ||
| 418 | +} | ||
| 419 | + | ||
| 420 | +// Decode the offline stream. | ||
| 421 | +func (recognizer *OfflineRecognizer) Decode(s *OfflineStream) { | ||
| 422 | + C.DecodeOfflineStream(recognizer.impl, s.impl) | ||
| 423 | +} | ||
| 424 | + | ||
| 425 | +// Decode multiple streams in parallel, i.e., in batch. | ||
| 426 | +func (recognizer *OfflineRecognizer) DecodeStreams(s []*OfflineStream) { | ||
| 427 | + ss := make([]*C.struct_SherpaOnnxOfflineStream, len(s)) | ||
| 428 | + for i, v := range s { | ||
| 429 | + ss[i] = v.impl | ||
| 430 | + } | ||
| 431 | + | ||
| 432 | + C.DecodeMultipleOfflineStreams(recognizer.impl, &ss[0], C.int(len(s))) | ||
| 433 | +} | ||
| 434 | + | ||
| 435 | +// Get the recognition result of the offline stream. | ||
| 436 | +func (s *OfflineStream) GetResult() *OfflineRecognizerResult { | ||
| 437 | + p := C.GetOfflineStreamResult(s.impl) | ||
| 438 | + defer C.DestroyOfflineRecognizerResult(p) | ||
| 439 | + result := &OfflineRecognizerResult{} | ||
| 440 | + result.Text = C.GoString(p.text) | ||
| 441 | + | ||
| 442 | + return result | ||
| 443 | +} |
-
请 注册 或 登录 后发表评论