Fangjun Kuang
Committed by GitHub

Add Go API examples for adding punctuations to text. (#1478)

@@ -68,6 +68,13 @@ jobs: @@ -68,6 +68,13 @@ jobs:
68 run: | 68 run: |
69 gcc --version 69 gcc --version
70 70
  71 + - name: Test adding punctuation
  72 + if: matrix.os != 'windows-latest'
  73 + shell: bash
  74 + run: |
  75 + cd go-api-examples/add-punctuation/
  76 + ./run.sh
  77 +
71 - name: Test non-streaming speaker diarization 78 - name: Test non-streaming speaker diarization
72 if: matrix.os != 'windows-latest' 79 if: matrix.os != 'windows-latest'
73 shell: bash 80 shell: bash
@@ -134,6 +134,12 @@ jobs: @@ -134,6 +134,12 @@ jobs:
134 name: ${{ matrix.os }}-libs 134 name: ${{ matrix.os }}-libs
135 path: to-upload/ 135 path: to-upload/
136 136
  137 + - name: Test adding punctuation
  138 + shell: bash
  139 + run: |
  140 + cd scripts/go/_internal/add-punctuation/
  141 + ./run.sh
  142 +
137 - name: Test non-streaming speaker diarization 143 - name: Test non-streaming speaker diarization
138 shell: bash 144 shell: bash
139 run: | 145 run: |
  1 +module add-punctuation
  2 +
  3 +go 1.12
  1 +package main
  2 +
  3 +import (
  4 + sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
  5 + "log"
  6 +)
  7 +
  8 +func main() {
  9 + log.SetFlags(log.LstdFlags | log.Lmicroseconds)
  10 +
  11 + config := sherpa.OfflinePunctuationConfig{}
  12 + config.Model.CtTransformer = "./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx"
  13 + config.Model.NumThreads = 1
  14 + config.Model.Provider = "cpu"
  15 +
  16 + punct := sherpa.NewOfflinePunctuation(&config)
  17 + defer sherpa.DeleteOfflinePunc(punct)
  18 +
  19 + textArray := []string{
  20 + "这是一个测试你好吗How are you我很好thank you are you ok谢谢你",
  21 + "我们都是木头人不会说话不会动",
  22 + "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
  23 + }
  24 + log.Println("----------")
  25 + for _, text := range textArray {
  26 + newText := punct.AddPunct(text)
  27 + log.Printf("Input text: %v", text)
  28 + log.Printf("Output text: %v", newText)
  29 + log.Println("----------")
  30 + }
  31 +}
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +if [ ! -d ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12 ]; then
  6 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
  7 + tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
  8 + rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
  9 +fi
  10 +
  11 +go mod tidy
  12 +go build
  13 +
  14 +./add-punctuation
  1 +module add-punctuation
  2 +
  3 +go 1.12
  4 +
  5 +replace github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx => ../
  1 +../../../../go-api-examples/add-punctuation/main.go
  1 +../../../../go-api-examples/add-punctuation/run.sh
@@ -1322,10 +1322,10 @@ func (sd *OfflineSpeakerDiarization) Process(samples []float32) []OfflineSpeaker @@ -1322,10 +1322,10 @@ func (sd *OfflineSpeakerDiarization) Process(samples []float32) []OfflineSpeaker
1322 // For punctuation 1322 // For punctuation
1323 // ============================================================ 1323 // ============================================================
1324 type OfflinePunctuationModelConfig struct { 1324 type OfflinePunctuationModelConfig struct {
1325 - Ct_transformer string  
1326 - Num_threads C.int  
1327 - Debug C.int // true to print debug information of the model  
1328 - Provider string 1325 + CtTransformer string
  1326 + NumThreads C.int
  1327 + Debug C.int // true to print debug information of the model
  1328 + Provider string
1329 } 1329 }
1330 1330
1331 type OfflinePunctuationConfig struct { 1331 type OfflinePunctuationConfig struct {
@@ -1338,10 +1338,10 @@ type OfflinePunctuation struct { @@ -1338,10 +1338,10 @@ type OfflinePunctuation struct {
1338 1338
1339 func NewOfflinePunctuation(config *OfflinePunctuationConfig) *OfflinePunctuation { 1339 func NewOfflinePunctuation(config *OfflinePunctuationConfig) *OfflinePunctuation {
1340 cfg := C.struct_SherpaOnnxOfflinePunctuationConfig{} 1340 cfg := C.struct_SherpaOnnxOfflinePunctuationConfig{}
1341 - cfg.model.ct_transformer = C.CString(config.Model.Ct_transformer) 1341 + cfg.model.ct_transformer = C.CString(config.Model.CtTransformer)
1342 defer C.free(unsafe.Pointer(cfg.model.ct_transformer)) 1342 defer C.free(unsafe.Pointer(cfg.model.ct_transformer))
1343 1343
1344 - cfg.model.num_threads = config.Model.Num_threads 1344 + cfg.model.num_threads = config.Model.NumThreads
1345 cfg.model.debug = config.Model.Debug 1345 cfg.model.debug = config.Model.Debug
1346 cfg.model.provider = C.CString(config.Model.Provider) 1346 cfg.model.provider = C.CString(config.Model.Provider)
1347 defer C.free(unsafe.Pointer(cfg.model.provider)) 1347 defer C.free(unsafe.Pointer(cfg.model.provider))