Committed by
GitHub
Add Go API examples for adding punctuation to text. (#1478)
正在显示
9 个修改的文件
包含
74 行增加
和
6 行删除
| @@ -68,6 +68,13 @@ jobs: | @@ -68,6 +68,13 @@ jobs: | ||
| 68 | run: | | 68 | run: | |
| 69 | gcc --version | 69 | gcc --version |
| 70 | 70 | ||
| 71 | + - name: Test adding punctuation | ||
| 72 | + if: matrix.os != 'windows-latest' | ||
| 73 | + shell: bash | ||
| 74 | + run: | | ||
| 75 | + cd go-api-examples/add-punctuation/ | ||
| 76 | + ./run.sh | ||
| 77 | + | ||
| 71 | - name: Test non-streaming speaker diarization | 78 | - name: Test non-streaming speaker diarization |
| 72 | if: matrix.os != 'windows-latest' | 79 | if: matrix.os != 'windows-latest' |
| 73 | shell: bash | 80 | shell: bash |
| @@ -134,6 +134,12 @@ jobs: | @@ -134,6 +134,12 @@ jobs: | ||
| 134 | name: ${{ matrix.os }}-libs | 134 | name: ${{ matrix.os }}-libs |
| 135 | path: to-upload/ | 135 | path: to-upload/ |
| 136 | 136 | ||
| 137 | + - name: Test adding punctuation | ||
| 138 | + shell: bash | ||
| 139 | + run: | | ||
| 140 | + cd scripts/go/_internal/add-punctuation/ | ||
| 141 | + ./run.sh | ||
| 142 | + | ||
| 137 | - name: Test non-streaming speaker diarization | 143 | - name: Test non-streaming speaker diarization |
| 138 | shell: bash | 144 | shell: bash |
| 139 | run: | | 145 | run: | |
go-api-examples/add-punctuation/go.mod
0 → 100644
go-api-examples/add-punctuation/main.go
0 → 100644
| 1 | +package main | ||
| 2 | + | ||
| 3 | +import ( | ||
| 4 | + sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx" | ||
| 5 | + "log" | ||
| 6 | +) | ||
| 7 | + | ||
| 8 | +func main() { | ||
| 9 | + log.SetFlags(log.LstdFlags | log.Lmicroseconds) | ||
| 10 | + | ||
| 11 | + config := sherpa.OfflinePunctuationConfig{} | ||
| 12 | + config.Model.CtTransformer = "./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx" | ||
| 13 | + config.Model.NumThreads = 1 | ||
| 14 | + config.Model.Provider = "cpu" | ||
| 15 | + | ||
| 16 | + punct := sherpa.NewOfflinePunctuation(&config) | ||
| 17 | + defer sherpa.DeleteOfflinePunc(punct) | ||
| 18 | + | ||
| 19 | + textArray := []string{ | ||
| 20 | + "这是一个测试你好吗How are you我很好thank you are you ok谢谢你", | ||
| 21 | + "我们都是木头人不会说话不会动", | ||
| 22 | + "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry", | ||
| 23 | + } | ||
| 24 | + log.Println("----------") | ||
| 25 | + for _, text := range textArray { | ||
| 26 | + newText := punct.AddPunct(text) | ||
| 27 | + log.Printf("Input text: %v", text) | ||
| 28 | + log.Printf("Output text: %v", newText) | ||
| 29 | + log.Println("----------") | ||
| 30 | + } | ||
| 31 | +} |
go-api-examples/add-punctuation/run.sh
0 → 100755
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +if [ ! -d ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12 ]; then | ||
| 6 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 | ||
| 7 | + tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 | ||
| 8 | + rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 | ||
| 9 | +fi | ||
| 10 | + | ||
| 11 | +go mod tidy | ||
| 12 | +go build | ||
| 13 | + | ||
| 14 | +./add-punctuation |
scripts/go/_internal/add-punctuation/go.mod
0 → 100644
scripts/go/_internal/add-punctuation/main.go
0 → 120000
| 1 | +../../../../go-api-examples/add-punctuation/main.go |
scripts/go/_internal/add-punctuation/run.sh
0 → 120000
| 1 | +../../../../go-api-examples/add-punctuation/run.sh |
| @@ -1322,10 +1322,10 @@ func (sd *OfflineSpeakerDiarization) Process(samples []float32) []OfflineSpeaker | @@ -1322,10 +1322,10 @@ func (sd *OfflineSpeakerDiarization) Process(samples []float32) []OfflineSpeaker | ||
| 1322 | // For punctuation | 1322 | // For punctuation |
| 1323 | // ============================================================ | 1323 | // ============================================================ |
| 1324 | type OfflinePunctuationModelConfig struct { | 1324 | type OfflinePunctuationModelConfig struct { |
| 1325 | - Ct_transformer string | ||
| 1326 | - Num_threads C.int | ||
| 1327 | - Debug C.int // true to print debug information of the model | ||
| 1328 | - Provider string | 1325 | + CtTransformer string |
| 1326 | + NumThreads C.int | ||
| 1327 | + Debug C.int // true to print debug information of the model | ||
| 1328 | + Provider string | ||
| 1329 | } | 1329 | } |
| 1330 | 1330 | ||
| 1331 | type OfflinePunctuationConfig struct { | 1331 | type OfflinePunctuationConfig struct { |
| @@ -1338,10 +1338,10 @@ type OfflinePunctuation struct { | @@ -1338,10 +1338,10 @@ type OfflinePunctuation struct { | ||
| 1338 | 1338 | ||
| 1339 | func NewOfflinePunctuation(config *OfflinePunctuationConfig) *OfflinePunctuation { | 1339 | func NewOfflinePunctuation(config *OfflinePunctuationConfig) *OfflinePunctuation { |
| 1340 | cfg := C.struct_SherpaOnnxOfflinePunctuationConfig{} | 1340 | cfg := C.struct_SherpaOnnxOfflinePunctuationConfig{} |
| 1341 | - cfg.model.ct_transformer = C.CString(config.Model.Ct_transformer) | 1341 | + cfg.model.ct_transformer = C.CString(config.Model.CtTransformer) |
| 1342 | defer C.free(unsafe.Pointer(cfg.model.ct_transformer)) | 1342 | defer C.free(unsafe.Pointer(cfg.model.ct_transformer)) |
| 1343 | 1343 | ||
| 1344 | - cfg.model.num_threads = config.Model.Num_threads | 1344 | + cfg.model.num_threads = config.Model.NumThreads |
| 1345 | cfg.model.debug = config.Model.Debug | 1345 | cfg.model.debug = config.Model.Debug |
| 1346 | cfg.model.provider = C.CString(config.Model.Provider) | 1346 | cfg.model.provider = C.CString(config.Model.Provider) |
| 1347 | defer C.free(unsafe.Pointer(cfg.model.provider)) | 1347 | defer C.free(unsafe.Pointer(cfg.model.provider)) |
-
请 注册 或 登录 后发表评论