正在显示
10 个修改的文件
包含
268 行增加
和
0 行删除
| @@ -68,6 +68,13 @@ jobs: | @@ -68,6 +68,13 @@ jobs: | ||
| 68 | run: | | 68 | run: | |
| 69 | gcc --version | 69 | gcc --version |
| 70 | 70 | ||
| 71 | + - name: Test Keyword spotting | ||
| 72 | + if: matrix.os != 'windows-latest' | ||
| 73 | + shell: bash | ||
| 74 | + run: | | ||
| 75 | + cd go-api-examples/keyword-spotting-from-file/ | ||
| 76 | + ./run.sh | ||
| 77 | + | ||
| 71 | - name: Test adding punctuation | 78 | - name: Test adding punctuation |
| 72 | if: matrix.os != 'windows-latest' | 79 | if: matrix.os != 'windows-latest' |
| 73 | shell: bash | 80 | shell: bash |
| @@ -134,6 +134,15 @@ jobs: | @@ -134,6 +134,15 @@ jobs: | ||
| 134 | name: ${{ matrix.os }}-libs | 134 | name: ${{ matrix.os }}-libs |
| 135 | path: to-upload/ | 135 | path: to-upload/ |
| 136 | 136 | ||
| 137 | + - name: Test Keyword spotting | ||
| 138 | + shell: bash | ||
| 139 | + run: | | ||
| 140 | + cd scripts/go/_internal/keyword-spotting-from-file/ | ||
| 141 | + | ||
| 142 | + ./run.sh | ||
| 143 | + | ||
| 144 | + ls -lh | ||
| 145 | + | ||
| 137 | - name: Test non-streaming decoding files | 146 | - name: Test non-streaming decoding files |
| 138 | shell: bash | 147 | shell: bash |
| 139 | run: | | 148 | run: | |
| 1 | +package main | ||
| 2 | + | ||
| 3 | +import ( | ||
| 4 | + sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx" | ||
| 5 | + "log" | ||
| 6 | +) | ||
| 7 | + | ||
| 8 | +func main() { | ||
| 9 | + log.SetFlags(log.LstdFlags | log.Lmicroseconds) | ||
| 10 | + | ||
| 11 | + config := sherpa.KeywordSpotterConfig{} | ||
| 12 | + | ||
| 13 | + // Please download the models from | ||
| 14 | + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models | ||
| 15 | + | ||
| 16 | + config.ModelConfig.Transducer.Encoder = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx" | ||
| 17 | + config.ModelConfig.Transducer.Decoder = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx" | ||
| 18 | + config.ModelConfig.Transducer.Joiner = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx" | ||
| 19 | + config.ModelConfig.Tokens = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt" | ||
| 20 | + config.KeywordsFile = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt" | ||
| 21 | + config.ModelConfig.NumThreads = 1 | ||
| 22 | + config.ModelConfig.Debug = 1 | ||
| 23 | + | ||
| 24 | + spotter := sherpa.NewKeywordSpotter(&config) | ||
| 25 | + defer sherpa.DeleteKeywordSpotter(spotter) | ||
| 26 | + | ||
| 27 | + wave_filename := "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav" | ||
| 28 | + | ||
| 29 | + wave := sherpa.ReadWave(wave_filename) | ||
| 30 | + if wave == nil { | ||
| 31 | + log.Printf("Failed to read %v\n", wave_filename) | ||
| 32 | + return | ||
| 33 | + } | ||
| 34 | + | ||
| 35 | + log.Println("----------Use pre-defined keywords----------") | ||
| 36 | + | ||
| 37 | + stream := sherpa.NewKeywordStream(spotter) | ||
| 38 | + defer sherpa.DeleteOnlineStream(stream) | ||
| 39 | + | ||
| 40 | + stream.AcceptWaveform(wave.SampleRate, wave.Samples) | ||
| 41 | + | ||
| 42 | + for spotter.IsReady(stream) { | ||
| 43 | + spotter.Decode(stream) | ||
| 44 | + result := spotter.GetResult(stream) | ||
| 45 | + if result.Keyword != "" { | ||
| 46 | + log.Printf("Detected %v\n", result.Keyword) | ||
| 47 | + } | ||
| 48 | + } | ||
| 49 | + | ||
| 50 | + log.Println("----------Use pre-defined keywords + add a new keyword----------") | ||
| 51 | + | ||
| 52 | + stream2 := sherpa.NewKeywordStreamWithKeywords(spotter, "y ǎn y uán @演员") | ||
| 53 | + defer sherpa.DeleteOnlineStream(stream2) | ||
| 54 | + | ||
| 55 | + stream2.AcceptWaveform(wave.SampleRate, wave.Samples) | ||
| 56 | + | ||
| 57 | + for spotter.IsReady(stream2) { | ||
| 58 | + spotter.Decode(stream2) | ||
| 59 | + result := spotter.GetResult(stream2) | ||
| 60 | + if result.Keyword != "" { | ||
| 61 | + log.Printf("Detected %v\n", result.Keyword) | ||
| 62 | + } | ||
| 63 | + } | ||
| 64 | + | ||
| 65 | + log.Println("----------Use pre-defined keywords + add 2 new keywords----------") | ||
| 66 | + | ||
| 67 | + stream3 := sherpa.NewKeywordStreamWithKeywords(spotter, "y ǎn y uán @演员/zh ī m íng @知名") | ||
| 68 | + defer sherpa.DeleteOnlineStream(stream3) | ||
| 69 | + | ||
| 70 | + stream3.AcceptWaveform(wave.SampleRate, wave.Samples) | ||
| 71 | + | ||
| 72 | + for spotter.IsReady(stream3) { | ||
| 73 | + spotter.Decode(stream3) | ||
| 74 | + result := spotter.GetResult(stream3) | ||
| 75 | + if result.Keyword != "" { | ||
| 76 | + log.Printf("Detected %v\n", result.Keyword) | ||
| 77 | + } | ||
| 78 | + } | ||
| 79 | +} |
#!/usr/bin/env bash
#
# Download the keyword-spotting model on first use, then build and run the
# Go example located in the current directory.

set -ex

model=sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01

# tokens.txt doubles as the marker that the model archive was already
# downloaded and extracted.
if [ ! -f "./$model/tokens.txt" ]; then
  # -f makes curl exit non-zero on an HTTP error instead of saving the
  # server's error page under the archive name.
  curl -fSL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/$model.tar.bz2"
  tar xvf "$model.tar.bz2"
  rm "$model.tar.bz2"
fi

go mod tidy
go build
./keyword-spotting-from-file
| 1 | +keyword-spotting-from-file |
| 1 | +../../../../go-api-examples/keyword-spotting-from-file/main.go |
| 1 | +../../../../go-api-examples/keyword-spotting-from-file/run.sh |
| @@ -1385,3 +1385,151 @@ func (punc *OfflinePunctuation) AddPunct(text string) string { | @@ -1385,3 +1385,151 @@ func (punc *OfflinePunctuation) AddPunct(text string) string { | ||
| 1385 | 1385 | ||
| 1386 | return text_with_punct | 1386 | return text_with_punct |
| 1387 | } | 1387 | } |
| 1388 | + | ||
// KeywordSpotterConfig is the configuration passed to [NewKeywordSpotter].
// Each field is copied into the matching field of the C keyword-spotter
// config before the C constructor is called.
type KeywordSpotterConfig struct {
	FeatConfig        FeatureConfig     // Feature settings; only SampleRate and FeatureDim are forwarded.
	ModelConfig       OnlineModelConfig // Model files and runtime options (threads, provider, debug, ...).
	MaxActivePaths    int               // Forwarded as max_active_paths.
	KeywordsFile      string            // Path of the keywords file.
	KeywordsScore     float32           // Forwarded as keywords_score.
	KeywordsThreshold float32           // Forwarded as keywords_threshold.
	KeywordsBuf       string            // Forwarded as keywords_buf; presumably in-memory keywords used instead of KeywordsFile (TODO confirm).
	KeywordsBufSize   int               // Forwarded as keywords_buf_size; presumably the byte length of KeywordsBuf (TODO confirm).
}
| 1400 | + | ||
// KeywordSpotterResult is the value returned by [KeywordSpotter.GetResult].
type KeywordSpotterResult struct {
	// Keyword is the Go copy of the keyword string reported by the C library.
	Keyword string
}
| 1404 | + | ||
// KeywordSpotter wraps a handle to the C keyword spotter. Create it with
// [NewKeywordSpotter] and release it with [DeleteKeywordSpotter].
type KeywordSpotter struct {
	impl *C.struct_SherpaOnnxKeywordSpotter // Handle owned by this wrapper; set to nil once deleted.
}
| 1408 | + | ||
| 1409 | +// Free the internal pointer inside the recognizer to avoid memory leak. | ||
| 1410 | +func DeleteKeywordSpotter(spotter *KeywordSpotter) { | ||
| 1411 | + C.SherpaOnnxDestroyKeywordSpotter(spotter.impl) | ||
| 1412 | + spotter.impl = nil | ||
| 1413 | +} | ||
| 1414 | + | ||
| 1415 | +// The user is responsible to invoke [DeleteKeywordSpotter]() to free | ||
| 1416 | +// the returned spotter to avoid memory leak | ||
| 1417 | +func NewKeywordSpotter(config *KeywordSpotterConfig) *KeywordSpotter { | ||
| 1418 | + c := C.struct_SherpaOnnxKeywordSpotterConfig{} | ||
| 1419 | + c.feat_config.sample_rate = C.int(config.FeatConfig.SampleRate) | ||
| 1420 | + c.feat_config.feature_dim = C.int(config.FeatConfig.FeatureDim) | ||
| 1421 | + | ||
| 1422 | + c.model_config.transducer.encoder = C.CString(config.ModelConfig.Transducer.Encoder) | ||
| 1423 | + defer C.free(unsafe.Pointer(c.model_config.transducer.encoder)) | ||
| 1424 | + | ||
| 1425 | + c.model_config.transducer.decoder = C.CString(config.ModelConfig.Transducer.Decoder) | ||
| 1426 | + defer C.free(unsafe.Pointer(c.model_config.transducer.decoder)) | ||
| 1427 | + | ||
| 1428 | + c.model_config.transducer.joiner = C.CString(config.ModelConfig.Transducer.Joiner) | ||
| 1429 | + defer C.free(unsafe.Pointer(c.model_config.transducer.joiner)) | ||
| 1430 | + | ||
| 1431 | + c.model_config.paraformer.encoder = C.CString(config.ModelConfig.Paraformer.Encoder) | ||
| 1432 | + defer C.free(unsafe.Pointer(c.model_config.paraformer.encoder)) | ||
| 1433 | + | ||
| 1434 | + c.model_config.paraformer.decoder = C.CString(config.ModelConfig.Paraformer.Decoder) | ||
| 1435 | + defer C.free(unsafe.Pointer(c.model_config.paraformer.decoder)) | ||
| 1436 | + | ||
| 1437 | + c.model_config.zipformer2_ctc.model = C.CString(config.ModelConfig.Zipformer2Ctc.Model) | ||
| 1438 | + defer C.free(unsafe.Pointer(c.model_config.zipformer2_ctc.model)) | ||
| 1439 | + | ||
| 1440 | + c.model_config.tokens = C.CString(config.ModelConfig.Tokens) | ||
| 1441 | + defer C.free(unsafe.Pointer(c.model_config.tokens)) | ||
| 1442 | + | ||
| 1443 | + c.model_config.num_threads = C.int(config.ModelConfig.NumThreads) | ||
| 1444 | + | ||
| 1445 | + c.model_config.provider = C.CString(config.ModelConfig.Provider) | ||
| 1446 | + defer C.free(unsafe.Pointer(c.model_config.provider)) | ||
| 1447 | + | ||
| 1448 | + c.model_config.debug = C.int(config.ModelConfig.Debug) | ||
| 1449 | + | ||
| 1450 | + c.model_config.model_type = C.CString(config.ModelConfig.ModelType) | ||
| 1451 | + defer C.free(unsafe.Pointer(c.model_config.model_type)) | ||
| 1452 | + | ||
| 1453 | + c.model_config.modeling_unit = C.CString(config.ModelConfig.ModelingUnit) | ||
| 1454 | + defer C.free(unsafe.Pointer(c.model_config.modeling_unit)) | ||
| 1455 | + | ||
| 1456 | + c.model_config.bpe_vocab = C.CString(config.ModelConfig.BpeVocab) | ||
| 1457 | + defer C.free(unsafe.Pointer(c.model_config.bpe_vocab)) | ||
| 1458 | + | ||
| 1459 | + c.model_config.tokens_buf = C.CString(config.ModelConfig.TokensBuf) | ||
| 1460 | + defer C.free(unsafe.Pointer(c.model_config.tokens_buf)) | ||
| 1461 | + | ||
| 1462 | + c.model_config.tokens_buf_size = C.int(config.ModelConfig.TokensBufSize) | ||
| 1463 | + | ||
| 1464 | + c.max_active_paths = C.int(config.MaxActivePaths) | ||
| 1465 | + | ||
| 1466 | + c.keywords_file = C.CString(config.KeywordsFile) | ||
| 1467 | + defer C.free(unsafe.Pointer(c.keywords_file)) | ||
| 1468 | + | ||
| 1469 | + c.keywords_score = C.float(config.KeywordsScore) | ||
| 1470 | + | ||
| 1471 | + c.keywords_threshold = C.float(config.KeywordsThreshold) | ||
| 1472 | + | ||
| 1473 | + c.keywords_buf = C.CString(config.KeywordsBuf) | ||
| 1474 | + defer C.free(unsafe.Pointer(c.keywords_buf)) | ||
| 1475 | + | ||
| 1476 | + c.keywords_buf_size = C.int(config.KeywordsBufSize) | ||
| 1477 | + | ||
| 1478 | + spotter := &KeywordSpotter{} | ||
| 1479 | + spotter.impl = C.SherpaOnnxCreateKeywordSpotter(&c) | ||
| 1480 | + | ||
| 1481 | + return spotter | ||
| 1482 | +} | ||
| 1483 | + | ||
| 1484 | +// The user is responsible to invoke [DeleteOnlineStream]() to free | ||
| 1485 | +// the returned stream to avoid memory leak | ||
| 1486 | +func NewKeywordStream(spotter *KeywordSpotter) *OnlineStream { | ||
| 1487 | + stream := &OnlineStream{} | ||
| 1488 | + stream.impl = C.SherpaOnnxCreateKeywordStream(spotter.impl) | ||
| 1489 | + return stream | ||
| 1490 | +} | ||
| 1491 | + | ||
| 1492 | +// The user is responsible to invoke [DeleteOnlineStream]() to free | ||
| 1493 | +// the returned stream to avoid memory leak | ||
| 1494 | +func NewKeywordStreamWithKeywords(spotter *KeywordSpotter, keywords string) *OnlineStream { | ||
| 1495 | + stream := &OnlineStream{} | ||
| 1496 | + | ||
| 1497 | + s := C.CString(keywords) | ||
| 1498 | + defer C.free(unsafe.Pointer(s)) | ||
| 1499 | + | ||
| 1500 | + stream.impl = C.SherpaOnnxCreateKeywordStreamWithKeywords(spotter.impl, s) | ||
| 1501 | + return stream | ||
| 1502 | +} | ||
| 1503 | + | ||
| 1504 | +// Check whether the stream has enough feature frames for decoding. | ||
| 1505 | +// Return true if this stream is ready for decoding. Return false otherwise. | ||
| 1506 | +// | ||
| 1507 | +// You will usually use it like below: | ||
| 1508 | +// | ||
| 1509 | +// for spotter.IsReady(s) { | ||
| 1510 | +// spotter.Decode(s) | ||
| 1511 | +// } | ||
| 1512 | +func (spotter *KeywordSpotter) IsReady(s *OnlineStream) bool { | ||
| 1513 | + return C.SherpaOnnxIsKeywordStreamReady(spotter.impl, s.impl) == 1 | ||
| 1514 | +} | ||
| 1515 | + | ||
// Decode decodes stream s. The caller must make sure that
// spotter.IsReady(s) returns true before calling it; this wrapper performs
// no such check itself.
//
// You usually use it like below:
//
//	for spotter.IsReady(s) {
//		spotter.Decode(s)
//	}
func (spotter *KeywordSpotter) Decode(s *OnlineStream) {
	C.SherpaOnnxDecodeKeywordStream(spotter.impl, s.impl)
}
| 1527 | + | ||
| 1528 | +// Get the current result of stream since the last invoke of Reset() | ||
| 1529 | +func (spotter *KeywordSpotter) GetResult(s *OnlineStream) *KeywordSpotterResult { | ||
| 1530 | + p := C.SherpaOnnxGetKeywordResult(spotter.impl, s.impl) | ||
| 1531 | + defer C.SherpaOnnxDestroyKeywordResult(p) | ||
| 1532 | + result := &KeywordSpotterResult{} | ||
| 1533 | + result.Keyword = C.GoString(p.keyword) | ||
| 1534 | + return result | ||
| 1535 | +} |
-
请 注册 或 登录 后发表评论