Committed by GitHub
Remove portaudio-go in Go API examples. (#2317)
Replace the deprecated portaudio-go integration with malgo in the Go real-time speech recognition example and correct version string typos in the Node.js examples.

- Fixed “verison” typo in Node.js console logs.
- Swapped out portaudio-go for malgo in the Go microphone example, introducing initRecognizer, callback-driven streaming, and sample conversion.
- Removed portaudio-go from go.mod.
正在显示 4 个修改的文件 包含 57 行增加 和 46 行删除
| @@ -2,34 +2,14 @@ package main | @@ -2,34 +2,14 @@ package main | ||
| 2 | 2 | ||
| 3 | import ( | 3 | import ( |
| 4 | "fmt" | 4 | "fmt" |
| 5 | - portaudio "github.com/csukuangfj/portaudio-go" | 5 | + "github.com/gen2brain/malgo" |
| 6 | sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx" | 6 | sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx" |
| 7 | flag "github.com/spf13/pflag" | 7 | flag "github.com/spf13/pflag" |
| 8 | "log" | 8 | "log" |
| 9 | "strings" | 9 | "strings" |
| 10 | ) | 10 | ) |
| 11 | 11 | ||
| 12 | -func main() { | ||
| 13 | - err := portaudio.Initialize() | ||
| 14 | - if err != nil { | ||
| 15 | - log.Fatalf("Unable to initialize portaudio: %v\n", err) | ||
| 16 | - } | ||
| 17 | - defer portaudio.Terminate() | ||
| 18 | - | ||
| 19 | - default_device, err := portaudio.DefaultInputDevice() | ||
| 20 | - if err != nil { | ||
| 21 | - log.Fatal("Failed to get default input device: %v\n", err) | ||
| 22 | - } | ||
| 23 | - fmt.Printf("Select default input device: %s\n", default_device.Name) | ||
| 24 | - param := portaudio.StreamParameters{} | ||
| 25 | - param.Input.Device = default_device | ||
| 26 | - param.Input.Channels = 1 | ||
| 27 | - param.Input.Latency = default_device.DefaultLowInputLatency | ||
| 28 | - | ||
| 29 | - param.SampleRate = 16000 | ||
| 30 | - param.FramesPerBuffer = 0 | ||
| 31 | - param.Flags = portaudio.ClipOff | ||
| 32 | - | 12 | +func initRecognizer() *sherpa.OnlineRecognizer { |
| 33 | config := sherpa.OnlineRecognizerConfig{} | 13 | config := sherpa.OnlineRecognizerConfig{} |
| 34 | config.FeatConfig = sherpa.FeatureConfig{SampleRate: 16000, FeatureDim: 80} | 14 | config.FeatConfig = sherpa.FeatureConfig{SampleRate: 16000, FeatureDim: 80} |
| 35 | 15 | ||
| @@ -55,37 +35,48 @@ func main() { | @@ -55,37 +35,48 @@ func main() { | ||
| 55 | log.Println("Initializing recognizer (may take several seconds)") | 35 | log.Println("Initializing recognizer (may take several seconds)") |
| 56 | recognizer := sherpa.NewOnlineRecognizer(&config) | 36 | recognizer := sherpa.NewOnlineRecognizer(&config) |
| 57 | log.Println("Recognizer created!") | 37 | log.Println("Recognizer created!") |
| 38 | + return recognizer | ||
| 39 | +} | ||
| 40 | + | ||
| 41 | +func main() { | ||
| 42 | + ctx, err := malgo.InitContext(nil, malgo.ContextConfig{}, func(message string) { | ||
| 43 | + fmt.Printf("LOG <%v>", message) | ||
| 44 | + }) | ||
| 45 | + chk(err) | ||
| 46 | + | ||
| 47 | + defer func() { | ||
| 48 | + _ = ctx.Uninit() | ||
| 49 | + ctx.Free() | ||
| 50 | + }() | ||
| 51 | + | ||
| 52 | + deviceConfig := malgo.DefaultDeviceConfig(malgo.Duplex) | ||
| 53 | + deviceConfig.Capture.Format = malgo.FormatS16 | ||
| 54 | + deviceConfig.Capture.Channels = 1 | ||
| 55 | + deviceConfig.Playback.Format = malgo.FormatS16 | ||
| 56 | + deviceConfig.Playback.Channels = 1 | ||
| 57 | + deviceConfig.SampleRate = 16000 | ||
| 58 | + deviceConfig.Alsa.NoMMap = 1 | ||
| 59 | + | ||
| 60 | + recognizer := initRecognizer() | ||
| 58 | defer sherpa.DeleteOnlineRecognizer(recognizer) | 61 | defer sherpa.DeleteOnlineRecognizer(recognizer) |
| 59 | 62 | ||
| 60 | stream := sherpa.NewOnlineStream(recognizer) | 63 | stream := sherpa.NewOnlineStream(recognizer) |
| 61 | defer sherpa.DeleteOnlineStream(stream) | 64 | defer sherpa.DeleteOnlineStream(stream) |
| 62 | 65 | ||
| 63 | - // you can choose another value for 0.1 if you want | ||
| 64 | - samplesPerCall := int32(param.SampleRate * 0.1) // 0.1 second | ||
| 65 | - | ||
| 66 | - samples := make([]float32, samplesPerCall) | ||
| 67 | - s, err := portaudio.OpenStream(param, samples) | ||
| 68 | - if err != nil { | ||
| 69 | - log.Fatalf("Failed to open the stream") | ||
| 70 | - } | ||
| 71 | - defer s.Close() | ||
| 72 | - chk(s.Start()) | ||
| 73 | - | ||
| 74 | var last_text string | 66 | var last_text string |
| 75 | 67 | ||
| 76 | segment_idx := 0 | 68 | segment_idx := 0 |
| 77 | 69 | ||
| 78 | - fmt.Println("Started! Please speak") | ||
| 79 | - | ||
| 80 | - for { | ||
| 81 | - chk(s.Read()) | ||
| 82 | - stream.AcceptWaveform(int(param.SampleRate), samples) | 70 | + onRecvFrames := func(_, pSample []byte, framecount uint32) { |
| 71 | + samples := samplesInt16ToFloat(pSample) | ||
| 72 | + stream.AcceptWaveform(16000, samples) | ||
| 83 | 73 | ||
| 74 | + // Please use a separate goroutine for decoding in your app | ||
| 84 | for recognizer.IsReady(stream) { | 75 | for recognizer.IsReady(stream) { |
| 85 | recognizer.Decode(stream) | 76 | recognizer.Decode(stream) |
| 86 | } | 77 | } |
| 87 | - | ||
| 88 | text := recognizer.GetResult(stream).Text | 78 | text := recognizer.GetResult(stream).Text |
| 79 | + | ||
| 89 | if len(text) != 0 && last_text != text { | 80 | if len(text) != 0 && last_text != text { |
| 90 | last_text = strings.ToLower(text) | 81 | last_text = strings.ToLower(text) |
| 91 | fmt.Printf("\r%d: %s", segment_idx, last_text) | 82 | fmt.Printf("\r%d: %s", segment_idx, last_text) |
| @@ -100,7 +91,18 @@ func main() { | @@ -100,7 +91,18 @@ func main() { | ||
| 100 | } | 91 | } |
| 101 | } | 92 | } |
| 102 | 93 | ||
| 103 | - chk(s.Stop()) | 94 | + captureCallbacks := malgo.DeviceCallbacks{ |
| 95 | + Data: onRecvFrames, | ||
| 96 | + } | ||
| 97 | + | ||
| 98 | + device, err := malgo.InitDevice(ctx.Context, deviceConfig, captureCallbacks) | ||
| 99 | + chk(err) | ||
| 100 | + | ||
| 101 | + err = device.Start() | ||
| 102 | + chk(err) | ||
| 103 | + fmt.Println("Started. Please speak. Press ctrl + C to exit") | ||
| 104 | + fmt.Scanln() | ||
| 105 | + device.Uninit() | ||
| 104 | } | 106 | } |
| 105 | 107 | ||
| 106 | func chk(err error) { | 108 | func chk(err error) { |
| @@ -108,3 +110,16 @@ func chk(err error) { | @@ -108,3 +110,16 @@ func chk(err error) { | ||
| 108 | panic(err) | 110 | panic(err) |
| 109 | } | 111 | } |
| 110 | } | 112 | } |
| 113 | + | ||
| 114 | +func samplesInt16ToFloat(inSamples []byte) []float32 { | ||
| 115 | + numSamples := len(inSamples) / 2 | ||
| 116 | + outSamples := make([]float32, numSamples) | ||
| 117 | + | ||
| 118 | + for i := 0; i != numSamples; i++ { | ||
| 119 | + // Decode two bytes into an int16 using bit manipulation | ||
| 120 | + s16 := int16(inSamples[2*i]) | int16(inSamples[2*i+1])<<8 | ||
| 121 | + outSamples[i] = float32(s16) / 32768 | ||
| 122 | + } | ||
| 123 | + | ||
| 124 | + return outSamples | ||
| 125 | +} |
| 1 | // Copyright (c) 2024 Xiaomi Corporation | 1 | // Copyright (c) 2024 Xiaomi Corporation |
| 2 | const sherpa_onnx = require('sherpa-onnx-node'); | 2 | const sherpa_onnx = require('sherpa-onnx-node'); |
| 3 | -console.log(`verison : ${sherpa_onnx.version}`); | 3 | +console.log(`version : ${sherpa_onnx.version}`); |
| 4 | console.log(`git sha1: ${sherpa_onnx.gitSha1}`); | 4 | console.log(`git sha1: ${sherpa_onnx.gitSha1}`); |
| 5 | console.log(`git date: ${sherpa_onnx.gitDate}`); | 5 | console.log(`git date: ${sherpa_onnx.gitDate}`); |
| 6 | 6 |
| 1 | // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) | 1 | // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) |
| 2 | // | 2 | // |
| 3 | const sherpa_onnx = require('sherpa-onnx'); | 3 | const sherpa_onnx = require('sherpa-onnx'); |
| 4 | -console.log(`verison : ${sherpa_onnx.version}`); | 4 | +console.log(`version : ${sherpa_onnx.version}`); |
| 5 | console.log(`git sha1: ${sherpa_onnx.gitSha1}`); | 5 | console.log(`git sha1: ${sherpa_onnx.gitSha1}`); |
| 6 | console.log(`git date: ${sherpa_onnx.gitDate}`); | 6 | console.log(`git date: ${sherpa_onnx.gitDate}`); |
| 7 | 7 |
-
请 注册 或 登录 后发表评论