Committed by
GitHub
Handle invalid utf8 sequence from Whisper for Dart API. (#1106)
Fixes #1104
正在显示
4 个修改的文件
包含
33 行增加
和
2 行删除
| @@ -7,6 +7,7 @@ import 'package:ffi/ffi.dart'; | @@ -7,6 +7,7 @@ import 'package:ffi/ffi.dart'; | ||
| 7 | import './feature_config.dart'; | 7 | import './feature_config.dart'; |
| 8 | import './offline_stream.dart'; | 8 | import './offline_stream.dart'; |
| 9 | import './sherpa_onnx_bindings.dart'; | 9 | import './sherpa_onnx_bindings.dart'; |
| 10 | +import './utils.dart'; | ||
| 10 | 11 | ||
| 11 | class OfflineTransducerModelConfig { | 12 | class OfflineTransducerModelConfig { |
| 12 | const OfflineTransducerModelConfig({ | 13 | const OfflineTransducerModelConfig({ |
| @@ -287,7 +288,7 @@ class OfflineRecognizer { | @@ -287,7 +288,7 @@ class OfflineRecognizer { | ||
| 287 | return OfflineRecognizerResult(text: '', tokens: [], timestamps: []); | 288 | return OfflineRecognizerResult(text: '', tokens: [], timestamps: []); |
| 288 | } | 289 | } |
| 289 | 290 | ||
| 290 | - final parsedJson = jsonDecode(json.toDartString()); | 291 | + final parsedJson = jsonDecode(toDartString(json)); |
| 291 | 292 | ||
| 292 | SherpaOnnxBindings.destroyOfflineStreamResultJson?.call(json); | 293 | SherpaOnnxBindings.destroyOfflineStreamResultJson?.call(json); |
| 293 | 294 |
| @@ -7,6 +7,7 @@ import 'package:ffi/ffi.dart'; | @@ -7,6 +7,7 @@ import 'package:ffi/ffi.dart'; | ||
| 7 | import './feature_config.dart'; | 7 | import './feature_config.dart'; |
| 8 | import './online_stream.dart'; | 8 | import './online_stream.dart'; |
| 9 | import './sherpa_onnx_bindings.dart'; | 9 | import './sherpa_onnx_bindings.dart'; |
| 10 | +import './utils.dart'; | ||
| 10 | 11 | ||
| 11 | class OnlineTransducerModelConfig { | 12 | class OnlineTransducerModelConfig { |
| 12 | const OnlineTransducerModelConfig({ | 13 | const OnlineTransducerModelConfig({ |
| @@ -268,7 +269,7 @@ class OnlineRecognizer { | @@ -268,7 +269,7 @@ class OnlineRecognizer { | ||
| 268 | return OnlineRecognizerResult(text: '', tokens: [], timestamps: []); | 269 | return OnlineRecognizerResult(text: '', tokens: [], timestamps: []); |
| 269 | } | 270 | } |
| 270 | 271 | ||
| 271 | - final parsedJson = jsonDecode(json.toDartString()); | 272 | + final parsedJson = jsonDecode(toDartString(json)); |
| 272 | 273 | ||
| 273 | SherpaOnnxBindings.destroyOnlineStreamResultJson?.call(json); | 274 | SherpaOnnxBindings.destroyOnlineStreamResultJson?.call(json); |
| 274 | 275 |
flutter/sherpa_onnx/lib/src/utils.dart
0 → 100644
| 1 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | +import 'dart:convert'; | ||
| 3 | +import 'dart:ffi'; | ||
| 4 | +import 'dart:typed_data'; | ||
| 5 | + | ||
| 6 | +import 'package:ffi/ffi.dart'; | ||
| 7 | + | ||
/// Returns the number of bytes that precede the first zero byte at
/// [codeUnits].
///
/// The terminating zero byte itself is not counted. Copied from package:ffi:
/// https://github.com/dart-archive/ffi/blob/main/lib/src/utf8.dart#L52
int _strLen(Pointer<Uint8> codeUnits) {
  var offset = 0;
  // Walk forward one byte at a time until the NUL terminator is reached.
  for (; codeUnits[offset] != 0; offset++) {}
  return offset;
}
| 17 | + | ||
/// Converts the NUL-terminated UTF-8 string at [s] into a Dart [String].
///
/// Adapted from `toDartString` in package:ffi:
/// https://github.com/dart-archive/ffi/blob/main/lib/src/utf8.dart#L41
///
/// Unlike the upstream version, decoding uses `allowMalformed: true`, so
/// invalid UTF-8 sequences are replaced with U+FFFD instead of causing a
/// [FormatException].
String toDartString(Pointer<Utf8> s) {
  final bytes = s.cast<Uint8>();
  // View the native bytes up to (not including) the NUL terminator.
  final view = bytes.asTypedList(_strLen(bytes));
  return const Utf8Decoder(allowMalformed: true).convert(view);
}
-
请 注册 或 登录 后发表评论