Fangjun Kuang
Committed by GitHub

Handle invalid UTF-8 sequences from Whisper in the Dart API. (#1106)

Fixes #1104
  1 +## 1.10.14 (to-be-released)
  2 +
  3 +* Fix handling of invalid UTF-8 sequences from Whisper in the Dart API.
  4 +
1 ## 1.10.13 5 ## 1.10.13
2 6
3 * Update onnxruntime from 1.17.1 to 1.18.0 7 * Update onnxruntime from 1.17.1 to 1.18.0
@@ -7,6 +7,7 @@ import 'package:ffi/ffi.dart'; @@ -7,6 +7,7 @@ import 'package:ffi/ffi.dart';
7 import './feature_config.dart'; 7 import './feature_config.dart';
8 import './offline_stream.dart'; 8 import './offline_stream.dart';
9 import './sherpa_onnx_bindings.dart'; 9 import './sherpa_onnx_bindings.dart';
  10 +import './utils.dart';
10 11
11 class OfflineTransducerModelConfig { 12 class OfflineTransducerModelConfig {
12 const OfflineTransducerModelConfig({ 13 const OfflineTransducerModelConfig({
@@ -287,7 +288,7 @@ class OfflineRecognizer { @@ -287,7 +288,7 @@ class OfflineRecognizer {
287 return OfflineRecognizerResult(text: '', tokens: [], timestamps: []); 288 return OfflineRecognizerResult(text: '', tokens: [], timestamps: []);
288 } 289 }
289 290
290 - final parsedJson = jsonDecode(json.toDartString()); 291 + final parsedJson = jsonDecode(toDartString(json));
291 292
292 SherpaOnnxBindings.destroyOfflineStreamResultJson?.call(json); 293 SherpaOnnxBindings.destroyOfflineStreamResultJson?.call(json);
293 294
@@ -7,6 +7,7 @@ import 'package:ffi/ffi.dart'; @@ -7,6 +7,7 @@ import 'package:ffi/ffi.dart';
7 import './feature_config.dart'; 7 import './feature_config.dart';
8 import './online_stream.dart'; 8 import './online_stream.dart';
9 import './sherpa_onnx_bindings.dart'; 9 import './sherpa_onnx_bindings.dart';
  10 +import './utils.dart';
10 11
11 class OnlineTransducerModelConfig { 12 class OnlineTransducerModelConfig {
12 const OnlineTransducerModelConfig({ 13 const OnlineTransducerModelConfig({
@@ -268,7 +269,7 @@ class OnlineRecognizer { @@ -268,7 +269,7 @@ class OnlineRecognizer {
268 return OnlineRecognizerResult(text: '', tokens: [], timestamps: []); 269 return OnlineRecognizerResult(text: '', tokens: [], timestamps: []);
269 } 270 }
270 271
271 - final parsedJson = jsonDecode(json.toDartString()); 272 + final parsedJson = jsonDecode(toDartString(json));
272 273
273 SherpaOnnxBindings.destroyOnlineStreamResultJson?.call(json); 274 SherpaOnnxBindings.destroyOnlineStreamResultJson?.call(json);
274 275
  1 +// Copyright (c) 2024 Xiaomi Corporation
  2 +import 'dart:convert';
  3 +import 'dart:ffi';
  4 +import 'dart:typed_data';
  5 +
  6 +import 'package:ffi/ffi.dart';
  7 +
  8 +int _strLen(Pointer<Uint8> codeUnits) {
  9 + // this function is copied from
  10 + // https://github.com/dart-archive/ffi/blob/main/lib/src/utf8.dart#L52
  11 + var length = 0;
  12 + while (codeUnits[length] != 0) {
  13 + length++;
  14 + }
  15 + return length;
  16 +}
  17 +
  18 +// This function is modified from
  19 +// https://github.com/dart-archive/ffi/blob/main/lib/src/utf8.dart#L41
  20 +// It ignores invalid utf8 sequence
  21 +String toDartString(Pointer<Utf8> s) {
  22 + final codeUnits = s.cast<Uint8>();
  23 + final length = _strLen(codeUnits);
  24 + return utf8.decode(codeUnits.asTypedList(length), allowMalformed: true);
  25 +}