Committed by
GitHub
Added tokens, tokens_arr and json for offline recognizer result (#936)
Co-authored-by: leo <webmaster@360converter.com>
正在显示
2 个修改的文件
包含
65 行增加
和
7 行删除
| @@ -444,14 +444,49 @@ const SherpaOnnxOfflineRecognizerResult *GetOfflineStreamResult( | @@ -444,14 +444,49 @@ const SherpaOnnxOfflineRecognizerResult *GetOfflineStreamResult( | ||
| 444 | pText[text.size()] = 0; | 444 | pText[text.size()] = 0; |
| 445 | r->text = pText; | 445 | r->text = pText; |
| 446 | 446 | ||
| 447 | - if (!result.timestamps.empty()) { | ||
| 448 | - r->timestamps = new float[result.timestamps.size()]; | ||
| 449 | - std::copy(result.timestamps.begin(), result.timestamps.end(), | ||
| 450 | - r->timestamps); | ||
| 451 | - r->count = result.timestamps.size(); | 447 | + // copy json |
| 448 | + const auto &json = result.AsJsonString(); | ||
| 449 | + char *pJson = new char[json.size() + 1]; | ||
| 450 | + std::copy(json.begin(), json.end(), pJson); | ||
| 451 | + pJson[json.size()] = 0; | ||
| 452 | + r->json = pJson; | ||
| 453 | + | ||
| 454 | + // copy tokens | ||
| 455 | + auto count = result.tokens.size(); | ||
| 456 | + if (count > 0) { | ||
| 457 | + size_t total_length = 0; | ||
| 458 | + for (const auto &token : result.tokens) { | ||
| 459 | + // +1 for the null character at the end of each token | ||
| 460 | + total_length += token.size() + 1; | ||
| 461 | + } | ||
| 462 | + | ||
| 463 | + r->count = count; | ||
| 464 | + // Each token ends with a null character ('\0') | ||
| 465 | + char *tokens = new char[total_length]{}; | ||
| 466 | + char **tokens_temp = new char *[r->count]; | ||
| 467 | + int32_t pos = 0; | ||
| 468 | + for (int32_t i = 0; i < r->count; ++i) { | ||
| 469 | + tokens_temp[i] = tokens + pos; | ||
| 470 | + memcpy(tokens + pos, result.tokens[i].c_str(), result.tokens[i].size()); | ||
| 471 | + // +1 to move past the null character | ||
| 472 | + pos += result.tokens[i].size() + 1; | ||
| 473 | + } | ||
| 474 | + r->tokens_arr = tokens_temp; | ||
| 475 | + | ||
| 476 | + if (!result.timestamps.empty()) { | ||
| 477 | + r->timestamps = new float[r->count]; | ||
| 478 | + std::copy(result.timestamps.begin(), result.timestamps.end(), | ||
| 479 | + r->timestamps); | ||
| 480 | + } else { | ||
| 481 | + r->timestamps = nullptr; | ||
| 482 | + } | ||
| 483 | + | ||
| 484 | + r->tokens = tokens; | ||
| 452 | } else { | 485 | } else { |
| 453 | - r->timestamps = nullptr; | ||
| 454 | r->count = 0; | 486 | r->count = 0; |
| 487 | + r->timestamps = nullptr; | ||
| 488 | + r->tokens = nullptr; | ||
| 489 | + r->tokens_arr = nullptr; | ||
| 455 | } | 490 | } |
| 456 | 491 | ||
| 457 | return r; | 492 | return r; |
| @@ -462,6 +497,9 @@ void DestroyOfflineRecognizerResult( | @@ -462,6 +497,9 @@ void DestroyOfflineRecognizerResult( | ||
| 462 | if (r) { | 497 | if (r) { |
| 463 | delete[] r->text; | 498 | delete[] r->text; |
| 464 | delete[] r->timestamps; | 499 | delete[] r->timestamps; |
| 500 | + delete[] r->tokens; | ||
| 501 | + delete[] r->tokens_arr; | ||
| 502 | + delete[] r->json; | ||
| 465 | delete r; | 503 | delete r; |
| 466 | } | 504 | } |
| 467 | } | 505 | } |
| @@ -481,7 +481,27 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerResult { | @@ -481,7 +481,27 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerResult { | ||
| 481 | 481 | ||
| 482 | // number of entries in timestamps | 482 | // number of entries in timestamps |
| 483 | int32_t count; | 483 | int32_t count; |
| 484 | - // TODO(fangjun): Add more fields | 484 | + |
| 485 | + // Pointer to continuous memory which holds string based tokens | ||
| 486 | + // which are separated by \0 | ||
| 487 | + const char *tokens; | ||
| 488 | + | ||
| 489 | + // an array of count pointers; entry i holds the address where token i starts inside tokens | ||
| 490 | + const char *const *tokens_arr; | ||
| 491 | + | ||
| 492 | + /** Return a json string. | ||
| 493 | + * | ||
| 494 | + * The returned string contains: | ||
| 495 | + * { | ||
| 496 | + * "text": "The recognition result", | ||
| 497 | + * "tokens": [x, x, x], | ||
| 498 | + * "timestamps": [x, x, x], | ||
| 499 | + * "segment": x, | ||
| 500 | + * "start_time": x, | ||
| 501 | + * "is_final": true|false | ||
| 502 | + * } | ||
| 503 | + */ | ||
| 504 | + const char *json; | ||
| 485 | } SherpaOnnxOfflineRecognizerResult; | 505 | } SherpaOnnxOfflineRecognizerResult; |
| 486 | 506 | ||
| 487 | /// Get the result of the offline stream. | 507 | /// Get the result of the offline stream. |
-
请 注册 或 登录 后发表评论