decode.cc
/**
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
*
* See LICENSE for clarification regarding multiple authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "sherpa-onnx/csrc/decode.h"
#include <assert.h>
#include <algorithm>
#include <vector>
namespace sherpa_onnx {
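
// Decode the encoder output with greedy search: for each of the T encoder
// frames, run the joiner on the projected encoder frame and the current
// projected decoder output, take the arg-max over the vocabulary, and, on a
// non-blank symbol, extend the hypothesis and re-run the decoder network.
//
// A minimal usage sketch (assuming `model` is an RnntModel constructed
// elsewhere and `encoder_out` holds encoder output of shape
// (1, T, encoder_dim)):
//
//   std::vector<int32_t> tokens = GreedySearch(model, encoder_out);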
std::vector<int32_t> GreedySearch(RnntModel &model,  // NOLINT
                                  const Ort::Value &encoder_out) {
  std::vector<int64_t> encoder_out_shape =
      encoder_out.GetTensorTypeAndShapeInfo().GetShape();

  assert(encoder_out_shape[0] == 1 && "Only batch_size=1 is implemented");
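
  // Project the encoder output to the joiner dimension once up front; the
  // projected frames are indexed per time step inside the loop below.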
  Ort::Value projected_encoder_out =
      model.RunJoinerEncoderProj(encoder_out.GetTensorData<float>(),
                                 encoder_out_shape[1], encoder_out_shape[2]);
  const float *p_projected_encoder_out =
      projected_encoder_out.GetTensorData<float>();
  int32_t context_size = 2;  // hard-coded to 2
  int32_t blank_id = 0;      // hard-coded to 0
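
  // Prime the hypothesis with `context_size` blanks so the decoder always
  // sees a full context; they are stripped before returning.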
  std::vector<int32_t> hyp(context_size, blank_id);

  std::array<int64_t, 2> decoder_input{blank_id, blank_id};
  Ort::Value decoder_out = model.RunDecoder(decoder_input.data(), context_size);
  std::vector<int64_t> decoder_out_shape =
      decoder_out.GetTensorTypeAndShapeInfo().GetShape();

  Ort::Value projected_decoder_out = model.RunJoinerDecoderProj(
      decoder_out.GetTensorData<float>(), decoder_out_shape[2]);

  int32_t joiner_dim =
      projected_decoder_out.GetTensorTypeAndShapeInfo().GetShape()[1];
  int32_t T = encoder_out_shape[1];
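
  // For each encoder frame, combine it with the current decoder output in
  // the joiner and pick the most likely symbol.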
  for (int32_t t = 0; t != T; ++t) {
    Ort::Value logit = model.RunJoiner(
        p_projected_encoder_out + t * joiner_dim,
        projected_decoder_out.GetTensorData<float>(), joiner_dim);

    int32_t vocab_size = logit.GetTensorTypeAndShapeInfo().GetShape()[1];
    const float *p_logit = logit.GetTensorData<float>();

    auto y = static_cast<int32_t>(std::distance(
        p_logit, std::max_element(p_logit, p_logit + vocab_size)));
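
    // On a non-blank symbol: extend the hypothesis, shift the decoder
    // context, and recompute the projected decoder output.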
    if (y != blank_id) {
      decoder_input[0] = hyp.back();
      decoder_input[1] = y;
      hyp.push_back(y);

      decoder_out = model.RunDecoder(decoder_input.data(), context_size);
      projected_decoder_out = model.RunJoinerDecoderProj(
          decoder_out.GetTensorData<float>(), decoder_out_shape[2]);
    }
  }
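
  // Drop the `context_size` leading blanks used to prime the decoder.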
  return {hyp.begin() + context_size, hyp.end()};
}

}  // namespace sherpa_onnx