offline-recognizer.h
4.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
// sherpa-onnx/csrc/offline-recognizer.h
//
// Copyright (c) 2023 Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_H_
#define SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_H_
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/features.h"
#include "sherpa-onnx/csrc/homophone-replacer.h"
#include "sherpa-onnx/csrc/offline-ctc-fst-decoder-config.h"
#include "sherpa-onnx/csrc/offline-lm-config.h"
#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/csrc/offline-stream.h"
#include "sherpa-onnx/csrc/offline-transducer-model-config.h"
#include "sherpa-onnx/csrc/parse-options.h"
namespace sherpa_onnx {
struct OfflineRecognitionResult;
/// Bundle of settings passed to OfflineRecognizer.
///
/// All members are public. They can be filled in one by one after default
/// construction, or supplied together through the twelve-argument
/// constructor, which copies each argument into the member of the same name.
struct OfflineRecognizerConfig {
  FeatureExtractorConfig feat_config;
  OfflineModelConfig model_config;
  OfflineLMConfig lm_config;
  OfflineCtcFstDecoderConfig ctc_fst_decoder_config;

  // Name of the decoding method; defaults to "greedy_search".
  //
  // only greedy_search is implemented
  // TODO(fangjun): Implement modified_beam_search
  // NOTE(review): the two lines above were found detached at the end of the
  // field list; whether they are still accurate cannot be determined from
  // this header -- confirm against the implementation.
  std::string decoding_method = "greedy_search";

  // NOTE(review): presumably only relevant to beam-search style decoding --
  // confirm against the implementation.
  int32_t max_active_paths = 4;

  std::string hotwords_file;
  float hotwords_score = 1.5f;

  float blank_penalty = 0.0f;

  // If there are multiple rules, they are applied from left to right.
  std::string rule_fsts;

  // If there are multiple FST archives, they are applied from left to right.
  std::string rule_fars;

  HomophoneReplacerConfig hr;

  /// Leaves every member at its in-class default.
  OfflineRecognizerConfig() = default;

  /// Member-wise constructor: each argument is copied into the member that
  /// carries the same name.
  OfflineRecognizerConfig(
      const FeatureExtractorConfig &feat_config,
      const OfflineModelConfig &model_config, const OfflineLMConfig &lm_config,
      const OfflineCtcFstDecoderConfig &ctc_fst_decoder_config,
      const std::string &decoding_method, int32_t max_active_paths,
      const std::string &hotwords_file, float hotwords_score,
      float blank_penalty, const std::string &rule_fsts,
      const std::string &rule_fars, const HomophoneReplacerConfig &hr)
      : feat_config(feat_config),
        model_config(model_config),
        lm_config(lm_config),
        ctc_fst_decoder_config(ctc_fst_decoder_config),
        decoding_method(decoding_method),
        max_active_paths(max_active_paths),
        hotwords_file(hotwords_file),
        hotwords_score(hotwords_score),
        blank_penalty(blank_penalty),
        rule_fsts(rule_fsts),
        rule_fars(rule_fars),
        hr(hr) {}

  // The three functions below are only declared here; their definitions live
  // outside this header.

  /// NOTE(review): presumably exposes the fields as options on `po` --
  /// confirm against the definition.
  void Register(ParseOptions *po);

  /// NOTE(review): presumably returns true when the configuration is usable
  /// -- confirm against the definition.
  bool Validate() const;

  /// Returns a string form of this configuration.
  std::string ToString() const;
};
class OfflineRecognizerImpl;
/// Recognizer that decodes OfflineStream objects.
///
/// The only data member is a std::unique_ptr to OfflineRecognizerImpl, a
/// type that is merely forward-declared in this header. Apart from
/// DecodeStream(), every member function is declared here and defined
/// elsewhere.
class OfflineRecognizer {
 public:
  // Declared rather than defaulted inline: OfflineRecognizerImpl is an
  // incomplete type at this point, and std::unique_ptr needs the complete
  // type wherever the destructor is defined.
  ~OfflineRecognizer();

  /// Construct from a configuration plus a caller-supplied manager object.
  ///
  /// NOTE(review): `Manager` is presumably a platform asset/resource manager
  /// type -- confirm against the explicit instantiations.
  template <typename Manager>
  OfflineRecognizer(Manager *mgr, const OfflineRecognizerConfig &config);

  /// Construct from a configuration.
  explicit OfflineRecognizer(const OfflineRecognizerConfig &config);

  /// Create a stream for decoding.
  std::unique_ptr<OfflineStream> CreateStream() const;

  /** Create a stream for decoding, with hotwords attached.
   *
   * @param hotwords The hotwords for this stream. It may contain several
   *                 hotwords, separated by "/". Within each hotword the
   *                 units are cjkchars or bpes, separated by a space (" ").
   *                 For example, the hotwords I LOVE YOU and HELLO WORLD
   *                 look like:
   *
   *                 "▁I ▁LOVE ▁YOU/▁HE LL O ▁WORLD"
   */
  std::unique_ptr<OfflineStream> CreateStream(
      const std::string &hotwords) const;

  /** Decode a single stream.
   *
   * Equivalent to calling DecodeStreams() on a one-element array.
   *
   * @param s The stream to decode.
   */
  void DecodeStream(OfflineStream *s) const {
    // The by-value parameter itself serves as the one-element array.
    DecodeStreams(&s, 1);
  }

  /** Decode a list of streams.
   *
   * @param ss Pointer to an array of streams.
   * @param n  Size of the input array.
   */
  void DecodeStreams(OfflineStream **ss, int32_t n) const;

  /** Replace the configuration of this recognizer.
   *
   * Onnxruntime Session objects are not affected by this method.
   * The exact behavior can be defined by a specific recognizer impl.
   * For instance, the whisper recognizer can retrieve the language and task
   * from `config` and ignore any remaining fields.
   */
  void SetConfig(const OfflineRecognizerConfig &config);

  /// Returns the configuration by value.
  OfflineRecognizerConfig GetConfig() const;

 private:
  std::unique_ptr<OfflineRecognizerImpl> impl_;
};
} // namespace sherpa_onnx
#endif // SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_H_