symbol-table.h
1.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
// sherpa-onnx/csrc/symbol-table.h
//
// Copyright (c) 2022-2023 Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_SYMBOL_TABLE_H_
#define SHERPA_ONNX_CSRC_SYMBOL_TABLE_H_
#include <cstdint>
#include <iosfwd>
#include <string>
#include <unordered_map>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
namespace sherpa_onnx {
/// It manages mapping between symbols and integer IDs.
class SymbolTable {
public:
SymbolTable() = default;
/// Construct a symbol table from a file.
/// Each line in the file contains two fields:
///
/// sym ID
///
/// Fields are separated by space(s).
explicit SymbolTable(const std::string &filename);
#if __ANDROID_API__ >= 9
SymbolTable(AAssetManager *mgr, const std::string &filename);
#endif
/// Return a string representation of this symbol table
std::string ToString() const;
/// Return the symbol corresponding to the given ID.
const std::string operator[](int32_t id) const;
/// Return the ID corresponding to the given symbol.
int32_t operator[](const std::string &sym) const;
/// Return true if there is a symbol with the given ID.
bool Contains(int32_t id) const;
/// Return true if there is a given symbol in the symbol table.
bool Contains(const std::string &sym) const;
// for tokens.txt from Whisper
void ApplyBase64Decode();
int32_t NumSymbols() const { return id2sym_.size(); }
private:
void Init(std::istream &is);
private:
std::unordered_map<std::string, int32_t> sym2id_;
std::unordered_map<int32_t, std::string> id2sym_;
};
/// Stream insertion operator for SymbolTable.
///
/// Only declared here; the definition lives outside this header.
/// NOTE(review): presumably writes the same text as
/// SymbolTable::ToString() -- confirm against the implementation.
///
/// @param os            Output stream to write to.
/// @param symbol_table  Symbol table to print.
/// @return A reference to an output stream (by convention `os`, to allow
///         chaining).
std::ostream &operator<<(std::ostream &os, const SymbolTable &symbol_table);
} // namespace sherpa_onnx
#endif // SHERPA_ONNX_CSRC_SYMBOL_TABLE_H_