Fangjun Kuang
Committed by GitHub

Fix CI (#964)

... ... @@ -23,6 +23,8 @@ namespace SherpaOnnx
Debug = 0;
Provider = "cpu";
ModelType = "";
ModelingUnit = "cjkchar";
BpeVocab = "";
}
public OfflineTransducerModelConfig Transducer;
public OfflineParaformerModelConfig Paraformer;
... ... @@ -42,5 +44,11 @@ namespace SherpaOnnx
[MarshalAs(UnmanagedType.LPStr)]
public string ModelType;
[MarshalAs(UnmanagedType.LPStr)]
public string ModelingUnit;
[MarshalAs(UnmanagedType.LPStr)]
public string BpeVocab;
}
}
... ...
... ... @@ -23,6 +23,8 @@ namespace SherpaOnnx
Provider = "cpu";
Debug = 0;
ModelType = "";
ModelingUnit = "cjkchar";
BpeVocab = "";
}
public OnlineTransducerModelConfig Transducer;
... ... @@ -43,5 +45,11 @@ namespace SherpaOnnx
[MarshalAs(UnmanagedType.LPStr)]
public string ModelType;
[MarshalAs(UnmanagedType.LPStr)]
public string ModelingUnit;
[MarshalAs(UnmanagedType.LPStr)]
public string BpeVocab;
}
}
... ...
... ... @@ -87,6 +87,8 @@ type OnlineModelConfig struct {
Provider string // Optional. Valid values are: cpu, cuda, coreml
Debug int // 1 to show model meta information while loading it.
ModelType string // Optional. You can specify it for faster model initialization
ModelingUnit string // Optional. cjkchar, bpe, cjkchar+bpe
BpeVocab string // Optional.
}
// Configuration for the feature extractor
... ... @@ -187,6 +189,12 @@ func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer {
c.model_config.model_type = C.CString(config.ModelConfig.ModelType)
defer C.free(unsafe.Pointer(c.model_config.model_type))
c.model_config.modeling_unit = C.CString(config.ModelConfig.ModelingUnit)
defer C.free(unsafe.Pointer(c.model_config.modeling_unit))
c.model_config.bpe_vocab = C.CString(config.ModelConfig.BpeVocab)
defer C.free(unsafe.Pointer(c.model_config.bpe_vocab))
c.decoding_method = C.CString(config.DecodingMethod)
defer C.free(unsafe.Pointer(c.decoding_method))
... ... @@ -372,6 +380,9 @@ type OfflineModelConfig struct {
// Optional. Specify it for faster model initialization.
ModelType string
ModelingUnit string // Optional. cjkchar, bpe, cjkchar+bpe
BpeVocab string // Optional.
}
// Configuration for the offline/non-streaming recognizer.
... ... @@ -460,6 +471,12 @@ func NewOfflineRecognizer(config *OfflineRecognizerConfig) *OfflineRecognizer {
c.model_config.model_type = C.CString(config.ModelConfig.ModelType)
defer C.free(unsafe.Pointer(c.model_config.model_type))
c.model_config.modeling_unit = C.CString(config.ModelConfig.ModelingUnit)
defer C.free(unsafe.Pointer(c.model_config.modeling_unit))
c.model_config.bpe_vocab = C.CString(config.ModelConfig.BpeVocab)
defer C.free(unsafe.Pointer(c.model_config.bpe_vocab))
c.lm_config.model = C.CString(config.LmConfig.Model)
defer C.free(unsafe.Pointer(c.lm_config.model))
... ...
... ... @@ -126,6 +126,8 @@ static SherpaOnnxOfflineModelConfig GetOfflineModelConfig(Napi::Object obj) {
SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider);
SHERPA_ONNX_ASSIGN_ATTR_STR(model_type, modelType);
SHERPA_ONNX_ASSIGN_ATTR_STR(modeling_unit, modelingUnit);
SHERPA_ONNX_ASSIGN_ATTR_STR(bpe_vocab, bpeVocab);
return c;
}
... ... @@ -232,6 +234,14 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) {
delete[] c.model_config.model_type;
}
if (c.model_config.modeling_unit) {
delete[] c.model_config.modeling_unit;
}
if (c.model_config.bpe_vocab) {
delete[] c.model_config.bpe_vocab;
}
if (c.lm_config.model) {
delete[] c.lm_config.model;
}
... ...
... ... @@ -118,6 +118,8 @@ SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) {
}
SHERPA_ONNX_ASSIGN_ATTR_STR(model_type, modelType);
SHERPA_ONNX_ASSIGN_ATTR_STR(modeling_unit, modelingUnit);
SHERPA_ONNX_ASSIGN_ATTR_STR(bpe_vocab, bpeVocab);
return c;
}
... ... @@ -228,6 +230,14 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
delete[] c.model_config.model_type;
}
if (c.model_config.modeling_unit) {
delete[] c.model_config.modeling_unit;
}
if (c.model_config.bpe_vocab) {
delete[] c.model_config.bpe_vocab;
}
if (c.decoding_method) {
delete[] c.decoding_method;
}
... ...
... ... @@ -88,7 +88,9 @@ func sherpaOnnxOnlineModelConfig(
numThreads: Int = 1,
provider: String = "cpu",
debug: Int = 0,
modelType: String = ""
modelType: String = "",
modelingUnit: String = "cjkchar",
bpeVocab: String = ""
) -> SherpaOnnxOnlineModelConfig {
return SherpaOnnxOnlineModelConfig(
transducer: transducer,
... ... @@ -98,7 +100,9 @@ func sherpaOnnxOnlineModelConfig(
num_threads: Int32(numThreads),
provider: toCPointer(provider),
debug: Int32(debug),
model_type: toCPointer(modelType)
model_type: toCPointer(modelType),
modeling_unit: toCPointer(modelingUnit),
// The label is the C struct field name (`bpe_vocab`), not the Swift parameter name.
bpe_vocab: toCPointer(bpeVocab)
)
}
... ... @@ -354,7 +358,9 @@ func sherpaOnnxOfflineModelConfig(
numThreads: Int = 1,
provider: String = "cpu",
debug: Int = 0,
modelType: String = ""
modelType: String = "",
modelingUnit: String = "cjkchar",
bpeVocab: String = ""
) -> SherpaOnnxOfflineModelConfig {
return SherpaOnnxOfflineModelConfig(
transducer: transducer,
... ... @@ -366,7 +372,9 @@ func sherpaOnnxOfflineModelConfig(
num_threads: Int32(numThreads),
debug: Int32(debug),
provider: toCPointer(provider),
model_type: toCPointer(modelType)
model_type: toCPointer(modelType),
modeling_unit: toCPointer(modelingUnit),
// The label is the C struct field name (`bpe_vocab`), not the Swift parameter name.
bpe_vocab: toCPointer(bpeVocab)
)
}
... ...
... ... @@ -137,7 +137,7 @@ function initSherpaOnnxOnlineModelConfig(config, Module) {
const ctc = initSherpaOnnxOnlineZipformer2CtcModelConfig(
config.zipformer2Ctc, Module);
const len = transducer.len + paraformer.len + ctc.len + 5 * 4;
const len = transducer.len + paraformer.len + ctc.len + 7 * 4;
const ptr = Module._malloc(len);
let offset = 0;
... ... @@ -153,7 +153,11 @@ function initSherpaOnnxOnlineModelConfig(config, Module) {
const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1;
const providerLen = Module.lengthBytesUTF8(config.provider) + 1;
const modelTypeLen = Module.lengthBytesUTF8(config.modelType) + 1;
const bufferLen = tokensLen + providerLen + modelTypeLen;
const modelingUnitLen = Module.lengthBytesUTF8(config.modelingUnit || '') + 1;
const bpeVocabLen = Module.lengthBytesUTF8(config.bpeVocab || '') + 1;
const bufferLen =
tokensLen + providerLen + modelTypeLen + modelingUnitLen + bpeVocabLen;
const buffer = Module._malloc(bufferLen);
offset = 0;
... ... @@ -164,6 +168,14 @@ function initSherpaOnnxOnlineModelConfig(config, Module) {
offset += providerLen;
Module.stringToUTF8(config.modelType, buffer + offset, modelTypeLen);
offset += modelTypeLen;
Module.stringToUTF8(
config.modelingUnit || '', buffer + offset, modelingUnitLen);
offset += modelingUnitLen;
Module.stringToUTF8(config.bpeVocab || '', buffer + offset, bpeVocabLen);
offset += bpeVocabLen;
offset = transducer.len + paraformer.len + ctc.len;
Module.setValue(ptr + offset, buffer, 'i8*'); // tokens
... ... @@ -182,6 +194,17 @@ function initSherpaOnnxOnlineModelConfig(config, Module) {
ptr + offset, buffer + tokensLen + providerLen, 'i8*'); // modelType
offset += 4;
Module.setValue(
ptr + offset, buffer + tokensLen + providerLen + modelTypeLen,
'i8*'); // modelingUnit
offset += 4;
Module.setValue(
ptr + offset,
buffer + tokensLen + providerLen + modelTypeLen + modelingUnitLen,
'i8*'); // bpeVocab
offset += 4;
return {
buffer: buffer, ptr: ptr, len: len, transducer: transducer,
paraformer: paraformer, ctc: ctc
... ... @@ -317,6 +340,8 @@ function createOnlineRecognizer(Module, myConfig) {
provider: 'cpu',
debug: 1,
modelType: '',
modelingUnit: 'cjkchar',
bpeVocab: '',
};
const featureConfig = {
... ... @@ -504,7 +529,7 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
const tdnn = initSherpaOnnxOfflineTdnnModelConfig(config.tdnn, Module);
const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
tdnn.len + 5 * 4;
tdnn.len + 7 * 4;
const ptr = Module._malloc(len);
let offset = 0;
... ... @@ -526,7 +551,11 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1;
const providerLen = Module.lengthBytesUTF8(config.provider) + 1;
const modelTypeLen = Module.lengthBytesUTF8(config.modelType) + 1;
const bufferLen = tokensLen + providerLen + modelTypeLen;
const modelingUnitLen = Module.lengthBytesUTF8(config.modelingUnit || '') + 1;
const bpeVocabLen = Module.lengthBytesUTF8(config.bpeVocab || '') + 1;
const bufferLen =
tokensLen + providerLen + modelTypeLen + modelingUnitLen + bpeVocabLen;
const buffer = Module._malloc(bufferLen);
offset = 0;
... ... @@ -537,6 +566,14 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
offset += providerLen;
Module.stringToUTF8(config.modelType, buffer + offset, modelTypeLen);
offset += modelTypeLen;
Module.stringToUTF8(
config.modelingUnit || '', buffer + offset, modelingUnitLen);
offset += modelingUnitLen;
Module.stringToUTF8(config.bpeVocab || '', buffer + offset, bpeVocabLen);
offset += bpeVocabLen;
offset =
transducer.len + paraformer.len + nemoCtc.len + whisper.len + tdnn.len;
... ... @@ -556,6 +593,17 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
ptr + offset, buffer + tokensLen + providerLen, 'i8*'); // modelType
offset += 4;
Module.setValue(
ptr + offset, buffer + tokensLen + providerLen + modelTypeLen,
'i8*'); // modelingUnit
offset += 4;
Module.setValue(
ptr + offset,
buffer + tokensLen + providerLen + modelTypeLen + modelingUnitLen,
'i8*'); // bpeVocab
offset += 4;
return {
buffer: buffer, ptr: ptr, len: len, transducer: transducer,
paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn
... ...
... ... @@ -19,7 +19,7 @@ static_assert(sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) == 1 * 4, "");
static_assert(sizeof(SherpaOnnxOnlineModelConfig) ==
sizeof(SherpaOnnxOnlineTransducerModelConfig) +
sizeof(SherpaOnnxOnlineParaformerModelConfig) +
sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) + 5 * 4,
sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) + 7 * 4,
"");
static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) == 2 * 4, "");
... ... @@ -52,6 +52,8 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) {
fprintf(stdout, "provider: %s\n", model_config->provider);
fprintf(stdout, "debug: %d\n", model_config->debug);
fprintf(stdout, "model type: %s\n", model_config->model_type);
fprintf(stdout, "modeling unit: %s\n", model_config->modeling_unit);
fprintf(stdout, "bpe vocab: %s\n", model_config->bpe_vocab);
fprintf(stdout, "----------feat config----------\n");
fprintf(stdout, "sample rate: %d\n", feat->sample_rate);
... ...
... ... @@ -23,7 +23,7 @@ static_assert(sizeof(SherpaOnnxOfflineModelConfig) ==
sizeof(SherpaOnnxOfflineParaformerModelConfig) +
sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) +
sizeof(SherpaOnnxOfflineWhisperModelConfig) +
sizeof(SherpaOnnxOfflineTdnnModelConfig) + 5 * 4,
sizeof(SherpaOnnxOfflineTdnnModelConfig) + 7 * 4,
"");
static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) ==
... ... @@ -90,6 +90,8 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
fprintf(stdout, "provider: %s\n", model_config->provider);
fprintf(stdout, "debug: %d\n", model_config->debug);
fprintf(stdout, "model type: %s\n", model_config->model_type);
fprintf(stdout, "modeling unit: %s\n", model_config->modeling_unit);
fprintf(stdout, "bpe vocab: %s\n", model_config->bpe_vocab);
fprintf(stdout, "----------feat config----------\n");
fprintf(stdout, "sample rate: %d\n", feat->sample_rate);
... ...