SilverSulfide
Committed by GitHub

Add Python API support for Offline LM rescoring (#1033)

@@ -6,6 +6,7 @@ from typing import List, Optional @@ -6,6 +6,7 @@ from typing import List, Optional
6 from _sherpa_onnx import ( 6 from _sherpa_onnx import (
7 FeatureExtractorConfig, 7 FeatureExtractorConfig,
8 OfflineCtcFstDecoderConfig, 8 OfflineCtcFstDecoderConfig,
  9 + OfflineLMConfig,
9 OfflineModelConfig, 10 OfflineModelConfig,
10 OfflineNemoEncDecCtcModelConfig, 11 OfflineNemoEncDecCtcModelConfig,
11 OfflineParaformerModelConfig, 12 OfflineParaformerModelConfig,
@@ -56,6 +57,8 @@ class OfflineRecognizer(object): @@ -56,6 +57,8 @@ class OfflineRecognizer(object):
56 model_type: str = "transducer", 57 model_type: str = "transducer",
57 rule_fsts: str = "", 58 rule_fsts: str = "",
58 rule_fars: str = "", 59 rule_fars: str = "",
  60 + lm: str = "",
  61 + lm_scale: float = 0.1,
59 ): 62 ):
60 """ 63 """
61 Please refer to 64 Please refer to
@@ -143,9 +146,21 @@ class OfflineRecognizer(object): @@ -143,9 +146,21 @@ class OfflineRecognizer(object):
143 f"--hotwords-file. Currently given: {decoding_method}" 146 f"--hotwords-file. Currently given: {decoding_method}"
144 ) 147 )
145 148
  149 + if lm and decoding_method != "modified_beam_search":
  150 + raise ValueError(
  151 + "Please use --decoding-method=modified_beam_search when using "
  152 + f"--lm. Currently given: {decoding_method}"
  153 + )
  154 +
  155 + lm_config = OfflineLMConfig(
  156 + model=lm,
  157 + scale=lm_scale,
  158 + )
  159 +
146 recognizer_config = OfflineRecognizerConfig( 160 recognizer_config = OfflineRecognizerConfig(
147 feat_config=feat_config, 161 feat_config=feat_config,
148 model_config=model_config, 162 model_config=model_config,
  163 + lm_config=lm_config,
149 decoding_method=decoding_method, 164 decoding_method=decoding_method,
150 max_active_paths=max_active_paths, 165 max_active_paths=max_active_paths,
151 hotwords_file=hotwords_file, 166 hotwords_file=hotwords_file,