Aruxxxi
Committed by GitHub

feat: add punctuation C++ API (#2510)

Co-authored-by: Aruxxxi <xiangcl@zhisuan.com>
... ... @@ -30,6 +30,9 @@ target_link_libraries(sense-voice-cxx-api sherpa-onnx-cxx-api)
# NeMo Canary example, linked against the C++ API library.
add_executable(nemo-canary-cxx-api ./nemo-canary-cxx-api.cc)
target_link_libraries(nemo-canary-cxx-api sherpa-onnx-cxx-api)
# Offline punctuation example, linked against the C++ API library.
add_executable(punctuation-cxx-api ./punctuation-cxx-api.cc)
target_link_libraries(punctuation-cxx-api sherpa-onnx-cxx-api)
if(SHERPA_ONNX_ENABLE_PORTAUDIO)
add_executable(sense-voice-simulate-streaming-microphone-cxx-api
./sense-voice-simulate-streaming-microphone-cxx-api.cc
... ...
// cxx-api-examples/punctuation-cxx-api.cc
// Copyright (c) 2025 Xiaomi Corporation
// To run this example, first download and extract the punctuation model:
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
// tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
// rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
#include <iostream>
#include <string>
#include "sherpa-onnx/c-api/cxx-api.h"
int32_t main() {
using namespace sherpa_onnx::cxx; // NOLINT
OfflinePunctuationConfig punctuation_config;
punctuation_config.model.ct_transformer = "./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx";
punctuation_config.model.num_threads = 1;
punctuation_config.model.debug = false;
punctuation_config.model.provider = "cpu";
OfflinePunctuation punct = OfflinePunctuation::Create(punctuation_config);
if (!punct.Get()) {
std::cerr << "Failed to create punctuation model. Please check your config\n";
return -1;
}
std::string text = "你好吗how are you Fantasitic 谢谢我很好你怎么样呢";
std::string text_with_punct = punct.AddPunctuation(text);
std::cout << "Original text: " << text << std::endl;
std::cout << "With punctuation: " << text_with_punct << std::endl;
return 0;
}
... ...
... ... @@ -821,4 +821,33 @@ bool FileExists(const std::string &filename) {
return SherpaOnnxFileExists(filename.c_str());
}
// ============================================================
// For Offline Punctuation
// ============================================================
// Creates an OfflinePunctuation from a C++ config.
//
// The C++ config is translated field by field into the zero-initialized C
// struct expected by SherpaOnnxCreateOfflinePunctuation(). The C strings
// borrow storage from `config`, which stays alive for the whole call.
//
// @param config  Model path and runtime settings.
// @return A wrapper around whatever handle the C API returned.
OfflinePunctuation OfflinePunctuation::Create(
    const OfflinePunctuationConfig &config) {
  SherpaOnnxOfflinePunctuationConfig c_config;
  memset(&c_config, 0, sizeof(c_config));

  const OfflinePunctuationModelConfig &model = config.model;
  c_config.model.provider = model.provider.c_str();
  c_config.model.debug = model.debug;
  c_config.model.num_threads = model.num_threads;
  c_config.model.ct_transformer = model.ct_transformer.c_str();

  return OfflinePunctuation(SherpaOnnxCreateOfflinePunctuation(&c_config));
}
// Wraps an existing C API handle by forwarding it to the MoveOnly base.
// `p` is whatever SherpaOnnxCreateOfflinePunctuation() returned in Create().
OfflinePunctuation::OfflinePunctuation(const SherpaOnnxOfflinePunctuation *p)
: MoveOnly<OfflinePunctuation, SherpaOnnxOfflinePunctuation>(p) {}
// Releases the C API handle `p` via SherpaOnnxDestroyOfflinePunctuation().
// NOTE(review): presumably invoked by the MoveOnly base when the wrapper is
// destroyed or reassigned - confirm against MoveOnly.
void OfflinePunctuation::Destroy(const SherpaOnnxOfflinePunctuation *p) const {
SherpaOnnxDestroyOfflinePunctuation(p);
}
// Adds punctuation to `text` and returns the punctuated copy.
//
// The C API returns a C string that must be released with
// SherpaOfflinePunctuationFreeText(), so it is copied into a std::string
// first and then freed.
//
// @param text  Input text without punctuation.
// @return The text with punctuation, or an empty string if the C API
//         returned a null pointer.
std::string OfflinePunctuation::AddPunctuation(const std::string &text) const {
  const char *result = SherpaOfflinePunctuationAddPunct(p_, text.c_str());
  if (result == nullptr) {
    // Constructing std::string from a null pointer is undefined behaviour,
    // and there is nothing to free in this case.
    return {};
  }

  std::string ans(result);
  SherpaOfflinePunctuationFreeText(result);
  return ans;
}
} // namespace sherpa_onnx::cxx
... ...
... ... @@ -673,6 +673,34 @@ SHERPA_ONNX_API std::string GetGitSha1();
SHERPA_ONNX_API std::string GetGitDate();
SHERPA_ONNX_API bool FileExists(const std::string &filename);
// ============================================================================
// Offline Punctuation
// ============================================================================
// Model-level settings for offline punctuation.
struct OfflinePunctuationModelConfig {
  // Path to the CT-transformer model file, e.g. ".../model.onnx".
  std::string ct_transformer{};

  // Thread count handed to the C API; defaults to 1.
  int32_t num_threads{1};

  // Debug flag handed to the C API; off by default.
  bool debug{false};

  // Provider name handed to the C API; defaults to "cpu".
  std::string provider{"cpu"};
};
// Top-level configuration passed to OfflinePunctuation::Create().
struct OfflinePunctuationConfig {
// Model path and runtime settings.
OfflinePunctuationModelConfig model;
};
// C++ wrapper around the C API's SherpaOnnxOfflinePunctuation handle.
// Ownership of the handle is managed through the MoveOnly base class.
class SHERPA_ONNX_API OfflinePunctuation
: public MoveOnly<OfflinePunctuation, SherpaOnnxOfflinePunctuation> {
public:
// Creates an instance from `config`.
// NOTE(review): callers appear to be expected to check Get() for a null
// handle to detect failure - confirm.
static OfflinePunctuation Create(const OfflinePunctuationConfig &config);
// Releases the C API handle `p`.
// NOTE(review): presumably public so that the MoveOnly base can call it -
// confirm.
void Destroy(const SherpaOnnxOfflinePunctuation *p) const;
// Adds punctuation to the input text and returns the punctuated text.
std::string AddPunctuation(const std::string &text) const;
private:
// Wraps an existing C API handle; use Create() to obtain an instance.
explicit OfflinePunctuation(const SherpaOnnxOfflinePunctuation *p);
};
} // namespace sherpa_onnx::cxx
#endif // SHERPA_ONNX_C_API_CXX_API_H_
... ...