Fangjun Kuang
Committed by GitHub

Support adding punctuations to text for node-addon-api (#876)

... ... @@ -18,7 +18,7 @@ fi
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
# SHERPA_ONNX_VERSION=1.0.22
# SHERPA_ONNX_VERSION=1.0.23
if [ -z $owner ]; then
owner=k2-fsa
... ...
... ... @@ -6,6 +6,15 @@ d=nodejs-addon-examples
echo "dir: $d"
cd $d
echo "----------add punctuations----------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
node ./test_punctuation.js
rm -rf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12
echo "----------audio tagging----------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
... ...
... ... @@ -55,7 +55,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
# SHERPA_ONNX_VERSION=1.0.22
# SHERPA_ONNX_VERSION=1.0.23
src_dir=.github/scripts/node-addon
sed -i.bak s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g $src_dir/package.json
... ...
... ... @@ -31,6 +31,12 @@ export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PAT
The following tables list the examples in this folder.
## Add punctuations to text
|File| Description|
|---|---|
|[./test_punctuation.js](./test_punctuation.js)| Add punctuations to input text using [CT transformer](https://modelscope.cn/models/iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/summary). It supports both Chinese and English.|
## Voice activity detection (VAD)
|File| Description|
... ... @@ -309,3 +315,13 @@ git clone https://github.com/csukuangfj/sr-data
node ./test_speaker_identification.js
```
### Add punctuations
```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
node ./test_punctuation.js
```
... ...
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
const sherpa_onnx = require('sherpa-onnx-node');
// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models
/**
 * Create a punctuation model that uses the CT transformer ONNX file.
 *
 * The model file is referenced by a relative path, so the extracted model
 * directory has to be reachable from where this script is run.
 *
 * @returns {Object} A new `sherpa_onnx.Punctuation` instance.
 */
function createPunctuation() {
  const ctTransformer =
      './sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx';

  const model = {
    ctTransformer,
    debug: true,
    numThreads: 1,
    provider: 'cpu',
  };

  return new sherpa_onnx.Punctuation({model});
}
// Build the punctuation model once and reuse it for every sentence below.
const punct = createPunctuation();

// Unpunctuated sample inputs: mixed Chinese/English, Chinese only and
// English only.
const sentences = [
  '这是一个测试你好吗How are you我很好thank you are you ok谢谢你',
  '我们都是木头人不会说话不会动',
  'The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry',
];

// Print every input next to its punctuated output, separated by '---'.
console.log('---');
sentences.forEach((sentence) => {
  const punctuated = punct.addPunct(sentence);
  console.log(`Input: ${sentence}`);
  console.log(`Output: ${punctuated}`);
  console.log('---');
});
... ...
... ... @@ -2,6 +2,8 @@
const sherpa_onnx = require('sherpa-onnx-node');
// Please download whisper multi-lingual models from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
function createSpokenLanguageID() {
const config = {
whisper: {
... ...
... ... @@ -21,6 +21,7 @@ set(srcs
src/audio-tagging.cc
src/non-streaming-asr.cc
src/non-streaming-tts.cc
src/punctuation.cc
src/sherpa-onnx-node-addon-api.cc
src/speaker-identification.cc
src/spoken-language-identification.cc
... ...
const addon = require('./addon.js');
/**
 * JavaScript-side wrapper around the native offline punctuation addon.
 */
class Punctuation {
  /**
   * @param {Object} config Configuration object; it is forwarded unchanged
   *     to `addon.createOfflinePunctuation()` and also kept on the instance.
   */
  constructor(config) {
    const handle = addon.createOfflinePunctuation(config);
    Object.assign(this, {handle, config});
  }

  /**
   * Add punctuation to the given text.
   *
   * @param {string} text Input text.
   * @returns {*} Whatever `addon.offlinePunctuationAddPunct()` returns for
   *     this instance's handle and `text`.
   */
  addPunct(text) {
    const {handle} = this;
    return addon.offlinePunctuationAddPunct(handle, text);
  }
}
// Public API of this module.
module.exports = {Punctuation};
... ...
... ... @@ -6,6 +6,7 @@ const vad = require('./vad.js');
const slid = require('./spoken-language-identification.js');
const sid = require('./speaker-identification.js');
const at = require('./audio-tagg.js');
const punct = require('./punctuation.js');
module.exports = {
OnlineRecognizer: streaming_asr.OnlineRecognizer,
... ... @@ -20,4 +21,5 @@ module.exports = {
SpeakerEmbeddingExtractor: sid.SpeakerEmbeddingExtractor,
SpeakerEmbeddingManager: sid.SpeakerEmbeddingManager,
AudioTagging: at.AudioTagging,
Punctuation: punct.Punctuation,
}
... ...
... ... @@ -166,7 +166,7 @@ static Napi::Object AudioTaggingComputeWrapper(const Napi::CallbackInfo &info) {
if (!info[1].IsExternal()) {
Napi::TypeError::New(
env, "You should pass a offline stream pointer as the second argument")
env, "You should pass an offline stream pointer as the second argument")
.ThrowAsJavaScriptException();
return {};
... ...
// scripts/node-addon-api/src/punctuation.cc
//
// Copyright (c) 2024 Xiaomi Corporation
#include <sstream>
#include "macros.h" // NOLINT
#include "napi.h" // NOLINT
#include "sherpa-onnx/c-api/c-api.h"
// Convert the `model` property of a JS config object into the C API struct.
//
// Returns a zero-filled struct when `obj.model` is missing or is not an
// object.
//
// NOTE(review): the SHERPA_ONNX_ASSIGN_ATTR_* macros come from macros.h,
// which is not shown here. They are not passed the struct or the JS object
// explicitly, so they presumably refer to the locals `c` and `o` by name;
// keep those two names unchanged and confirm against macros.h.
static SherpaOnnxOfflinePunctuationModelConfig GetOfflinePunctuationModelConfig(
    Napi::Object obj) {
  SherpaOnnxOfflinePunctuationModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!(obj.Has("model") && obj.Get("model").IsObject())) {
    return c;
  }

  Napi::Object o = obj.Get("model").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(ct_transformer, ctTransformer);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);

  // `debug` is accepted either as a JS boolean or as a JS number; any other
  // type leaves the zero-initialized value untouched.
  if (o.Has("debug")) {
    Napi::Value debug_value = o.Get("debug");
    if (debug_value.IsBoolean()) {
      c.debug = debug_value.As<Napi::Boolean>().Value();
    } else if (debug_value.IsNumber()) {
      c.debug = debug_value.As<Napi::Number>().Int32Value();
    }
  }

  SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider);

  return c;
}
// JS entry point: createOfflinePunctuation(config).
//
// Expects exactly one argument, which must be an object. On success it
// returns an external wrapping the native punctuation handle; the handle is
// destroyed by the finalizer attached to that external. On any failure a JS
// TypeError is raised and an empty value is returned.
static Napi::External<SherpaOnnxOfflinePunctuation>
CreateOfflinePunctuationWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const size_t num_args = info.Length();
  if (num_args != 1) {
    Napi::TypeError::New(
        env, "Expect only 1 argument. Given: " + std::to_string(num_args))
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsObject()) {
    Napi::TypeError::New(env, "You should pass an object as the only argument.")
        .ThrowAsJavaScriptException();

    return {};
  }

  SherpaOnnxOfflinePunctuationConfig config;
  memset(&config, 0, sizeof(config));
  config.model = GetOfflinePunctuationModelConfig(info[0].As<Napi::Object>());

  const SherpaOnnxOfflinePunctuation *handle =
      SherpaOnnxCreateOfflinePunctuation(&config);

  // Release the string fields of the temporary config now that the native
  // object has been created (presumably they were allocated with new[] while
  // the config was parsed -- confirm against macros.h). delete[] on a null
  // pointer is a no-op, so no explicit null checks are needed.
  delete[] config.model.ct_transformer;
  delete[] config.model.provider;

  if (handle == nullptr) {
    Napi::TypeError::New(env, "Please check your config!")
        .ThrowAsJavaScriptException();

    return {};
  }

  return Napi::External<SherpaOnnxOfflinePunctuation>::New(
      env, const_cast<SherpaOnnxOfflinePunctuation *>(handle),
      [](Napi::Env /*env*/, SherpaOnnxOfflinePunctuation *p) {
        SherpaOnnxDestroyOfflinePunctuation(p);
      });
}
// JS entry point: offlinePunctuationAddPunct(handle, text).
//
// Expects exactly two arguments: the external created by
// createOfflinePunctuation() and a string. Returns the string produced by
// SherpaOfflinePunctuationAddPunct() for the UTF-8 form of `text`. On invalid
// arguments a JS TypeError is raised and an empty value is returned.
static Napi::String OfflinePunctuationAddPunctWraper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const size_t num_args = info.Length();
  if (num_args != 2) {
    Napi::TypeError::New(
        env, "Expect only 2 arguments. Given: " + std::to_string(num_args))
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env,
        "You should pass an offline punctuation pointer as the first argument")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsString()) {
    Napi::TypeError::New(env, "You should pass a string as the second argument")
        .ThrowAsJavaScriptException();

    return {};
  }

  SherpaOnnxOfflinePunctuation *handle =
      info[0].As<Napi::External<SherpaOnnxOfflinePunctuation>>().Data();

  const std::string input = info[1].As<Napi::String>().Utf8Value();

  const char *output = SherpaOfflinePunctuationAddPunct(handle, input.c_str());

  // Copy the result into a JS string first, then hand the C buffer back to
  // the library for release.
  Napi::String result = Napi::String::New(env, output);
  SherpaOfflinePunctuationFreeText(output);

  return result;
}
// Register the punctuation entry points on the addon's `exports` object
// under the names `createOfflinePunctuation` and
// `offlinePunctuationAddPunct`.
void InitPunctuation(Napi::Env env, Napi::Object exports) {
  Napi::Function create_fn =
      Napi::Function::New(env, CreateOfflinePunctuationWrapper);
  Napi::Function add_punct_fn =
      Napi::Function::New(env, OfflinePunctuationAddPunctWraper);

  exports.Set("createOfflinePunctuation", create_fn);
  exports.Set("offlinePunctuationAddPunct", add_punct_fn);
}
... ...
... ... @@ -21,6 +21,8 @@ void InitSpeakerID(Napi::Env env, Napi::Object exports);
void InitAudioTagging(Napi::Env env, Napi::Object exports);
void InitPunctuation(Napi::Env env, Napi::Object exports);
Napi::Object Init(Napi::Env env, Napi::Object exports) {
InitStreamingAsr(env, exports);
InitNonStreamingAsr(env, exports);
... ... @@ -31,6 +33,7 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) {
InitSpokenLanguageID(env, exports);
InitSpeakerID(env, exports);
InitAudioTagging(env, exports);
InitPunctuation(env, exports);
return exports;
}
... ...