add-punctuation.py 1.5 KB
#!/usr/bin/env python3

"""
This script shows how to add punctuations to text using sherpa-onnx Python API.

Please download the model from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models

The following is an example

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
"""

from pathlib import Path

import sherpa_onnx


def main():
    model = "./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx"
    if not Path(model).is_file():
        raise ValueError(f"{model} does not exist")
    config = sherpa_onnx.OfflinePunctuationConfig(
        model=sherpa_onnx.OfflinePunctuationModelConfig(ct_transformer=model),
    )

    punct = sherpa_onnx.OfflinePunctuation(config)

    text_list = [
        "这是一个测试你好吗How are you我很好thank you are you ok谢谢你",
        "我们都是木头人不会说话不会动",
        "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
    ]
    for text in text_list:
        text_with_punct = punct.add_punctuation(text)
        print("----------")
        print(f"input: {text}")
        print(f"output: {text_with_punct}")

    print("----------")


if __name__ == "__main__":
    main()