Fangjun Kuang
Committed by GitHub

Add Java API example for hotwords. (#1442)

1 lib 1 lib
2 hs_err* 2 hs_err*
3 !run-*.sh 3 !run-*.sh
  4 +./hotwords_cn.txt
  1 +// Copyright 2024 Xiaomi Corporation
  2 +
  3 +// This file shows how to use an offline transducer, i.e., non-streaming transducer,
  4 +// to decode files with hotwords support.
  5 +//
  6 +// See also
  7 +// https://k2-fsa.github.io/sherpa/onnx/hotwords/index.html#modeling-unit-is-cjkchar
  8 +import com.k2fsa.sherpa.onnx.*;
  9 +
  10 +public class NonStreamingDecodeFileTransducerHotwords {
  11 + public static void main(String[] args) {
  12 + // please refer to
  13 + // https://k2-fsa.github.io/sherpa/onnx/hotwords/index.html#modeling-unit-is-cjkchar
  14 + // to download model files
  15 + String encoder =
  16 + "./sherpa-onnx-conformer-zh-stateless2-2023-05-23/encoder-epoch-99-avg-1.int8.onnx";
  17 + String decoder = "./sherpa-onnx-conformer-zh-stateless2-2023-05-23/decoder-epoch-99-avg-1.onnx";
  18 + String joiner = "./sherpa-onnx-conformer-zh-stateless2-2023-05-23/joiner-epoch-99-avg-1.onnx";
  19 + String tokens = "./sherpa-onnx-conformer-zh-stateless2-2023-05-23/tokens.txt";
  20 +
  21 + String waveFilename = "./sherpa-onnx-conformer-zh-stateless2-2023-05-23/test_wavs/6.wav";
  22 +
  23 + WaveReader reader = new WaveReader(waveFilename);
  24 +
  25 + OfflineTransducerModelConfig transducer =
  26 + OfflineTransducerModelConfig.builder()
  27 + .setEncoder(encoder)
  28 + .setDecoder(decoder)
  29 + .setJoiner(joiner)
  30 + .build();
  31 +
  32 + OfflineModelConfig modelConfig =
  33 + OfflineModelConfig.builder()
  34 + .setTransducer(transducer)
  35 + .setTokens(tokens)
  36 + .setNumThreads(1)
  37 + .setDebug(true)
  38 + .setModelingUnit("cjkchar")
  39 + .build();
  40 +
  41 + OfflineRecognizerConfig config =
  42 + OfflineRecognizerConfig.builder()
  43 + .setOfflineModelConfig(modelConfig)
  44 + .setDecodingMethod("modified_beam_search")
  45 + .setHotwordsFile("./hotwords_cn.txt")
  46 + .setHotwordsScore(2.0f)
  47 + .build();
  48 +
  49 + OfflineRecognizer recognizer = new OfflineRecognizer(config);
  50 + OfflineStream stream = recognizer.createStream();
  51 + stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());
  52 +
  53 + recognizer.decode(stream);
  54 +
  55 + String text = recognizer.getResult(stream).getText();
  56 +
  57 + System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);
  58 +
  59 + stream.release();
  60 + recognizer.release();
  61 + }
  62 +}
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  6 + mkdir -p ../build
  7 + pushd ../build
  8 + cmake \
  9 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  10 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  11 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  12 + -DBUILD_SHARED_LIBS=ON \
  13 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  14 + -DSHERPA_ONNX_ENABLE_JNI=ON \
  15 + ..
  16 +
  17 + make -j4
  18 + ls -lh lib
  19 + popd
  20 +fi
  21 +
  22 +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  23 + pushd ../sherpa-onnx/java-api
  24 + make
  25 + popd
  26 +fi
  27 +
  28 +if [ ! -f ./sherpa-onnx-conformer-zh-stateless2-2023-05-23/tokens.txt ]; then
  29 + wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-conformer-zh-stateless2-2023-05-23.tar.bz2
  30 + tar xvf sherpa-onnx-conformer-zh-stateless2-2023-05-23.tar.bz2
  31 + rm sherpa-onnx-conformer-zh-stateless2-2023-05-23.tar.bz2
  32 +fi
  33 +
  34 +if [ ! -f hotwords_cn.txt ]; then
  35 + cat > hotwords_cn.txt <<EOF
  36 +朱丽楠
  37 +EOF
  38 +fi
  39 +
  40 +java \
  41 + -Djava.library.path=$PWD/../build/lib \
  42 + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  43 + NonStreamingDecodeFileTransducerHotwords.java