xuning

Introduce a YOLOv8 interface, modeled on the existing ncnn integration

package io.livekit.android.track.processing.video;
import androidx.annotation.Nullable;
import io.livekit.android.room.track.video.NoDropVideoProcessor;
import java.nio.ByteBuffer;
import livekit.org.webrtc.JavaI420Buffer;
import livekit.org.webrtc.VideoFrame;
import livekit.org.webrtc.VideoSink;
import livekit.org.webrtc.VideoFrame.I420Buffer;
/**
 * YoloV8VideoProcessor
 * - Extends NoDropVideoProcessor
 * - Delegates all pixel processing to native (cpp) via processI420ToI420 in yolov8ncnn
 * - Java avoids OpenCV, only handles buffers and frame plumbing
 * - Output frame rotation is unified to 180 (metadata)
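 *
 * Usage sketch (hypothetical wiring; the track-attachment API varies by LiveKit SDK version):
 *   YoloV8VideoProcessor processor = new YoloV8VideoProcessor();
 *   // taskid 0 = COCO detection, modelid 0 = yolov8n @ 320, cpugpu 0 = CPU
 *   processor.loadModel(context.getAssets(), 0, 0, 0);
 *   // then install the processor on the local video track before publishing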
 */
public class YoloV8VideoProcessor extends NoDropVideoProcessor {

    @Nullable
    private VideoSink targetSink;

    // Reusable direct buffers for output I420
    private ByteBuffer outY;
    private ByteBuffer outU;
    private ByteBuffer outV;
    private int outYCapacity;
    private int outUCapacity;
    private int outVCapacity;
    static {
        try {
            System.loadLibrary("yolov8ncnn");
            android.util.Log.d("YoloV8VideoProcessor", "System.loadLibrary(yolov8ncnn) success");
        } catch (Throwable t) {
            android.util.Log.e("YoloV8VideoProcessor", "System.loadLibrary(yolov8ncnn) failed", t);
        }
    }
    // Load model before processing
    // taskid: 0=det_coco, 1=det_oiv7, 2=seg, 3=pose, 4=cls, 5=obb
    // modelid: 0..8 (n,s,m repeated per task), cpugpu: 0=cpu, 1=gpu, 2=turnip
    public native boolean loadModel(android.content.res.AssetManager mgr, int taskid, int modelid, int cpugpu);

    // Core native that processes I420 in/out fully in cpp
    private static native boolean processI420ToI420(
            ByteBuffer y, int yStride,
            ByteBuffer u, int uStride,
            ByteBuffer v, int vStride,
            int width, int height, int rotation,
            ByteBuffer outY, int outYStride,
            ByteBuffer outU, int outUStride,
            ByteBuffer outV, int outVStride
    );
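    // Buffer contract: all six planes must be direct ByteBuffers (the native side uses
    // GetDirectBufferAddress), and each out plane must hold at least stride * height
    // bytes (Y) or stride * height/2 bytes (U, V); ensureOutBuffers() below guarantees this.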
    @Override
    public void setSink(@Nullable VideoSink sink) {
        this.targetSink = sink;
    }

    @Override
    public void onCapturerStarted(boolean started) {
        // No GL or Surface path here.
    }

    @Override
    public void onCapturerStopped() {
        // No-op
    }
    @Override
    public void onFrameCaptured(VideoFrame frame) {
        final VideoSink sink = targetSink;
        if (sink == null) return;
        I420Buffer i420 = frame.getBuffer().toI420();
        if (i420 == null) {
            // Conversion failed; pass the original frame through untouched.
            sink.onFrame(frame);
            return;
        }
        try {
            final int width = i420.getWidth();
            final int height = i420.getHeight();
            final ByteBuffer y = i420.getDataY();
            final ByteBuffer u = i420.getDataU();
            final ByteBuffer v = i420.getDataV();
            final int yStride = i420.getStrideY();
            final int uStride = i420.getStrideU();
            final int vStride = i420.getStrideV();
            // Ensure output buffers capacity (match input strides)
            final int needY = yStride * height;
            final int needU = uStride * (height / 2);
            final int needV = vStride * (height / 2);
            ensureOutBuffers(needY, needU, needV);
            // JNI: cpp processes fully and writes to out buffers
            final boolean ok = processI420ToI420(
                    y, yStride,
                    u, uStride,
                    v, vStride,
                    width, height, frame.getRotation(),
                    outY, yStride,
                    outU, uStride,
                    outV, vStride
            );
            if (!ok) {
                // Fallback passthrough
                sink.onFrame(frame);
                return;
            }
            // Copy processed planes into a freshly allocated WebRTC buffer so the
            // reusable direct buffers can be overwritten by the next frame.
            outY.position(0);
            outU.position(0);
            outV.position(0);
            JavaI420Buffer outBuf = JavaI420Buffer.allocate(width, height);
            int h2 = height / 2;
            int w2 = width / 2;
            copyPlane(outY, yStride, outBuf.getDataY(), outBuf.getStrideY(), width, height);
            copyPlane(outU, uStride, outBuf.getDataU(), outBuf.getStrideU(), w2, h2);
            copyPlane(outV, vStride, outBuf.getDataV(), outBuf.getStrideV(), w2, h2);
            // Unify rotation to 180 by metadata to align downstream assumptions.
            // NOTE: only the metadata is forced; the pixel data keeps its capture orientation.
            VideoFrame outFrame = new VideoFrame(outBuf, 180, frame.getTimestampNs());
            sink.onFrame(outFrame);
            // onFrame retains the buffer if the sink needs it; drop our own reference.
            outFrame.release();
        } finally {
            i420.release();
        }
    }

    // Copies rows x rowBytes from src to dst, honoring each side's stride.
    private static void copyPlane(ByteBuffer src, int srcStride,
                                  ByteBuffer dst, int dstStride,
                                  int rowBytes, int rows) {
        byte[] row = new byte[rowBytes];
        for (int r = 0; r < rows; r++) {
            src.position(r * srcStride);
            src.get(row, 0, rowBytes);
            dst.position(r * dstStride);
            dst.put(row, 0, rowBytes);
        }
    }
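    /**
     * (Re)allocates the reusable direct output buffers when the required plane sizes grow;
     * capacities are rounded up to 64-byte multiples so small fluctuations reuse the same
     * allocation, and limits are reset to the exact sizes needed for the current frame.
     */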
    private void ensureOutBuffers(int needY, int needU, int needV) {
        if (outY == null || outYCapacity < needY) {
            outYCapacity = roundUp(needY, 64);
            outY = ByteBuffer.allocateDirect(outYCapacity);
        }
        if (outU == null || outUCapacity < needU) {
            outUCapacity = roundUp(needU, 64);
            outU = ByteBuffer.allocateDirect(outUCapacity);
        }
        if (outV == null || outVCapacity < needV) {
            outVCapacity = roundUp(needV, 64);
            outV = ByteBuffer.allocateDirect(outVCapacity);
        }
        outY.limit(needY).position(0);
        outU.limit(needU).position(0);
        outV.limit(needV).position(0);
    }

    private static int roundUp(int x, int a) {
        return ((x + a - 1) / a) * a;
    }
}
... ...
@@ -8,4 +8,8 @@ find_package(ncnn REQUIRED)
add_library(rvmncnn SHARED rvmncnn.cpp rvm.cpp ndkcamera.cpp opencv_processor.cpp)
target_link_libraries(rvmncnn ncnn ${OpenCV_LIBS} camera2ndk mediandk)

# yolov8 ncnn target
add_library(yolov8ncnn SHARED yolov8ncnn.cpp yolov8.cpp yolov8_det.cpp yolov8_seg.cpp yolov8_pose.cpp yolov8_cls.cpp yolov8_obb.cpp ndkcamera.cpp)
target_link_libraries(yolov8ncnn ncnn ${OpenCV_LIBS} camera2ndk mediandk)
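# (both targets link ${OpenCV_LIBS}; this assumes find_package(OpenCV) appears earlier in this CMakeLists, outside this hunk)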
... ...
@@ -265,6 +265,187 @@ JNIEXPORT jboolean JNICALL Java_com_tencent_yolov8ncnn_YOLOv8Ncnn_loadModel(JNIE
    return JNI_TRUE;
}
// duplicate loadModel for OpencvVideoProcessor
JNIEXPORT jboolean JNICALL Java_io_livekit_android_track_processing_video_OpencvVideoProcessor_loadModel(JNIEnv* env, jobject thiz, jobject assetManager, jint taskid, jint modelid, jint cpugpu)
{
    if (taskid < 0 || taskid > 5 || modelid < 0 || modelid > 8 || cpugpu < 0 || cpugpu > 2)
    {
        return JNI_FALSE;
    }

    AAssetManager* mgr = AAssetManager_fromJava(env, assetManager);
    __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "loadModel %p (OpencvVideoProcessor)", mgr);

    const char* tasknames[6] =
    {
        "",
        "_oiv7",
        "_seg",
        "_pose",
        "_cls",
        "_obb"
    };

    const char* modeltypes[9] =
    {
        "n",
        "s",
        "m",
        "n",
        "s",
        "m",
        "n",
        "s",
        "m"
    };

    std::string parampath = std::string("yolov8") + modeltypes[(int)modelid] + tasknames[(int)taskid] + ".ncnn.param";
    std::string modelpath = std::string("yolov8") + modeltypes[(int)modelid] + tasknames[(int)taskid] + ".ncnn.bin";
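    // e.g. taskid=2 (seg), modelid=1 (s) -> "yolov8s_seg.ncnn.param" / "yolov8s_seg.ncnn.bin"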
    bool use_gpu = (int)cpugpu == 1;
    bool use_turnip = (int)cpugpu == 2;

    {
        ncnn::MutexLockGuard g(lock);
        {
            static int old_taskid = 0;
            static int old_modelid = 0;
            static int old_cpugpu = 0;
            if (taskid != old_taskid || (modelid % 3) != old_modelid || cpugpu != old_cpugpu)
            {
                // taskid or model or cpugpu changed
                delete g_yolov8;
                g_yolov8 = 0;
            }
            old_taskid = taskid;
            old_modelid = modelid % 3;
            old_cpugpu = cpugpu;

            ncnn::destroy_gpu_instance();
            if (use_turnip)
            {
                ncnn::create_gpu_instance("libvulkan_freedreno.so");
            }
            else if (use_gpu)
            {
                ncnn::create_gpu_instance();
            }

            if (!g_yolov8)
            {
                if (taskid == 0) g_yolov8 = new YOLOv8_det_coco;
                if (taskid == 1) g_yolov8 = new YOLOv8_det_oiv7;
                if (taskid == 2) g_yolov8 = new YOLOv8_seg;
                if (taskid == 3) g_yolov8 = new YOLOv8_pose;
                if (taskid == 4) g_yolov8 = new YOLOv8_cls;
                if (taskid == 5) g_yolov8 = new YOLOv8_obb;
                g_yolov8->load(mgr, parampath.c_str(), modelpath.c_str(), use_gpu || use_turnip);
            }
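            // modelid also selects the input tier: 0-2 -> 320, 3-5 -> 480, 6-8 -> 640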
            int target_size = 320;
            if ((int)modelid >= 3)
                target_size = 480;
            if ((int)modelid >= 6)
                target_size = 640;
            g_yolov8->set_det_target_size(target_size);
        }
    }

    return JNI_TRUE;
}
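// NOTE: the two duplicate entry points share the single g_yolov8 instance but keep their own
// old_* statics, so alternating between them can bypass the change check above; load from one
// processor type at a time.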
// duplicate loadModel for YoloV8VideoProcessor
JNIEXPORT jboolean JNICALL Java_io_livekit_android_track_processing_video_YoloV8VideoProcessor_loadModel(JNIEnv* env, jobject thiz, jobject assetManager, jint taskid, jint modelid, jint cpugpu)
{
    if (taskid < 0 || taskid > 5 || modelid < 0 || modelid > 8 || cpugpu < 0 || cpugpu > 2)
    {
        return JNI_FALSE;
    }

    AAssetManager* mgr = AAssetManager_fromJava(env, assetManager);
    __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "loadModel %p (YoloV8VideoProcessor)", mgr);

    const char* tasknames[6] =
    {
        "",
        "_oiv7",
        "_seg",
        "_pose",
        "_cls",
        "_obb"
    };

    const char* modeltypes[9] =
    {
        "n",
        "s",
        "m",
        "n",
        "s",
        "m",
        "n",
        "s",
        "m"
    };

    std::string parampath = std::string("yolov8") + modeltypes[(int)modelid] + tasknames[(int)taskid] + ".ncnn.param";
    std::string modelpath = std::string("yolov8") + modeltypes[(int)modelid] + tasknames[(int)taskid] + ".ncnn.bin";

    bool use_gpu = (int)cpugpu == 1;
    bool use_turnip = (int)cpugpu == 2;
    {
        ncnn::MutexLockGuard g(lock);
        {
            static int old_taskid = 0;
            static int old_modelid = 0;
            static int old_cpugpu = 0;
            if (taskid != old_taskid || (modelid % 3) != old_modelid || cpugpu != old_cpugpu)
            {
                delete g_yolov8;
                g_yolov8 = 0;
            }
            old_taskid = taskid;
            old_modelid = modelid % 3;
            old_cpugpu = cpugpu;

            ncnn::destroy_gpu_instance();
            if (use_turnip)
            {
                ncnn::create_gpu_instance("libvulkan_freedreno.so");
            }
            else if (use_gpu)
            {
                ncnn::create_gpu_instance();
            }

            if (!g_yolov8)
            {
                if (taskid == 0) g_yolov8 = new YOLOv8_det_coco;
                if (taskid == 1) g_yolov8 = new YOLOv8_det_oiv7;
                if (taskid == 2) g_yolov8 = new YOLOv8_seg;
                if (taskid == 3) g_yolov8 = new YOLOv8_pose;
                if (taskid == 4) g_yolov8 = new YOLOv8_cls;
                if (taskid == 5) g_yolov8 = new YOLOv8_obb;
                g_yolov8->load(mgr, parampath.c_str(), modelpath.c_str(), use_gpu || use_turnip);
            }

            int target_size = 320;
            if ((int)modelid >= 3)
                target_size = 480;
            if ((int)modelid >= 6)
                target_size = 640;
            g_yolov8->set_det_target_size(target_size);
        }
    }

    return JNI_TRUE;
}
// public native boolean openCamera(int facing);
JNIEXPORT jboolean JNICALL Java_com_tencent_yolov8ncnn_YOLOv8Ncnn_openCamera(JNIEnv* env, jobject thiz, jint facing)
{
@@ -300,4 +481,114 @@ JNIEXPORT jboolean JNICALL Java_com_tencent_yolov8ncnn_YOLOv8Ncnn_setOutputWindo
    return JNI_TRUE;
}
// process I420 in/out without Java-side OpenCV
// signature: Java_io_livekit_android_track_processing_video_YoloV8VideoProcessor_processI420ToI420
JNIEXPORT jboolean JNICALL Java_io_livekit_android_track_processing_video_YoloV8VideoProcessor_processI420ToI420(
        JNIEnv* env, jclass,
        jobject yBuf, jint yStride,
        jobject uBuf, jint uStride,
        jobject vBuf, jint vStride,
        jint width, jint height, jint rotation,
        jobject outYBuf, jint outYStride,
        jobject outUBuf, jint outUStride,
        jobject outVBuf, jint outVStride)
{
    if (!yBuf || !uBuf || !vBuf || !outYBuf || !outUBuf || !outVBuf || width <= 0 || height <= 0)
        return JNI_FALSE;
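    // GetDirectBufferAddress returns NULL for non-direct buffers, so the Java side must pass
    // buffers created with ByteBuffer.allocateDirect (or WebRTC's natively backed plane buffers).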
    uint8_t* yPtr = (uint8_t*)env->GetDirectBufferAddress(yBuf);
    uint8_t* uPtr = (uint8_t*)env->GetDirectBufferAddress(uBuf);
    uint8_t* vPtr = (uint8_t*)env->GetDirectBufferAddress(vBuf);
    uint8_t* outYPtr = (uint8_t*)env->GetDirectBufferAddress(outYBuf);
    uint8_t* outUPtr = (uint8_t*)env->GetDirectBufferAddress(outUBuf);
    uint8_t* outVPtr = (uint8_t*)env->GetDirectBufferAddress(outVBuf);
    if (!yPtr || !uPtr || !vPtr || !outYPtr || !outUPtr || !outVPtr)
        return JNI_FALSE;
    // Pack input planes with stride into a contiguous I420 buffer
    const int yH = height;
    const int uvH = height / 2;
    const int yW = width;
    const int uvW = width / 2;
    const int ySize = yW * yH;
    const int uSize = uvW * uvH;
    const int vSize = uvW * uvH;
    std::vector<uint8_t> i420_in(ySize + uSize + vSize);
    uint8_t* inY = i420_in.data();
    uint8_t* inU = inY + ySize;
    uint8_t* inV = inU + uSize;
    for (int r = 0; r < yH; ++r) {
        memcpy(inY + r * yW, yPtr + r * yStride, yW);
    }
    for (int r = 0; r < uvH; ++r) {
        memcpy(inU + r * uvW, uPtr + r * uStride, uvW);
        memcpy(inV + r * uvW, vPtr + r * vStride, uvW);
    }
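    // (width and height are assumed even, as required by I420 4:2:0 chroma subsampling)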
    // Wrap as a single-channel Mat (H + H/2) x W and convert to RGB
    cv::Mat i420_mat(height + height / 2, width, CV_8UC1, i420_in.data());
    cv::Mat rgb;
    cv::cvtColor(i420_mat, rgb, cv::COLOR_YUV2RGB_I420);

    // Rotate to upright orientation for the model
    if (rotation == 90) {
        cv::rotate(rgb, rgb, cv::ROTATE_90_CLOCKWISE);
    } else if (rotation == 180) {
        cv::rotate(rgb, rgb, cv::ROTATE_180);
    } else if (rotation == 270) {
        cv::rotate(rgb, rgb, cv::ROTATE_90_COUNTERCLOCKWISE);
    }
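    // (for 90/270 the Mat is now transposed; the inverse rotation below restores the dims)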
    // Process with YOLOv8
    {
        ncnn::MutexLockGuard g(lock);
        if (g_yolov8)
        {
            std::vector<Object> objects;
            g_yolov8->detect(rgb, objects);
            g_yolov8->draw(rgb, objects);
        }
        else
        {
            draw_unsupported(rgb);
        }
    }
    // Rotate back to original orientation before returning to I420
    if (rotation == 90) {
        cv::rotate(rgb, rgb, cv::ROTATE_90_COUNTERCLOCKWISE);
    } else if (rotation == 180) {
        cv::rotate(rgb, rgb, cv::ROTATE_180);
    } else if (rotation == 270) {
        cv::rotate(rgb, rgb, cv::ROTATE_90_CLOCKWISE);
    }
    // Convert back to I420
    cv::Mat i420_out;
    cv::cvtColor(rgb, i420_out, cv::COLOR_RGB2YUV_I420);
    if (i420_out.empty() || i420_out.cols != width || i420_out.rows != height + height / 2)
        return JNI_FALSE;
    const uint8_t* outBase = i420_out.ptr<uint8_t>(0);
    const uint8_t* srcY = outBase;
    const uint8_t* srcU = srcY + ySize;
    const uint8_t* srcV = srcU + uSize;

    // Write back to output planes honoring strides
    for (int r = 0; r < yH; ++r) {
        memcpy(outYPtr + r * outYStride, srcY + r * yW, yW);
    }
    for (int r = 0; r < uvH; ++r) {
        memcpy(outUPtr + r * outUStride, srcU + r * uvW, uvW);
        memcpy(outVPtr + r * outVStride, srcV + r * uvW, uvW);
    }

    return JNI_TRUE;
}
}
... ...