xuning

引入 YOLOv8 接口,仿照 ncnn 示例实现(Java 侧不使用 OpenCV,像素处理全部在 native 完成)

  1 +package io.livekit.android.track.processing.video;
  2 +
  3 +import androidx.annotation.Nullable;
  4 +
  5 +import io.livekit.android.room.track.video.NoDropVideoProcessor;
  6 +import java.nio.ByteBuffer;
  7 +import livekit.org.webrtc.JavaI420Buffer;
  8 +import livekit.org.webrtc.VideoFrame;
  9 +import livekit.org.webrtc.VideoSink;
  10 +import livekit.org.webrtc.VideoFrame.I420Buffer;
  11 +
  12 +/**
  13 + * YoloV8VideoProcessor
  14 + * - Extends NoDropVideoProcessor
  15 + * - Delegates all pixel processing to native (cpp) via processI420ToI420 in yolov8ncnn
  16 + * - Java avoids OpenCV, only handles buffers and frame plumbing
  17 + * - Output frame rotation is unified to 180 (metadata)
  18 + */
  19 +public class YoloV8VideoProcessor extends NoDropVideoProcessor {
  20 +
  21 + @Nullable
  22 + private VideoSink targetSink;
  23 +
  24 + // Reusable direct buffers for output I420
  25 + private ByteBuffer outY;
  26 + private ByteBuffer outU;
  27 + private ByteBuffer outV;
  28 + private int outYCapacity;
  29 + private int outUCapacity;
  30 + private int outVCapacity;
  31 +
  32 + static {
  33 + try {
  34 + System.loadLibrary("yolov8ncnn");
  35 + android.util.Log.d("YoloV8VideoProcessor", "System.loadLibrary(yolov8ncnn) success");
  36 + } catch (Throwable t) {
  37 + android.util.Log.e("YoloV8VideoProcessor", "System.loadLibrary(yolov8ncnn) failed", t);
  38 + }
  39 + }
  40 +
  41 + // Load model before processing
  42 + // taskid: 0=det_coco,1=det_oiv7,2=seg,3=pose,4=cls,5=obb
  43 + // modelid: 0..8 (n,s,m repeated per task), cpugpu: 0=cpu,1=gpu,2=turnip
  44 + public native boolean loadModel(android.content.res.AssetManager mgr, int taskid, int modelid, int cpugpu);
  45 +
  46 + // Core native that processes I420 in/out fully in cpp
  47 + private static native boolean processI420ToI420(
  48 + ByteBuffer y, int yStride,
  49 + ByteBuffer u, int uStride,
  50 + ByteBuffer v, int vStride,
  51 + int width, int height, int rotation,
  52 + ByteBuffer outY, int outYStride,
  53 + ByteBuffer outU, int outUStride,
  54 + ByteBuffer outV, int outVStride
  55 + );
  56 +
  57 + @Override
  58 + public void setSink(@Nullable VideoSink sink) {
  59 + this.targetSink = sink;
  60 + }
  61 +
  62 + @Override
  63 + public void onCapturerStarted(boolean started) {
  64 + // No GL or Surface path here.
  65 + }
  66 +
  67 + @Override
  68 + public void onCapturerStopped() {
  69 + // No-op
  70 + }
  71 +
  72 + @Override
  73 + public void onFrameCaptured(VideoFrame frame) {
  74 + final VideoSink sink = targetSink;
  75 + if (sink == null) return;
  76 +
  77 + I420Buffer i420 = frame.getBuffer().toI420();
  78 + try {
  79 + final int width = i420.getWidth();
  80 + final int height = i420.getHeight();
  81 +
  82 + final ByteBuffer y = i420.getDataY();
  83 + final ByteBuffer u = i420.getDataU();
  84 + final ByteBuffer v = i420.getDataV();
  85 + final int yStride = i420.getStrideY();
  86 + final int uStride = i420.getStrideU();
  87 + final int vStride = i420.getStrideV();
  88 +
  89 + // Ensure output buffers capacity (match input strides)
  90 + final int needY = yStride * height;
  91 + final int needU = uStride * (height / 2);
  92 + final int needV = vStride * (height / 2);
  93 + ensureOutBuffers(needY, needU, needV);
  94 +
  95 + // JNI: cpp processes fully and writes to out buffers
  96 + final boolean ok = processI420ToI420(
  97 + y, yStride,
  98 + u, uStride,
  99 + v, vStride,
  100 + width, height, frame.getRotation(),
  101 + outY, yStride,
  102 + outU, uStride,
  103 + outV, vStride
  104 + );
  105 +
  106 + if (!ok) {
  107 + // Fallback passthrough
  108 + sink.onFrame(frame);
  109 + return;
  110 + }
  111 +
  112 + // Copy processed planes into a freshly-allocated WebRTC buffer to avoid lifecycle issues
  113 + outY.position(0);
  114 + outU.position(0);
  115 + outV.position(0);
  116 +
  117 + JavaI420Buffer outBuf = JavaI420Buffer.allocate(width, height);
  118 + try {
  119 + // Copy Y
  120 + ByteBuffer dstY = outBuf.getDataY();
  121 + int dstYStride = outBuf.getStrideY();
  122 + for (int r = 0; r < height; r++) {
  123 + int srcPos = r * yStride;
  124 + int dstPos = r * dstYStride;
  125 + int copy = Math.min(width, yStride);
  126 + byte[] row = new byte[copy];
  127 + outY.position(srcPos);
  128 + outY.get(row, 0, copy);
  129 + dstY.position(dstPos);
  130 + dstY.put(row, 0, copy);
  131 + }
  132 +
  133 + // Copy U
  134 + int h2 = height / 2;
  135 + int w2 = width / 2;
  136 + ByteBuffer dstU = outBuf.getDataU();
  137 + int dstUStride = outBuf.getStrideU();
  138 + for (int r = 0; r < h2; r++) {
  139 + int srcPos = r * uStride;
  140 + int dstPos = r * dstUStride;
  141 + int copy = Math.min(w2, uStride);
  142 + byte[] row = new byte[copy];
  143 + outU.position(srcPos);
  144 + outU.get(row, 0, copy);
  145 + dstU.position(dstPos);
  146 + dstU.put(row, 0, copy);
  147 + }
  148 +
  149 + // Copy V
  150 + ByteBuffer dstV = outBuf.getDataV();
  151 + int dstVStride = outBuf.getStrideV();
  152 + for (int r = 0; r < h2; r++) {
  153 + int srcPos = r * vStride;
  154 + int dstPos = r * dstVStride;
  155 + int copy = Math.min(w2, vStride);
  156 + byte[] row = new byte[copy];
  157 + outV.position(srcPos);
  158 + outV.get(row, 0, copy);
  159 + dstV.position(dstPos);
  160 + dstV.put(row, 0, copy);
  161 + }
  162 +
  163 + // Unify rotation to 180 by metadata to align downstream assumptions
  164 + VideoFrame outFrame = new VideoFrame(outBuf, 180, frame.getTimestampNs());
  165 + sink.onFrame(outFrame);
  166 + } finally {
  167 + // Avoid double-release; buffer lifecycle managed by VideoFrame
  168 + }
  169 + } finally {
  170 + i420.release();
  171 + }
  172 + }
  173 +
  174 + private void ensureOutBuffers(int needY, int needU, int needV) {
  175 + if (outY == null || outYCapacity < needY) {
  176 + outYCapacity = roundUp(needY, 64);
  177 + outY = ByteBuffer.allocateDirect(outYCapacity);
  178 + }
  179 + if (outU == null || outUCapacity < needU) {
  180 + outUCapacity = roundUp(needU, 64);
  181 + outU = ByteBuffer.allocateDirect(outUCapacity);
  182 + }
  183 + if (outV == null || outVCapacity < needV) {
  184 + outVCapacity = roundUp(needV, 64);
  185 + outV = ByteBuffer.allocateDirect(outVCapacity);
  186 + }
  187 + outY.limit(needY).position(0);
  188 + outU.limit(needU).position(0);
  189 + outV.limit(needV).position(0);
  190 + }
  191 +
  192 + private static int roundUp(int x, int a) {
  193 + return ((x + a - 1) / a) * a;
  194 + }
  195 +}
@@ -9,3 +9,7 @@ find_package(ncnn REQUIRED)
9 add_library(rvmncnn SHARED rvmncnn.cpp rvm.cpp ndkcamera.cpp opencv_processor.cpp) 9 add_library(rvmncnn SHARED rvmncnn.cpp rvm.cpp ndkcamera.cpp opencv_processor.cpp)
10 10
11 target_link_libraries(rvmncnn ncnn ${OpenCV_LIBS} camera2ndk mediandk) 11 target_link_libraries(rvmncnn ncnn ${OpenCV_LIBS} camera2ndk mediandk)
  12 +
  13 +# yolov8 ncnn target
  14 +add_library(yolov8ncnn SHARED yolov8ncnn.cpp yolov8.cpp yolov8_det.cpp yolov8_seg.cpp yolov8_pose.cpp yolov8_cls.cpp yolov8_obb.cpp ndkcamera.cpp)
  15 +target_link_libraries(yolov8ncnn ncnn ${OpenCV_LIBS} camera2ndk mediandk)
@@ -265,6 +265,187 @@ JNIEXPORT jboolean JNICALL Java_com_tencent_yolov8ncnn_YOLOv8Ncnn_loadModel(JNIE
265 return JNI_TRUE; 265 return JNI_TRUE;
266 } 266 }
267 267
  268 +// duplicate loadModel for OpencvVideoProcessor
  269 +JNIEXPORT jboolean JNICALL Java_io_livekit_android_track_processing_video_OpencvVideoProcessor_loadModel(JNIEnv* env, jobject thiz, jobject assetManager, jint taskid, jint modelid, jint cpugpu)
  270 +{
  271 + if (taskid < 0 || taskid > 5 || modelid < 0 || modelid > 8 || cpugpu < 0 || cpugpu > 2)
  272 + {
  273 + return JNI_FALSE;
  274 + }
  275 +
  276 + AAssetManager* mgr = AAssetManager_fromJava(env, assetManager);
  277 +
  278 + __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "loadModel %p (OpencvVideoProcessor)", mgr);
  279 +
  280 + const char* tasknames[6] =
  281 + {
  282 + "",
  283 + "_oiv7",
  284 + "_seg",
  285 + "_pose",
  286 + "_cls",
  287 + "_obb"
  288 + };
  289 +
  290 + const char* modeltypes[9] =
  291 + {
  292 + "n",
  293 + "s",
  294 + "m",
  295 + "n",
  296 + "s",
  297 + "m",
  298 + "n",
  299 + "s",
  300 + "m"
  301 + };
  302 +
  303 + std::string parampath = std::string("yolov8") + modeltypes[(int)modelid] + tasknames[(int)taskid] + ".ncnn.param";
  304 + std::string modelpath = std::string("yolov8") + modeltypes[(int)modelid] + tasknames[(int)taskid] + ".ncnn.bin";
  305 + bool use_gpu = (int)cpugpu == 1;
  306 + bool use_turnip = (int)cpugpu == 2;
  307 +
  308 + {
  309 + ncnn::MutexLockGuard g(lock);
  310 +
  311 + {
  312 + static int old_taskid = 0;
  313 + static int old_modelid = 0;
  314 + static int old_cpugpu = 0;
  315 + if (taskid != old_taskid || (modelid % 3) != old_modelid || cpugpu != old_cpugpu)
  316 + {
  317 + // taskid or model or cpugpu changed
  318 + delete g_yolov8;
  319 + g_yolov8 = 0;
  320 + }
  321 + old_taskid = taskid;
  322 + old_modelid = modelid % 3;
  323 + old_cpugpu = cpugpu;
  324 +
  325 + ncnn::destroy_gpu_instance();
  326 +
  327 + if (use_turnip)
  328 + {
  329 + ncnn::create_gpu_instance("libvulkan_freedreno.so");
  330 + }
  331 + else if (use_gpu)
  332 + {
  333 + ncnn::create_gpu_instance();
  334 + }
  335 +
  336 + if (!g_yolov8)
  337 + {
  338 + if (taskid == 0) g_yolov8 = new YOLOv8_det_coco;
  339 + if (taskid == 1) g_yolov8 = new YOLOv8_det_oiv7;
  340 + if (taskid == 2) g_yolov8 = new YOLOv8_seg;
  341 + if (taskid == 3) g_yolov8 = new YOLOv8_pose;
  342 + if (taskid == 4) g_yolov8 = new YOLOv8_cls;
  343 + if (taskid == 5) g_yolov8 = new YOLOv8_obb;
  344 +
  345 + g_yolov8->load(mgr, parampath.c_str(), modelpath.c_str(), use_gpu || use_turnip);
  346 + }
  347 + int target_size = 320;
  348 + if ((int)modelid >= 3)
  349 + target_size = 480;
  350 + if ((int)modelid >= 6)
  351 + target_size = 640;
  352 + g_yolov8->set_det_target_size(target_size);
  353 + }
  354 + }
  355 +
  356 + return JNI_TRUE;
  357 +}
  358 +
  359 +// duplicate loadModel for YoloV8VideoProcessor
  360 +JNIEXPORT jboolean JNICALL Java_io_livekit_android_track_processing_video_YoloV8VideoProcessor_loadModel(JNIEnv* env, jobject thiz, jobject assetManager, jint taskid, jint modelid, jint cpugpu)
  361 +{
  362 + if (taskid < 0 || taskid > 5 || modelid < 0 || modelid > 8 || cpugpu < 0 || cpugpu > 2)
  363 + {
  364 + return JNI_FALSE;
  365 + }
  366 +
  367 + AAssetManager* mgr = AAssetManager_fromJava(env, assetManager);
  368 +
  369 + __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "loadModel %p (YoloV8VideoProcessor)", mgr);
  370 +
  371 + const char* tasknames[6] =
  372 + {
  373 + "",
  374 + "_oiv7",
  375 + "_seg",
  376 + "_pose",
  377 + "_cls",
  378 + "_obb"
  379 + };
  380 +
  381 + const char* modeltypes[9] =
  382 + {
  383 + "n",
  384 + "s",
  385 + "m",
  386 + "n",
  387 + "s",
  388 + "m",
  389 + "n",
  390 + "s",
  391 + "m"
  392 + };
  393 +
  394 + std::string parampath = std::string("yolov8") + modeltypes[(int)modelid] + tasknames[(int)taskid] + ".ncnn.param";
  395 + std::string modelpath = std::string("yolov8") + modeltypes[(int)modelid] + tasknames[(int)taskid] + ".ncnn.bin";
  396 + bool use_gpu = (int)cpugpu == 1;
  397 + bool use_turnip = (int)cpugpu == 2;
  398 +
  399 + {
  400 + ncnn::MutexLockGuard g(lock);
  401 +
  402 + {
  403 + static int old_taskid = 0;
  404 + static int old_modelid = 0;
  405 + static int old_cpugpu = 0;
  406 + if (taskid != old_taskid || (modelid % 3) != old_modelid || cpugpu != old_cpugpu)
  407 + {
  408 + delete g_yolov8;
  409 + g_yolov8 = 0;
  410 + }
  411 + old_taskid = taskid;
  412 + old_modelid = modelid % 3;
  413 + old_cpugpu = cpugpu;
  414 +
  415 + ncnn::destroy_gpu_instance();
  416 +
  417 + if (use_turnip)
  418 + {
  419 + ncnn::create_gpu_instance("libvulkan_freedreno.so");
  420 + }
  421 + else if (use_gpu)
  422 + {
  423 + ncnn::create_gpu_instance();
  424 + }
  425 +
  426 + if (!g_yolov8)
  427 + {
  428 + if (taskid == 0) g_yolov8 = new YOLOv8_det_coco;
  429 + if (taskid == 1) g_yolov8 = new YOLOv8_det_oiv7;
  430 + if (taskid == 2) g_yolov8 = new YOLOv8_seg;
  431 + if (taskid == 3) g_yolov8 = new YOLOv8_pose;
  432 + if (taskid == 4) g_yolov8 = new YOLOv8_cls;
  433 + if (taskid == 5) g_yolov8 = new YOLOv8_obb;
  434 +
  435 + g_yolov8->load(mgr, parampath.c_str(), modelpath.c_str(), use_gpu || use_turnip);
  436 + }
  437 + int target_size = 320;
  438 + if ((int)modelid >= 3)
  439 + target_size = 480;
  440 + if ((int)modelid >= 6)
  441 + target_size = 640;
  442 + g_yolov8->set_det_target_size(target_size);
  443 + }
  444 + }
  445 +
  446 + return JNI_TRUE;
  447 +}
  448 +
268 // public native boolean openCamera(int facing); 449 // public native boolean openCamera(int facing);
269 JNIEXPORT jboolean JNICALL Java_com_tencent_yolov8ncnn_YOLOv8Ncnn_openCamera(JNIEnv* env, jobject thiz, jint facing) 450 JNIEXPORT jboolean JNICALL Java_com_tencent_yolov8ncnn_YOLOv8Ncnn_openCamera(JNIEnv* env, jobject thiz, jint facing)
270 { 451 {
@@ -300,4 +481,114 @@ JNIEXPORT jboolean JNICALL Java_com_tencent_yolov8ncnn_YOLOv8Ncnn_setOutputWindo
300 return JNI_TRUE; 481 return JNI_TRUE;
301 } 482 }
302 483
  484 +// process I420 in/out without Java-side OpenCV
  485 +// signature: Java_com_tencent_yolov8ncnn_YOLOv8Ncnn_processI420ToI420
  486 +JNIEXPORT jboolean JNICALL Java_io_livekit_android_track_processing_video_YoloV8VideoProcessor_processI420ToI420(
  487 + JNIEnv* env, jclass,
  488 + jobject yBuf, jint yStride,
  489 + jobject uBuf, jint uStride,
  490 + jobject vBuf, jint vStride,
  491 + jint width, jint height, jint rotation,
  492 + jobject outYBuf, jint outYStride,
  493 + jobject outUBuf, jint outUStride,
  494 + jobject outVBuf, jint outVStride)
  495 +{
  496 + if (!yBuf || !uBuf || !vBuf || !outYBuf || !outUBuf || !outVBuf || width <= 0 || height <= 0)
  497 + return JNI_FALSE;
  498 +
  499 + uint8_t* yPtr = (uint8_t*)env->GetDirectBufferAddress(yBuf);
  500 + uint8_t* uPtr = (uint8_t*)env->GetDirectBufferAddress(uBuf);
  501 + uint8_t* vPtr = (uint8_t*)env->GetDirectBufferAddress(vBuf);
  502 + uint8_t* outYPtr = (uint8_t*)env->GetDirectBufferAddress(outYBuf);
  503 + uint8_t* outUPtr = (uint8_t*)env->GetDirectBufferAddress(outUBuf);
  504 + uint8_t* outVPtr = (uint8_t*)env->GetDirectBufferAddress(outVBuf);
  505 +
  506 + if (!yPtr || !uPtr || !vPtr || !outYPtr || !outUPtr || !outVPtr)
  507 + return JNI_FALSE;
  508 +
  509 + // Pack input planes with stride into a contiguous I420 buffer
  510 + const int yH = height;
  511 + const int uvH = height / 2;
  512 + const int yW = width;
  513 + const int uvW = width / 2;
  514 +
  515 + const int ySize = yW * yH;
  516 + const int uSize = uvW * uvH;
  517 + const int vSize = uvW * uvH;
  518 +
  519 + std::vector<uint8_t> i420_in(ySize + uSize + vSize);
  520 + uint8_t* inY = i420_in.data();
  521 + uint8_t* inU = inY + ySize;
  522 + uint8_t* inV = inU + uSize;
  523 +
  524 + for (int r = 0; r < yH; ++r) {
  525 + memcpy(inY + r * yW, yPtr + r * yStride, yW);
  526 + }
  527 + for (int r = 0; r < uvH; ++r) {
  528 + memcpy(inU + r * uvW, uPtr + r * uStride, uvW);
  529 + memcpy(inV + r * uvW, vPtr + r * vStride, uvW);
  530 + }
  531 +
  532 + // Wrap as a single-channel Mat (H + H/2) x W and convert to RGB
  533 + cv::Mat i420_mat(height + height / 2, width, CV_8UC1, i420_in.data());
  534 + cv::Mat rgb;
  535 + cv::cvtColor(i420_mat, rgb, cv::COLOR_YUV2RGB_I420);
  536 +
  537 + // Rotate to upright orientation for the model
  538 + if (rotation == 90) {
  539 + cv::rotate(rgb, rgb, cv::ROTATE_90_CLOCKWISE);
  540 + } else if (rotation == 180) {
  541 + cv::rotate(rgb, rgb, cv::ROTATE_180);
  542 + } else if (rotation == 270) {
  543 + cv::rotate(rgb, rgb, cv::ROTATE_90_COUNTERCLOCKWISE);
  544 + }
  545 +
  546 + // Process with YOLOv8
  547 + {
  548 + ncnn::MutexLockGuard g(lock);
  549 +
  550 + if (g_yolov8)
  551 + {
  552 + std::vector<Object> objects;
  553 + g_yolov8->detect(rgb, objects);
  554 + g_yolov8->draw(rgb, objects);
  555 + }
  556 + else
  557 + {
  558 + draw_unsupported(rgb);
  559 + }
  560 + }
  561 +
  562 + // Rotate back to original orientation before returning to I420
  563 + if (rotation == 90) {
  564 + cv::rotate(rgb, rgb, cv::ROTATE_90_COUNTERCLOCKWISE);
  565 + } else if (rotation == 180) {
  566 + cv::rotate(rgb, rgb, cv::ROTATE_180);
  567 + } else if (rotation == 270) {
  568 + cv::rotate(rgb, rgb, cv::ROTATE_90_CLOCKWISE);
  569 + }
  570 +
  571 + // Convert back to I420
  572 + cv::Mat i420_out;
  573 + cv::cvtColor(rgb, i420_out, cv::COLOR_RGB2YUV_I420);
  574 + if (i420_out.empty() || i420_out.cols != width || i420_out.rows != height + height / 2)
  575 + return JNI_FALSE;
  576 +
  577 + const uint8_t* outBase = i420_out.ptr<uint8_t>(0);
  578 + const uint8_t* srcY = outBase;
  579 + const uint8_t* srcU = srcY + ySize;
  580 + const uint8_t* srcV = srcU + uSize;
  581 +
  582 + // Write back to output planes honoring strides
  583 + for (int r = 0; r < yH; ++r) {
  584 + memcpy(outYPtr + r * outYStride, srcY + r * yW, yW);
  585 + }
  586 + for (int r = 0; r < uvH; ++r) {
  587 + memcpy(outUPtr + r * outUStride, srcU + r * uvW, uvW);
  588 + memcpy(outVPtr + r * outVStride, srcV + r * uvW, uvW);
  589 + }
  590 +
  591 + return JNI_TRUE;
  592 +}
  593 +
303 } 594 }