xuning

Resolve new issues: successfully hook up the ncnn library and implement the OpenCV inverted (upside-down) picture

... ... @@ -33,7 +33,7 @@ import io.livekit.android.room.track.CameraPosition
import io.livekit.android.room.track.LocalVideoTrack
import io.livekit.android.room.track.LocalVideoTrackOptions
import io.livekit.android.room.track.video.CameraCapturerUtils
import io.livekit.android.track.processing.video.RVMNcnn
import io.livekit.android.track.processing.video.OpencvVideoProcessor
import io.livekit.android.util.LoggingLevel
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.asExecutor
... ... @@ -42,14 +42,12 @@ import livekit.org.webrtc.EglBase
@OptIn(ExperimentalCamera2Interop::class)
class MainViewModel(application: Application) : AndroidViewModel(application) {
val eglBase = EglBase.create()
private val processor = RVMNcnn(eglBase)
init {
LiveKit.loggingLevel = LoggingLevel.INFO
}
val eglBase = EglBase.create()
val room = LiveKit.create(
application,
overrides = LiveKitOverrides(
... ... @@ -57,25 +55,14 @@ class MainViewModel(application: Application) : AndroidViewModel(application) {
),
)
private val virtualBackground = (AppCompatResources.getDrawable(application, R.drawable.background) as BitmapDrawable).bitmap
private var blur = 16f
private val processor = OpencvVideoProcessor()
private var cameraProvider: CameraCapturerUtils.CameraProvider? = null
private var imageAnalysis = ImageAnalysis.Builder()
.setResolutionSelector(
ResolutionSelector.Builder()
// LocalVideoTrack defaults to a 16:9 aspect ratio (VideoPreset169.H720),
// while CameraX ImageAnalysis defaults to 4:3, so prefer 16:9 here
.setAspectRatioStrategy(AspectRatioStrategy.RATIO_16_9_FALLBACK_AUTO_STRATEGY)
.build(),
)
.build()
init {
CameraXHelper.createCameraProvider(ProcessLifecycleOwner.get(), arrayOf(imageAnalysis)).let {
CameraXHelper.createCameraProvider(ProcessLifecycleOwner.get(), arrayOf()).let {
if (it.isSupported(application)) {
CameraCapturerUtils.registerCameraProvider(it)
cameraProvider = it
... ... @@ -99,46 +86,28 @@ class MainViewModel(application: Application) : AndroidViewModel(application) {
super.onCleared()
track.value?.stopCapture()
room.release()
processor.dispose()
cameraProvider?.let {
CameraCapturerUtils.unregisterCameraProvider(it)
}
}
fun toggleProcessor(): Boolean {
val newState = !processor.enabled
processor.enabled = newState
return newState
// OpencvVideoProcessor has no enable/disable switch; return true as a placeholder
return true
}
fun decreaseBlur() {
// RVMNcnn does not support adjusting blur; keep this method for sample-UI compatibility and just log a no-op
blur = maxOf(0f, blur - 5)
android.util.Log.e("MainViewModel", "RVMNcnn: decreaseBlur noop, current blur=$blur, enabled=${processor.enabled}")
// No-op: OpencvVideoProcessor does not support blur
}
fun increaseBlur() {
// RVMNcnn does not support adjusting blur; keep this method for sample-UI compatibility and just log a no-op
blur = minOf(50f, blur + 5)
android.util.Log.e("MainViewModel", "RVMNcnn: increaseBlur noop, current blur=$blur, enabled=${processor.enabled}")
// No-op: OpencvVideoProcessor does not support blur
}
fun toggleVirtualBackground(): Boolean {
// Use RVMNcnn's background-image API
// Returns true if a background was set, false if it was cleared
val videoTrack = track.value
return if (videoTrack != null) {
// Simple toggle: set the background if none is set, otherwise clear it
// The native state cannot be read directly here, so a boolean toggle driven by the UI state is used
val set = processor.updateBackgroundImage(virtualBackground)
if (!set) {
processor.updateBackgroundImage(null)
}
set
} else {
// Can be set directly before capture has started
processor.updateBackgroundImage(virtualBackground)
}
// No-op: OpencvVideoProcessor does not support background images; return false
return false
}
fun flipCamera() {
... ...
package io.livekit.android.track.processing.video;
import androidx.annotation.Nullable;
import io.livekit.android.room.track.video.NoDropVideoProcessor;
import java.nio.ByteBuffer;
import livekit.org.webrtc.JavaI420Buffer;
import livekit.org.webrtc.VideoFrame;
import livekit.org.webrtc.VideoSink;
import livekit.org.webrtc.VideoFrame.I420Buffer;
/**
* OpencvVideoProcessor
* - Extends NoDropVideoProcessor
* - Delegates all pixel processing to native (cpp) via processI420ToI420
* - Java avoids OpenCV, only handles buffers and frame plumbing
* - Output frame rotation is unified to 180
*/
public class OpencvVideoProcessor extends NoDropVideoProcessor {
@Nullable
private VideoSink targetSink;
// Reusable direct buffers for output I420
private ByteBuffer outY;
private ByteBuffer outU;
private ByteBuffer outV;
private int outYCapacity;
private int outUCapacity;
private int outVCapacity;
static {
try {
System.loadLibrary("rvmncnn");
android.util.Log.d("OpencvVideoProcessor", "System.loadLibrary(rvmncnn) success");
} catch (Throwable t) {
android.util.Log.e("OpencvVideoProcessor", "System.loadLibrary(rvmncnn) failed", t);
}
}
// Core native that processes I420 in/out fully in cpp
private static native boolean processI420ToI420(
ByteBuffer y, int yStride,
ByteBuffer u, int uStride,
ByteBuffer v, int vStride,
int width, int height, int rotation,
ByteBuffer outY, int outYStride,
ByteBuffer outU, int outUStride,
ByteBuffer outV, int outVStride
);
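// Contract: every buffer must be a direct ByteBuffer (the native side uses
// GetDirectBufferAddress), and the caller passes the input strides as the
// output strides so rows can be copied back without repacking.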
@Override
public void setSink(@Nullable VideoSink sink) {
this.targetSink = sink;
}
@Override
public void onCapturerStarted(boolean started) {
// No GL or Surface path here.
}
@Override
public void onCapturerStopped() {
// No-op
}
@Override
public void onFrameCaptured(VideoFrame frame) {
final VideoSink sink = targetSink;
if (sink == null) return;
I420Buffer i420 = frame.getBuffer().toI420();
try {
final int width = i420.getWidth();
final int height = i420.getHeight();
final ByteBuffer y = i420.getDataY();
final ByteBuffer u = i420.getDataU();
final ByteBuffer v = i420.getDataV();
final int yStride = i420.getStrideY();
final int uStride = i420.getStrideU();
final int vStride = i420.getStrideV();
// Ensure output buffers capacity (match input strides)
final int needY = yStride * height;
final int needU = uStride * (height / 2);
final int needV = vStride * (height / 2);
ensureOutBuffers(needY, needU, needV);
// JNI: cpp processes fully and writes to out buffers
final boolean ok = processI420ToI420(
y, yStride,
u, uStride,
v, vStride,
width, height, frame.getRotation(),
outY, yStride,
outU, uStride,
outV, vStride
);
if (!ok) {
// Fallback passthrough
sink.onFrame(frame);
return;
}
// Copy processed planes into a freshly-allocated WebRTC buffer to avoid lifecycle issues
outY.position(0);
outU.position(0);
outV.position(0);
JavaI420Buffer outBuf = JavaI420Buffer.allocate(width, height);
try {
// Copy Y
ByteBuffer dstY = outBuf.getDataY();
int dstYStride = outBuf.getStrideY();
for (int r = 0; r < height; r++) {
int srcPos = r * yStride;
int dstPos = r * dstYStride;
// copy min(width, stride) bytes per row; strides are expected to be >= width
int copy = Math.min(width, yStride);
byte[] row = new byte[copy];
outY.position(srcPos);
outY.get(row, 0, copy);
dstY.position(dstPos);
dstY.put(row, 0, copy);
}
// Copy U
int h2 = height / 2;
int w2 = width / 2;
ByteBuffer dstU = outBuf.getDataU();
int dstUStride = outBuf.getStrideU();
for (int r = 0; r < h2; r++) {
int srcPos = r * uStride;
int dstPos = r * dstUStride;
int copy = Math.min(w2, uStride);
byte[] row = new byte[copy];
outU.position(srcPos);
outU.get(row, 0, copy);
dstU.position(dstPos);
dstU.put(row, 0, copy);
}
// Copy V
ByteBuffer dstV = outBuf.getDataV();
int dstVStride = outBuf.getStrideV();
for (int r = 0; r < h2; r++) {
int srcPos = r * vStride;
int dstPos = r * dstVStride;
int copy = Math.min(w2, vStride);
byte[] row = new byte[copy];
outV.position(srcPos);
outV.get(row, 0, copy);
dstV.position(dstPos);
dstV.put(row, 0, copy);
}
// Unify rotation to 180 via frame metadata
VideoFrame outFrame = new VideoFrame(outBuf, 180, frame.getTimestampNs());
sink.onFrame(outFrame);
} finally {
// Sinks retain frames they need beyond onFrame(), so release our reference here;
// this also frees the buffer on the exception path and avoids leaking native memory.
outBuf.release();
}
} finally {
i420.release();
}
}
private void ensureOutBuffers(int needY, int needU, int needV) {
if (outY == null || outYCapacity < needY) {
outYCapacity = roundUp(needY, 64);
outY = ByteBuffer.allocateDirect(outYCapacity);
}
if (outU == null || outUCapacity < needU) {
outUCapacity = roundUp(needU, 64);
outU = ByteBuffer.allocateDirect(outUCapacity);
}
if (outV == null || outVCapacity < needV) {
outVCapacity = roundUp(needV, 64);
outV = ByteBuffer.allocateDirect(outVCapacity);
}
outY.limit(needY).position(0);
outU.limit(needU).position(0);
outV.limit(needV).position(0);
}
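// Worked sizing example (illustrative, assuming plane strides equal the plane widths):
// for a 1280x720 frame, needY = 1280 * 720 = 921,600 bytes and
// needU = needV = 640 * 360 = 230,400 bytes; both are already multiples of 64,
// so roundUp() leaves them unchanged and the direct buffers are allocated once and reused.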
private static int roundUp(int x, int a) {
return ((x + a - 1) / a) * a;
}
}
\ No newline at end of file
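A minimal smoke-test sketch (not part of the commit) that feeds one synthetic I420 frame through the processor and logs what the sink receives; it assumes the rvmncnn native library is available on the device and uses only the WebRTC Java types already imported above:
// Illustrative harness (assumption: the rvmncnn native library loads successfully).
OpencvVideoProcessor processor = new OpencvVideoProcessor();
processor.setSink(processed -> {
// Rotation is forced to 180, so the rotated size equals the buffer size.
android.util.Log.d("OpencvSmokeTest", "got " + processed.getRotatedWidth() + "x" + processed.getRotatedHeight() + " rotation=" + processed.getRotation());
});
// 640x360 keeps both dimensions even, as I420 requires.
JavaI420Buffer buffer = JavaI420Buffer.allocate(640, 360);
VideoFrame input = new VideoFrame(buffer, 0, System.nanoTime());
processor.onFrameCaptured(input);
input.release(); // the processor takes and releases its own I420 reference internally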
... ...
package io.livekit.android.track.processing.video;
import android.content.res.AssetManager;
import android.graphics.Bitmap;
import android.view.Surface;
import io.livekit.android.room.track.video.NoDropVideoProcessor;
import livekit.org.webrtc.EglBase;
import livekit.org.webrtc.SurfaceTextureHelper;
import livekit.org.webrtc.VideoFrame;
import livekit.org.webrtc.VideoSink;
/**
* RVMNcnn processor that delegates all pixel processing to native (cpp) and
* renders processed frames directly into a Surface provided by SurfaceTextureHelper.
* Java does not perform any image processing.
*/
public class RVMNcnn extends NoDropVideoProcessor {
// Native JNI hooks
public native boolean loadModel(AssetManager mgr, int modelid, int sizeid, int intrainterid, int postprocid, int cpugpu);
public native boolean openCamera(int facing);
public native boolean closeCamera();
public native boolean setOutputWindow(Surface surface);
public native boolean setBackgroundImage(Bitmap bitmap);
public native boolean processFrame();
static {
try {
System.loadLibrary("rvmncnn");
android.util.Log.d("RVMNcnn", "System.loadLibrary(rvmncnn) success");
} catch (Throwable t) {
android.util.Log.e("RVMNcnn", "System.loadLibrary(rvmncnn) failed", t);
}
}
private final EglBase eglBase;
private final SurfaceTextureHelper surfaceTextureHelper;
private final Surface outputSurface;
private VideoSink targetSink;
/**
* Controls whether the native virtual background is enabled.
* When enabled, native renders to outputSurface and Java forwards those frames.
* When disabled, incoming frames are passed through to targetSink.
*/
public boolean enabled = true;
/**
* Facing: 0 back, 1 front. Used when starting native camera pipeline.
*/
private int facing = 1;
public RVMNcnn(EglBase eglBase) {
this.eglBase = eglBase;
this.surfaceTextureHelper = SurfaceTextureHelper.create("RVMNcnn", eglBase.getEglBaseContext());
this.outputSurface = new Surface(surfaceTextureHelper.getSurfaceTexture());
}
@Override
public void onCapturerStarted(boolean started) {
if (started) {
surfaceTextureHelper.setTextureSize(640, 640);
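// Assumption: 640x640 matches the resolution the native pipeline renders into the
// surface; adjust the texture size if the model or renderer uses a different one.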
android.util.Log.d("RVMNcnn", "onCapturerStarted: setOutputWindow + openCamera");
// Listen to frames produced from the output surface (rendered by native),
// and forward to target sink.
surfaceTextureHelper.stopListening();
surfaceTextureHelper.startListening(frame -> {
VideoSink sink = targetSink;
if (sink != null) {
sink.onFrame(frame);
}
});
if (enabled) {
// Direct native to render into our SurfaceTextureHelper's surface
setOutputWindow(outputSurface);
// Start native camera pipeline (cpp will process and render)
openCamera(facing);
}
}
}
@Override
public void onCapturerStopped() {
// Stop Java-side listening and shutdown native pipeline
surfaceTextureHelper.stopListening();
closeCamera();
}
@Override
public void onFrameCaptured(VideoFrame frame) {
// If disabled, pass-through original frames.
if (!enabled) {
VideoSink sink = targetSink;
if (sink != null) {
sink.onFrame(frame);
}
return;
}
// Enabled: Java does not process pixels nor forward original frames.
// Native renders processed frames into outputSurface, which we already forward above.
// Drop the incoming frame here.
}
@Override
public void setSink(VideoSink sink) {
this.targetSink = sink;
}
/**
* Update facing and restart native pipeline if needed.
* 0 = back, 1 = front.
*/
public void setFacing(int facing) {
this.facing = facing == 0 ? 0 : 1;
if (enabled) {
// If running, restart native camera with new facing
closeCamera();
openCamera(this.facing);
}
}
/**
* Update the background image used by native processor.
* Pass null to clear.
*/
public boolean updateBackgroundImage(Bitmap bitmap) {
return setBackgroundImage(bitmap);
}
/**
* Call when disposing the processor.
*/
public void dispose() {
surfaceTextureHelper.stopListening();
closeCamera();
outputSurface.release();
surfaceTextureHelper.dispose();
}
}
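A rough wiring sketch for RVMNcnn on its own (the `context` and `background` variables are hypothetical, and the model/size/thread ids are placeholders whose meaning is defined on the native side):
// Illustrative setup; assumed call order, placeholder ids.
EglBase eglBase = EglBase.create();
RVMNcnn rvm = new RVMNcnn(eglBase);
rvm.loadModel(context.getAssets(), 0, 0, 0, 0, 0); // modelid, sizeid, intrainterid, postprocid, cpugpu
rvm.setFacing(1); // 1 = front camera
rvm.updateBackgroundImage(background); // Bitmap background, or null to clear
// ... attach as the video track's processor while capturing ...
rvm.dispose(); // stops the native pipeline, releases the Surface and SurfaceTextureHelper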
... ... @@ -23,6 +23,7 @@
#include <string>
#include <vector>
#include <cstring>
#include <platform.h>
#include <benchmark.h>
... ... @@ -446,4 +447,95 @@ JNIEXPORT jboolean JNICALL Java_io_livekit_android_track_processing_video_RVMNcn
return JNI_FALSE;
}
// process I420 in/out without Java-side OpenCV
// signature: Java_io_livekit_android_track_processing_video_OpencvVideoProcessor_processI420ToI420
JNIEXPORT jboolean JNICALL Java_io_livekit_android_track_processing_video_OpencvVideoProcessor_processI420ToI420(
JNIEnv* env, jclass,
jobject yBuf, jint yStride,
jobject uBuf, jint uStride,
jobject vBuf, jint vStride,
jint width, jint height, jint rotation,
jobject outYBuf, jint outYStride,
jobject outUBuf, jint outUStride,
jobject outVBuf, jint outVStride)
{
if (!yBuf || !uBuf || !vBuf || !outYBuf || !outUBuf || !outVBuf || width <= 0 || height <= 0)
return JNI_FALSE;
uint8_t* yPtr = (uint8_t*)env->GetDirectBufferAddress(yBuf);
uint8_t* uPtr = (uint8_t*)env->GetDirectBufferAddress(uBuf);
uint8_t* vPtr = (uint8_t*)env->GetDirectBufferAddress(vBuf);
uint8_t* outYPtr = (uint8_t*)env->GetDirectBufferAddress(outYBuf);
uint8_t* outUPtr = (uint8_t*)env->GetDirectBufferAddress(outUBuf);
uint8_t* outVPtr = (uint8_t*)env->GetDirectBufferAddress(outVBuf);
if (!yPtr || !uPtr || !vPtr || !outYPtr || !outUPtr || !outVPtr)
return JNI_FALSE;
// Pack input planes with stride into a contiguous I420 buffer
const int yH = height;
const int uvH = height / 2;
const int yW = width;
const int uvW = width / 2;
const int ySize = yW * yH;
const int uSize = uvW * uvH;
const int vSize = uvW * uvH;
std::vector<uint8_t> i420_in(ySize + uSize + vSize);
uint8_t* inY = i420_in.data();
uint8_t* inU = inY + ySize;
uint8_t* inV = inU + uSize;
for (int r = 0; r < yH; ++r) {
memcpy(inY + r * yW, yPtr + r * yStride, yW);
}
for (int r = 0; r < uvH; ++r) {
memcpy(inU + r * uvW, uPtr + r * uStride, uvW);
memcpy(inV + r * uvW, vPtr + r * vStride, uvW);
}
// Wrap as a single-channel Mat (H + H/2) x W and convert to BGR
cv::Mat i420_mat(height + height / 2, width, CV_8UC1, i420_in.data());
cv::Mat bgr;
cv::cvtColor(i420_mat, bgr, cv::COLOR_YUV2BGR_I420);
// Process with RVM
{
ncnn::MutexLockGuard g(lock);
if (g_rvm) {
cv::Mat fgr, pha, seg;
g_rvm->detect(bgr, g_feats, fgr, pha, seg);
g_rvm->draw(bgr, fgr, pha, seg);
} else {
draw_unsupported(bgr);
}
}
// Convert back to I420
cv::Mat i420_out;
cv::cvtColor(bgr, i420_out, cv::COLOR_BGR2YUV_I420);
if (i420_out.empty() || i420_out.cols != width || i420_out.rows != height + height / 2)
return JNI_FALSE;
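// cv::cvtColor writes into a freshly allocated, continuous Mat, so the output planes
// can be addressed by plain offsets: Y occupies width*height bytes, followed by the
// U and V planes at (width/2)*(height/2) bytes each.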
const uint8_t* outBase = i420_out.ptr<uint8_t>(0);
const uint8_t* srcY = outBase;
const uint8_t* srcU = srcY + ySize;
const uint8_t* srcV = srcU + uSize;
// Write back to output planes honoring strides
for (int r = 0; r < yH; ++r) {
memcpy(outYPtr + r * outYStride, srcY + r * yW, yW);
}
for (int r = 0; r < uvH; ++r) {
memcpy(outUPtr + r * outUStride, srcU + r * uvW, uvW);
memcpy(outVPtr + r * outVStride, srcV + r * uvW, uvW);
}
// We ignore input 'rotation' here and unify to 180 at Java metadata level
(void)rotation;
return JNI_TRUE;
}
}
... ...