xuning

Re-enable ncnn: replace the Kotlin-side OpenCV/VirtualBackground video processors with the native RVMNcnn pipeline, which processes and renders frames entirely in C++.

@@ -33,7 +33,7 @@ import io.livekit.android.room.track.CameraPosition
 import io.livekit.android.room.track.LocalVideoTrack
 import io.livekit.android.room.track.LocalVideoTrackOptions
 import io.livekit.android.room.track.video.CameraCapturerUtils
-import io.livekit.android.track.processing.video.VirtualBackgroundVideoProcessor
+import io.livekit.android.track.processing.video.RVMNcnn
 import io.livekit.android.util.LoggingLevel
 import kotlinx.coroutines.Dispatchers
 import kotlinx.coroutines.asExecutor
@@ -45,6 +45,16 @@ class MainViewModel(application: Application) : AndroidViewModel(application) {

     init {
         LiveKit.loggingLevel = LoggingLevel.INFO
+        // Load the RVM model: mobilenetv3, target size 640 (sizeid = 6), intra/inter = 0, postproc = 1 (fast), CPU (cpugpu = 0)
+        // For GPU, set cpugpu = 1, or 2 to use the turnip Vulkan driver
+        processor.loadModel(
+            application.getAssets(),
+            /* modelid */ 0,
+            /* sizeid */ 6,
+            /* intrainterid */ 0,
+            /* postprocid */ 1,
+            /* cpugpu */ 0
+        )
     }

     val eglBase = EglBase.create()
@@ -58,10 +68,7 @@ class MainViewModel(application: Application) : AndroidViewModel(application) {
     private val virtualBackground = (AppCompatResources.getDrawable(application, R.drawable.background) as BitmapDrawable).bitmap

     private var blur = 16f
-    private val processor = VirtualBackgroundVideoProcessor(eglBase, Dispatchers.IO, initialBlurRadius = blur).apply {
-        // Leave the background image unset initially so that the blur feature works correctly
-        // backgroundImage = virtualBackground
-    }
+    private val processor = RVMNcnn(eglBase)

     private var cameraProvider: CameraCapturerUtils.CameraProvider? = null

@@ -74,7 +81,6 @@ class MainViewModel(application: Application) : AndroidViewModel(application) {
                 .build(),
         )
         .build()
-        .apply { setAnalyzer(Dispatchers.IO.asExecutor(), processor.imageAnalyzer) }

     init {
         CameraXHelper.createCameraProvider(ProcessLifecycleOwner.get(), arrayOf(imageAnalysis)).let {
@@ -114,24 +120,32 @@ class MainViewModel(application: Application) : AndroidViewModel(application) {
     }

     fun decreaseBlur() {
-        blur = maxOf(0f, blur - 5) // keep blur from going below 0
-        android.util.Log.e("MainViewModel", "=== DECREASING BLUR TO: $blur, processor enabled: ${processor.enabled} ===")
-        processor.updateBlurRadius(blur)
+        // RVMNcnn does not support blur adjustment; keep this method for sample-UI compatibility as a logged no-op
+        blur = maxOf(0f, blur - 5)
+        android.util.Log.e("MainViewModel", "RVMNcnn: decreaseBlur noop, current blur=$blur, enabled=${processor.enabled}")
     }

     fun increaseBlur() {
-        blur = minOf(50f, blur + 5) // cap blur at 50 to avoid over-blurring
-        android.util.Log.e("MainViewModel", "=== INCREASING BLUR TO: $blur, processor enabled: ${processor.enabled} ===")
-        processor.updateBlurRadius(blur)
+        // RVMNcnn does not support blur adjustment; keep this method for sample-UI compatibility as a logged no-op
+        blur = minOf(50f, blur + 5)
+        android.util.Log.e("MainViewModel", "RVMNcnn: increaseBlur noop, current blur=$blur, enabled=${processor.enabled}")
     }

     fun toggleVirtualBackground(): Boolean {
-        if (processor.backgroundImage != virtualBackground) {
-            processor.backgroundImage = virtualBackground
-            return true
+        // Use RVMNcnn's background image API
+        // Returns true if a background was set, false if it was cleared
+        val videoTrack = track.value
+        return if (videoTrack != null) {
+            // Simple toggle: set the background if unset, clear it if already set
+            // The native state cannot be read directly here; a boolean toggle can be driven by UI state instead
+            val set = processor.updateBackgroundImage(virtualBackground)
+            if (!set) {
+                processor.updateBackgroundImage(null)
+            }
+            set
         } else {
-            processor.backgroundImage = null
-            return false
+            // Before capture starts, the background can be set directly
+            processor.updateBackgroundImage(virtualBackground)
         }
     }

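
Note: updateBackgroundImage() returns whether the native call succeeded, not whether a background is currently shown, so the toggle above can drift out of sync with the native state. A minimal sketch of driving the toggle from a ViewModel-side flag instead (the backgroundEnabled field is hypothetical, not part of this change):

// Hypothetical replacement for toggleVirtualBackground() in MainViewModel:
// track the state on the Kotlin side, since it cannot be read back from native.
private var backgroundEnabled = false

fun toggleVirtualBackground(): Boolean {
    backgroundEnabled = !backgroundEnabled
    processor.updateBackgroundImage(if (backgroundEnabled) virtualBackground else null)
    return backgroundEnabled
}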
@@ -1,266 +0,0 @@
-// OpenCVVideoProcessor.kt
-package io.livekit.android.track.processing.video
-
-import android.graphics.Bitmap
-import android.view.Surface
-import io.livekit.android.room.track.video.NoDropVideoProcessor
-import kotlinx.coroutines.CoroutineDispatcher
-import kotlinx.coroutines.CoroutineScope
-import kotlinx.coroutines.Dispatchers
-import kotlinx.coroutines.cancel
-import kotlinx.coroutines.channels.BufferOverflow
-import kotlinx.coroutines.flow.MutableSharedFlow
-import kotlinx.coroutines.launch
-import livekit.org.webrtc.EglBase
-import livekit.org.webrtc.EglRenderer
-import livekit.org.webrtc.GlUtil
-import livekit.org.webrtc.SurfaceTextureHelper
-import livekit.org.webrtc.VideoFrame
-import livekit.org.webrtc.VideoSink
-import org.opencv.android.Utils
-import org.opencv.core.CvType
-import org.opencv.core.Mat
-import java.nio.ByteBuffer
-import java.nio.ByteOrder
-import kotlin.math.roundToInt
-
-/**
- * OpenCV-based video processor using ncnn RVM for real-time video processing.
- * Inherits from NoDropVideoProcessor to ensure frames are processed even when not published.
- */
-class OpenCVVideoProcessor(
-    private val eglBase: EglBase,
-    dispatcher: CoroutineDispatcher = Dispatchers.Default,
-) : NoDropVideoProcessor() {
-
-    private var targetSink: VideoSink? = null
-    private val surfaceTextureHelper = SurfaceTextureHelper.create("OpenCVProcessor", eglBase.eglBaseContext)
-    private val surface = Surface(surfaceTextureHelper.surfaceTexture)
-
-    private val eglRenderer = EglRenderer(OpenCVVideoProcessor::class.java.simpleName)
-        .apply {
-            init(eglBase.eglBaseContext, EglBase.CONFIG_PLAIN, null)
-            createEglSurface(surface)
-        }
-
-    private val scope = CoroutineScope(dispatcher)
-    private val taskFlow = MutableSharedFlow<VideoFrame>(
-        replay = 0,
-        extraBufferCapacity = 1,
-        onBufferOverflow = BufferOverflow.DROP_OLDEST,
-    )
-
-    // RVM ncnn instance
-    private val rvmNcnn = com.tencent.rvmncnn.RVMNcnn()
-
-    private var lastRotation = 0
-    private var lastWidth = 0
-    private var lastHeight = 0
-
-    /**
-     * Enable or disable RVM processing
-     */
-    var enabled: Boolean = true
-
-    /**
-     * Background image to use for virtual background
-     * If null, will use default background
-     */
-    var backgroundImage: Bitmap? = null
-
-    init {
-        // Initialize processing pipeline
-        scope.launch {
-            taskFlow.collect { frame ->
-                processFrame(frame)
-                frame.release()
-            }
-        }
-    }
-
-    override fun onCapturerStarted(started: Boolean) {
-        if (started) {
-            surfaceTextureHelper.stopListening()
-            surfaceTextureHelper.startListening { frame ->
-                targetSink?.onFrame(frame)
-            }
-        }
-    }
-
-    override fun onCapturerStopped() {
-        surfaceTextureHelper.stopListening()
-    }
-
-    override fun onFrameCaptured(frame: VideoFrame) {
-        // If disabled, just pass through
-        if (!enabled) {
-            targetSink?.onFrame(frame)
-            return
-        }
-
-        try {
-            frame.retain()
-        } catch (e: Exception) {
-            return
-        }
-
-        // Submit frame for processing
-        if (!taskFlow.tryEmit(frame)) {
-            frame.release()
-        }
-    }
-
-    override fun setSink(sink: VideoSink?) {
-        targetSink = sink
-    }
-
-    private fun processFrame(frame: VideoFrame) {
-        if (lastRotation != frame.rotation ||
-            lastWidth != frame.rotatedWidth ||
-            lastHeight != frame.rotatedHeight) {
-
-            surfaceTextureHelper.setTextureSize(frame.rotatedWidth, frame.rotatedHeight)
-            lastRotation = frame.rotation
-            lastWidth = frame.rotatedWidth
-            lastHeight = frame.rotatedHeight
-        }
-
-        frame.retain()
-        surfaceTextureHelper.handler.post {
-            try {
-                // Convert VideoFrame to OpenCV Mat
-                val rgbaMat = videoFrameToMat(frame)
-
-                if (rgbaMat != null && !rgbaMat.empty()) {
-                    // Process with RVM
-                    val success = rvmNcnn.processFrame(
-                        rgbaMat.nativeObjAddr,
-                        rgbaMat.cols(),
-                        rgbaMat.rows(),
-                        frame.rotation
-                    )
-
-                    if (success) {
-                        // Convert processed Mat back to texture and render
-                        val processedFrame = matToVideoFrame(rgbaMat, frame)
-                        eglRenderer.onFrame(processedFrame)
-                        processedFrame.release()
-                    } else {
-                        // If processing failed, pass through original frame
-                        eglRenderer.onFrame(frame)
-                    }
-
-                    rgbaMat.release()
-                } else {
-                    eglRenderer.onFrame(frame)
-                }
-            } catch (e: Exception) {
-                // Fallback to original frame on error
-                eglRenderer.onFrame(frame)
-            }
-            frame.release()
-        }
-    }
-
-    private fun videoFrameToMat(frame: VideoFrame): Mat? {
-        return try {
-            val i420Buffer = frame.buffer.toI420()
-            val yPlane = i420Buffer.dataY
-            val uPlane = i420Buffer.dataU
-            val vPlane = i420Buffer.dataV
-
-            val yRowStride = i420Buffer.strideY
-            val uvRowStride = i420Buffer.strideU
-            val uvPixelStride = i420Buffer.strideU // Simplified
-
-            val width = i420Buffer.width
-            val height = i420Buffer.height
-
-            // Convert I420 to RGBA
-            val rgbaMat = Mat(height, width, CvType.CV_8UC4)
-
-            // This is a simplified conversion - in production you'd want a proper YUV to RGBA conversion
-            // For now, we'll create a placeholder implementation
-            convertI420ToRGBA(yPlane, uPlane, vPlane, yRowStride, uvRowStride, uvPixelStride, width, height, rgbaMat)
-
-            i420Buffer.release()
-            rgbaMat
-        } catch (e: Exception) {
-            null
-        }
-    }
-
-    private fun convertI420ToRGBA(
-        yPlane: ByteBuffer,
-        uPlane: ByteBuffer,
-        vPlane: ByteBuffer,
-        yRowStride: Int,
-        uvRowStride: Int,
-        uvPixelStride: Int,
-        width: Int,
-        height: Int,
-        rgbaMat: Mat
-    ) {
-        // Placeholder implementation - you'd need proper YUV to RGBA conversion
-        // This is a simplified version that just creates a test pattern
-        val rgbaData = ByteArray(width * height * 4)
-        var index = 0
-
-        for (y in 0 until height) {
-            for (x in 0 until width) {
-                val yIndex = (y * yRowStride) + x
-                val uvIndex = ((y / 2) * uvRowStride) + ((x / 2) * uvPixelStride)
-
-                val yValue = yPlane[yIndex].toInt() and 0xFF
-                val uValue = uPlane[uvIndex].toInt() and 0xFF
-                val vValue = vPlane[uvIndex].toInt() and 0xFF
-
-                // Simple YUV to RGB conversion (simplified)
-                val r = (1.164 * (yValue - 16) + 1.596 * (vValue - 128)).toInt().coerceIn(0, 255)
-                val g = (1.164 * (yValue - 16) - 0.813 * (vValue - 128) - 0.391 * (uValue - 128)).toInt().coerceIn(0, 255)
-                val b = (1.164 * (yValue - 16) + 2.018 * (uValue - 128)).toInt().coerceIn(0, 255)
-
-                rgbaData[index++] = b.toByte()
-                rgbaData[index++] = g.toByte()
-                rgbaData[index++] = r.toByte()
-                rgbaData[index++] = 255.toByte() // Alpha
-            }
-        }
-
-        rgbaMat.put(0, 0, rgbaData)
-    }
-
-    private fun matToVideoFrame(mat: Mat, originalFrame: VideoFrame): VideoFrame {
-        // Convert RGBA Mat back to I420 buffer
-        // This is a simplified implementation - you'd need proper RGBA to I420 conversion
-        val i420Buffer = originalFrame.buffer // Reuse original buffer format for simplicity
-
-        // In production, you'd convert the RGBA mat back to I420 format
-        // and create a new VideoFrame with the processed data
-
-        return originalFrame // Placeholder - return original frame
-    }
-
-    /**
-     * Load RVM model
-     */
-    fun loadModel(
-        assetManager: android.content.res.AssetManager,
-        modelId: Int = 0,
-        sizeId: Int = 2,
-        intraInterId: Int = 0,
-        postProcId: Int = 1,
-        cpuGpu: Int = 0
-    ): Boolean {
-        return rvmNcnn.loadModel(assetManager, modelId, sizeId, intraInterId, postProcId, cpuGpu)
-    }
-
-    fun dispose() {
-        scope.cancel()
-        surfaceTextureHelper.stopListening()
-        surfaceTextureHelper.dispose()
-        surface.release()
-        eglRenderer.release()
-        GlUtil.checkNoGLES2Error("OpenCVVideoProcessor.dispose")
-    }
-}
@@ -1,17 +1,134 @@
-package io.livekit.android.track.processing.video;// RVMNcnn.java
+package io.livekit.android.track.processing.video;

 import android.content.res.AssetManager;
+import android.graphics.Bitmap;
 import android.view.Surface;

-public class RVMNcnn
-{
+import io.livekit.android.room.track.video.NoDropVideoProcessor;
+import livekit.org.webrtc.EglBase;
+import livekit.org.webrtc.SurfaceTextureHelper;
+import livekit.org.webrtc.VideoFrame;
+import livekit.org.webrtc.VideoSink;
+
+/**
+ * RVMNcnn processor that delegates all pixel processing to native (cpp) and
+ * renders processed frames directly into a Surface provided by SurfaceTextureHelper.
+ * Java does not perform any image processing.
+ */
+public class RVMNcnn extends NoDropVideoProcessor {
+
+    // Native JNI hooks
     public native boolean loadModel(AssetManager mgr, int modelid, int sizeid, int intrainterid, int postprocid, int cpugpu);
     public native boolean openCamera(int facing);
     public native boolean closeCamera();
     public native boolean setOutputWindow(Surface surface);
-    public native boolean processFrame(long rgbaAddr, int width, int height, int rotation);
+    public native boolean setBackgroundImage(Bitmap bitmap);

     static {
         System.loadLibrary("rvmncnn");
     }
+
+    private final EglBase eglBase;
+    private final SurfaceTextureHelper surfaceTextureHelper;
+    private final Surface outputSurface;
+
+    private VideoSink targetSink;
+
+    /**
+     * Controls whether the native virtual background is enabled.
+     * When enabled, native renders to outputSurface and Java forwards those frames.
+     * When disabled, incoming frames are passed through to targetSink.
+     */
+    public boolean enabled = true;
+
+    /**
+     * Facing: 0 back, 1 front. Used when starting the native camera pipeline.
+     */
+    private int facing = 1;
+
+    public RVMNcnn(EglBase eglBase) {
+        this.eglBase = eglBase;
+        this.surfaceTextureHelper = SurfaceTextureHelper.create("RVMNcnn", eglBase.getEglBaseContext());
+        this.outputSurface = new Surface(surfaceTextureHelper.getSurfaceTexture());
+    }
+
+    @Override
+    public void onCapturerStarted(boolean started) {
+        if (started) {
+            // Listen for frames produced from the output surface (rendered by native)
+            // and forward them to the target sink.
+            surfaceTextureHelper.stopListening();
+            surfaceTextureHelper.startListening(frame -> {
+                VideoSink sink = targetSink;
+                if (sink != null) {
+                    sink.onFrame(frame);
+                }
+            });
+
+            if (enabled) {
+                // Direct native to render into our SurfaceTextureHelper's surface
+                setOutputWindow(outputSurface);
+                // Start the native camera pipeline (cpp will process and render)
+                openCamera(facing);
+            }
+        }
+    }
+
+    @Override
+    public void onCapturerStopped() {
+        // Stop Java-side listening and shut down the native pipeline
+        surfaceTextureHelper.stopListening();
+        closeCamera();
+    }
+
+    @Override
+    public void onFrameCaptured(VideoFrame frame) {
+        // If disabled, pass through the original frames.
+        if (!enabled) {
+            VideoSink sink = targetSink;
+            if (sink != null) {
+                sink.onFrame(frame);
+            }
+            return;
+        }
+        // Enabled: Java neither processes pixels nor forwards the original frames.
+        // Native renders processed frames into outputSurface, which is already forwarded above.
+        // Drop the incoming frame here.
+    }
+
+    @Override
+    public void setSink(VideoSink sink) {
+        this.targetSink = sink;
+    }
+
+    /**
+     * Update facing and restart the native pipeline if needed.
+     * 0 = back, 1 = front.
+     */
+    public void setFacing(int facing) {
+        this.facing = facing == 0 ? 0 : 1;
+        if (enabled) {
+            // If running, restart the native camera with the new facing
+            closeCamera();
+            openCamera(this.facing);
+        }
+    }
+
+    /**
+     * Update the background image used by the native processor.
+     * Pass null to clear.
+     */
+    public boolean updateBackgroundImage(Bitmap bitmap) {
+        return setBackgroundImage(bitmap);
+    }
+
+    /**
+     * Call when disposing the processor.
+     */
+    public void dispose() {
+        surfaceTextureHelper.stopListening();
+        closeCamera();
+        outputSurface.release();
+        surfaceTextureHelper.dispose();
+    }
 }
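
The new dispose() is not called anywhere in this change. A minimal cleanup sketch for MainViewModel (assuming the processor field shown above), releasing the native pipeline when the ViewModel is destroyed:

// Sketch: tear down the native camera and surface resources with the ViewModel.
override fun onCleared() {
    super.onCleared()
    processor.dispose()
}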
@@ -142,7 +142,24 @@ void MyNdkCamera::on_image_render(cv::Mat& rgb) const
         }
     }

+    // overlay fps
     draw_fps(rgb);
+
+    // enforce the target output resolution of 640x640 and a 180-degree rotation
+    {
+        // resize to 640x640 if needed
+        if (rgb.cols != 640 || rgb.rows != 640)
+        {
+            cv::Mat resized;
+            cv::resize(rgb, resized, cv::Size(640, 640), 0, 0, cv::INTER_LINEAR);
+            resized.copyTo(rgb);
+        }
+
+        // rotate 180 degrees
+        cv::Mat rotated;
+        cv::rotate(rgb, rotated, cv::ROTATE_180);
+        rotated.copyTo(rgb);
+    }
 }

 static MyNdkCamera* g_camera = 0;
@@ -299,6 +316,9 @@ JNIEXPORT jboolean JNICALL Java_io_livekit_android_track_processing_video_RVMNcnn_setOutputWindow

     __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "setOutputWindow %p", win);

+    // Set the buffer geometry to 640x640, keeping the current format (0)
+    ANativeWindow_setBuffersGeometry(win, 640, 640, 0);
+
     g_camera->set_window(win);

     return JNI_TRUE;
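
setFacing() restarts the native camera pipeline, but nothing in this change invokes it. A sketch of keeping it in sync with a camera flip in the sample (the flipCamera() handler is hypothetical; CameraPosition comes from the imports in MainViewModel):

// Hypothetical flip handler: map LiveKit's CameraPosition onto the native
// facing convention used by openCamera() (0 = back, 1 = front).
fun flipCamera(position: CameraPosition) {
    processor.setFacing(if (position == CameraPosition.FRONT) 1 else 0)
}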