xuning

Re-enable ncnn

... ... @@ -33,7 +33,7 @@ import io.livekit.android.room.track.CameraPosition
import io.livekit.android.room.track.LocalVideoTrack
import io.livekit.android.room.track.LocalVideoTrackOptions
import io.livekit.android.room.track.video.CameraCapturerUtils
import io.livekit.android.track.processing.video.VirtualBackgroundVideoProcessor
import io.livekit.android.track.processing.video.RVMNcnn
import io.livekit.android.util.LoggingLevel
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.asExecutor
... ... @@ -45,6 +45,16 @@ class MainViewModel(application: Application) : AndroidViewModel(application) {
init {
LiveKit.loggingLevel = LoggingLevel.INFO
// Load the RVM model: mobilenetv3, target size 640 (sizeid=6), intra/inter=0, postproc=1 (fast), CPU (cpugpu=0)
// For GPU inference, set cpugpu=1, or 2 to use turnip; a CPU-fallback sketch follows this init block
processor.loadModel(
application.getAssets(),
/* modelid */ 0,
/* sizeid */ 6,
/* intrainterid */ 0,
/* postprocid */ 1,
/* cpugpu */ 0
)
}
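// Sketch only: loadModel returns a Boolean, so a GPU-first configuration could fall back to CPU
// when the Vulkan backend fails to initialize. Assumes loadModel reports failure via its return
// value; the helper name is hypothetical and not part of the existing sample API.
private fun loadRvmModelWithGpuFallback(assets: android.content.res.AssetManager): Boolean {
    val gpuLoaded = processor.loadModel(assets, /* modelid */ 0, /* sizeid */ 6, /* intrainterid */ 0, /* postprocid */ 1, /* cpugpu */ 1)
    return gpuLoaded || processor.loadModel(assets, /* modelid */ 0, /* sizeid */ 6, /* intrainterid */ 0, /* postprocid */ 1, /* cpugpu */ 0)
}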
val eglBase = EglBase.create()
... ... @@ -58,10 +68,7 @@ class MainViewModel(application: Application) : AndroidViewModel(application) {
private val virtualBackground = (AppCompatResources.getDrawable(application, R.drawable.background) as BitmapDrawable).bitmap
private var blur = 16f
private val processor = VirtualBackgroundVideoProcessor(eglBase, Dispatchers.IO, initialBlurRadius = blur).apply {
// Leave the background image unset initially so that the blur feature works correctly
// backgroundImage = virtualBackground
}
private val processor = RVMNcnn(eglBase)
private var cameraProvider: CameraCapturerUtils.CameraProvider? = null
... ... @@ -74,7 +81,6 @@ class MainViewModel(application: Application) : AndroidViewModel(application) {
.build(),
)
.build()
.apply { setAnalyzer(Dispatchers.IO.asExecutor(), processor.imageAnalyzer) }
init {
CameraXHelper.createCameraProvider(ProcessLifecycleOwner.get(), arrayOf(imageAnalysis)).let {
... ... @@ -114,24 +120,32 @@ class MainViewModel(application: Application) : AndroidViewModel(application) {
}
fun decreaseBlur() {
blur = maxOf(0f, blur - 5) // ensure blur never drops below 0
android.util.Log.e("MainViewModel", "=== DECREASING BLUR TO: $blur, processor enabled: ${processor.enabled} ===")
processor.updateBlurRadius(blur)
// RVMNcnn does not support blur adjustment; keep the method for sample-UI compatibility and just log a no-op
blur = maxOf(0f, blur - 5)
android.util.Log.e("MainViewModel", "RVMNcnn: decreaseBlur noop, current blur=$blur, enabled=${processor.enabled}")
}
fun increaseBlur() {
blur = minOf(50f, blur + 5) // cap blur at 50 to avoid excessive blurring
android.util.Log.e("MainViewModel", "=== INCREASING BLUR TO: $blur, processor enabled: ${processor.enabled} ===")
processor.updateBlurRadius(blur)
// RVMNcnn does not support blur adjustment; keep the method for sample-UI compatibility and just log a no-op
blur = minOf(50f, blur + 5)
android.util.Log.e("MainViewModel", "RVMNcnn: increaseBlur noop, current blur=$blur, enabled=${processor.enabled}")
}
fun toggleVirtualBackground(): Boolean {
if (processor.backgroundImage != virtualBackground) {
processor.backgroundImage = virtualBackground
return true
// Use RVMNcnn's background-image interface
// Returning true means a background was set, false means it was cleared
val videoTrack = track.value
return if (videoTrack != null) {
// Simple toggle: set the background if it is not currently set, clear it otherwise
// The native state cannot be read back here; a UI-state-driven boolean toggle is sketched after this function
val set = processor.updateBackgroundImage(virtualBackground)
if (!set) {
processor.updateBackgroundImage(null)
}
set
} else {
processor.backgroundImage = null
return false
// Before capture has started, the background can simply be set directly
processor.updateBackgroundImage(virtualBackground)
}
}
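// Sketch of the UI-state-driven toggle mentioned above: since the native background state cannot
// be read back, track it in the ViewModel. The field and function names are hypothetical.
private var backgroundApplied = false

fun toggleVirtualBackgroundTracked(): Boolean {
    backgroundApplied = !backgroundApplied
    // Passing null clears the background on the native side
    processor.updateBackgroundImage(if (backgroundApplied) virtualBackground else null)
    return backgroundApplied
}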
... ...
// OpenCVVideoProcessor.kt
package io.livekit.android.track.processing.video
import android.graphics.Bitmap
import android.view.Surface
import io.livekit.android.room.track.video.NoDropVideoProcessor
import kotlinx.coroutines.CoroutineDispatcher
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.cancel
import kotlinx.coroutines.channels.BufferOverflow
import kotlinx.coroutines.flow.MutableSharedFlow
import kotlinx.coroutines.launch
import livekit.org.webrtc.EglBase
import livekit.org.webrtc.EglRenderer
import livekit.org.webrtc.GlUtil
import livekit.org.webrtc.SurfaceTextureHelper
import livekit.org.webrtc.VideoFrame
import livekit.org.webrtc.VideoSink
import org.opencv.android.Utils
import org.opencv.core.CvType
import org.opencv.core.Mat
import java.nio.ByteBuffer
import java.nio.ByteOrder
import kotlin.math.roundToInt
/**
* OpenCV-based video processor using ncnn RVM for real-time video processing.
* Inherits from NoDropVideoProcessor to ensure frames are processed even when not published.
*/
class OpenCVVideoProcessor(
private val eglBase: EglBase,
dispatcher: CoroutineDispatcher = Dispatchers.Default,
) : NoDropVideoProcessor() {
private var targetSink: VideoSink? = null
private val surfaceTextureHelper = SurfaceTextureHelper.create("OpenCVProcessor", eglBase.eglBaseContext)
private val surface = Surface(surfaceTextureHelper.surfaceTexture)
private val eglRenderer = EglRenderer(OpenCVVideoProcessor::class.java.simpleName)
.apply {
init(eglBase.eglBaseContext, EglBase.CONFIG_PLAIN, null)
createEglSurface(surface)
}
private val scope = CoroutineScope(dispatcher)
private val taskFlow = MutableSharedFlow<VideoFrame>(
replay = 0,
extraBufferCapacity = 1,
onBufferOverflow = BufferOverflow.DROP_OLDEST,
)
// RVM ncnn instance
private val rvmNcnn = com.tencent.rvmncnn.RVMNcnn()
private var lastRotation = 0
private var lastWidth = 0
private var lastHeight = 0
/**
* Enable or disable RVM processing
*/
var enabled: Boolean = true
/**
* Background image to use for virtual background
* If null, will use default background
*/
var backgroundImage: Bitmap? = null
init {
// Initialize processing pipeline
scope.launch {
taskFlow.collect { frame ->
processFrame(frame)
frame.release()
}
}
}
override fun onCapturerStarted(started: Boolean) {
if (started) {
surfaceTextureHelper.stopListening()
surfaceTextureHelper.startListening { frame ->
targetSink?.onFrame(frame)
}
}
}
override fun onCapturerStopped() {
surfaceTextureHelper.stopListening()
}
override fun onFrameCaptured(frame: VideoFrame) {
// If disabled, just pass through
if (!enabled) {
targetSink?.onFrame(frame)
return
}
try {
frame.retain()
} catch (e: Exception) {
return
}
// Submit frame for processing
if (!taskFlow.tryEmit(frame)) {
frame.release()
}
}
override fun setSink(sink: VideoSink?) {
targetSink = sink
}
private fun processFrame(frame: VideoFrame) {
if (lastRotation != frame.rotation ||
lastWidth != frame.rotatedWidth ||
lastHeight != frame.rotatedHeight) {
surfaceTextureHelper.setTextureSize(frame.rotatedWidth, frame.rotatedHeight)
lastRotation = frame.rotation
lastWidth = frame.rotatedWidth
lastHeight = frame.rotatedHeight
}
frame.retain()
surfaceTextureHelper.handler.post {
try {
// Convert VideoFrame to OpenCV Mat
val rgbaMat = videoFrameToMat(frame)
if (rgbaMat != null && !rgbaMat.empty()) {
// Process with RVM
val success = rvmNcnn.processFrame(
rgbaMat.nativeObjAddr,
rgbaMat.cols(),
rgbaMat.rows(),
frame.rotation
)
if (success) {
// Convert processed Mat back to texture and render
val processedFrame = matToVideoFrame(rgbaMat, frame)
eglRenderer.onFrame(processedFrame)
processedFrame.release()
} else {
// If processing failed, pass through original frame
eglRenderer.onFrame(frame)
}
rgbaMat.release()
} else {
eglRenderer.onFrame(frame)
}
} catch (e: Exception) {
// Fallback to original frame on error
eglRenderer.onFrame(frame)
}
frame.release()
}
}
private fun videoFrameToMat(frame: VideoFrame): Mat? {
return try {
val i420Buffer = frame.buffer.toI420()
val yPlane = i420Buffer.dataY
val uPlane = i420Buffer.dataU
val vPlane = i420Buffer.dataV
val yRowStride = i420Buffer.strideY
val uvRowStride = i420Buffer.strideU
val uvPixelStride = 1 // I420 is fully planar, so the chroma pixel stride is always 1
val width = i420Buffer.width
val height = i420Buffer.height
// Convert I420 to RGBA
val rgbaMat = Mat(height, width, CvType.CV_8UC4)
// Plain-Kotlin per-pixel conversion below: functionally correct but slow for real-time use.
// A cvtColor-based alternative is sketched after convertI420ToRGBA.
convertI420ToRGBA(yPlane, uPlane, vPlane, yRowStride, uvRowStride, uvPixelStride, width, height, rgbaMat)
i420Buffer.release()
rgbaMat
} catch (e: Exception) {
null
}
}
private fun convertI420ToRGBA(
yPlane: ByteBuffer,
uPlane: ByteBuffer,
vPlane: ByteBuffer,
yRowStride: Int,
uvRowStride: Int,
uvPixelStride: Int,
width: Int,
height: Int,
rgbaMat: Mat
) {
// Per-pixel BT.601 (limited-range) YUV to RGBA conversion.
// Functionally correct, but the nested Kotlin loops are slow at camera resolutions.
val rgbaData = ByteArray(width * height * 4)
var index = 0
for (y in 0 until height) {
for (x in 0 until width) {
val yIndex = (y * yRowStride) + x
val uvIndex = ((y / 2) * uvRowStride) + ((x / 2) * uvPixelStride)
val yValue = yPlane[yIndex].toInt() and 0xFF
val uValue = uPlane[uvIndex].toInt() and 0xFF
val vValue = vPlane[uvIndex].toInt() and 0xFF
// BT.601 YUV to RGB conversion
val r = (1.164 * (yValue - 16) + 1.596 * (vValue - 128)).toInt().coerceIn(0, 255)
val g = (1.164 * (yValue - 16) - 0.813 * (vValue - 128) - 0.391 * (uValue - 128)).toInt().coerceIn(0, 255)
val b = (1.164 * (yValue - 16) + 2.018 * (uValue - 128)).toInt().coerceIn(0, 255)
// Write in R, G, B, A byte order to match the RGBA Mat handed to native
rgbaData[index++] = r.toByte()
rgbaData[index++] = g.toByte()
rgbaData[index++] = b.toByte()
rgbaData[index++] = 255.toByte() // alpha
}
}
rgbaMat.put(0, 0, rgbaData)
}
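// Faster alternative sketch: pack the I420 planes into one contiguous single-channel Mat and let
// OpenCV do the conversion. Assumes the OpenCV Imgproc module is available and frame dimensions
// are even; the function names below are illustrative only.
private fun convertI420ToRGBAWithOpenCv(i420: VideoFrame.I420Buffer): Mat {
    val width = i420.width
    val height = i420.height
    // COLOR_YUV2RGBA_I420 expects the Y plane followed by the U and V quarter planes,
    // stored contiguously in a (height * 3 / 2) x width CV_8UC1 Mat.
    val yuvBytes = ByteArray(width * height * 3 / 2)
    copyPlane(i420.dataY, i420.strideY, width, height, yuvBytes, 0)
    copyPlane(i420.dataU, i420.strideU, width / 2, height / 2, yuvBytes, width * height)
    copyPlane(i420.dataV, i420.strideV, width / 2, height / 2, yuvBytes, width * height + width * height / 4)
    val yuvMat = Mat(height * 3 / 2, width, CvType.CV_8UC1)
    yuvMat.put(0, 0, yuvBytes)
    val rgbaMat = Mat(height, width, CvType.CV_8UC4)
    org.opencv.imgproc.Imgproc.cvtColor(yuvMat, rgbaMat, org.opencv.imgproc.Imgproc.COLOR_YUV2RGBA_I420)
    yuvMat.release()
    return rgbaMat
}

// Copies one plane row by row, dropping the source row-stride padding.
private fun copyPlane(src: ByteBuffer, srcStride: Int, width: Int, height: Int, dst: ByteArray, dstOffset: Int) {
    val plane = src.duplicate()
    for (row in 0 until height) {
        plane.position(row * srcStride)
        plane.get(dst, dstOffset + row * width, width)
    }
}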
private fun matToVideoFrame(mat: Mat, originalFrame: VideoFrame): VideoFrame {
    // Placeholder: returns the original frame unchanged. Retain it so the caller's release()
    // of the returned frame stays balanced. A production implementation would convert the
    // processed RGBA mat back to an I420 buffer; a sketch of that conversion follows this function.
    originalFrame.retain()
    return originalFrame
}
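// Sketch of the RGBA -> I420 conversion referenced above. Assumes JavaI420Buffer and
// YuvHelper.ABGRToI420 are available in this repackaged WebRTC build (libyuv's "ABGR" means
// R,G,B,A byte order in memory, i.e. the RGBA layout produced here). The function name is
// illustrative; the caller owns and must release the returned frame.
private fun rgbaMatToI420Frame(mat: Mat, originalFrame: VideoFrame): VideoFrame {
    val width = mat.cols()
    val height = mat.rows()
    // Copy the Mat's RGBA bytes into a direct buffer for the native conversion helper.
    val rgbaBytes = ByteArray(width * height * 4)
    mat.get(0, 0, rgbaBytes)
    val rgbaBuffer = ByteBuffer.allocateDirect(rgbaBytes.size).order(ByteOrder.nativeOrder())
    rgbaBuffer.put(rgbaBytes)
    rgbaBuffer.rewind()
    val i420 = livekit.org.webrtc.JavaI420Buffer.allocate(width, height)
    livekit.org.webrtc.YuvHelper.ABGRToI420(
        rgbaBuffer, width * 4,
        i420.dataY, i420.strideY,
        i420.dataU, i420.strideU,
        i420.dataV, i420.strideV,
        width, height
    )
    return VideoFrame(i420, originalFrame.rotation, originalFrame.timestampNs)
}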
/**
* Load RVM model
*/
fun loadModel(
assetManager: android.content.res.AssetManager,
modelId: Int = 0,
sizeId: Int = 2,
intraInterId: Int = 0,
postProcId: Int = 1,
cpuGpu: Int = 0
): Boolean {
return rvmNcnn.loadModel(assetManager, modelId, sizeId, intraInterId, postProcId, cpuGpu)
}
fun dispose() {
scope.cancel()
surfaceTextureHelper.stopListening()
surfaceTextureHelper.dispose()
surface.release()
eglRenderer.release()
GlUtil.checkNoGLES2Error("OpenCVVideoProcessor.dispose")
}
}
\ No newline at end of file
// RVMNcnn.java
package io.livekit.android.track.processing.video;
import android.content.res.AssetManager;
import android.graphics.Bitmap;
import android.view.Surface;
import io.livekit.android.room.track.video.NoDropVideoProcessor;
import livekit.org.webrtc.EglBase;
import livekit.org.webrtc.SurfaceTextureHelper;
import livekit.org.webrtc.VideoFrame;
import livekit.org.webrtc.VideoSink;
/**
* RVMNcnn processor that delegates all pixel processing to native (cpp) and
* renders processed frames directly into a Surface provided by SurfaceTextureHelper.
* Java does not perform any image processing.
*/
public class RVMNcnn extends NoDropVideoProcessor {
// Native JNI hooks
public native boolean loadModel(AssetManager mgr, int modelid, int sizeid, int intrainterid, int postprocid, int cpugpu);
public native boolean openCamera(int facing);
public native boolean closeCamera();
public native boolean setOutputWindow(Surface surface);
public native boolean processFrame(long rgbaAddr, int width, int height, int rotation);
public native boolean setBackgroundImage(Bitmap bitmap);
static {
System.loadLibrary("rvmncnn");
}
private final EglBase eglBase;
private final SurfaceTextureHelper surfaceTextureHelper;
private final Surface outputSurface;
private VideoSink targetSink;
/**
* Controls whether the native virtual background is enabled.
* When enabled, native renders to outputSurface and Java forwards those frames.
* When disabled, incoming frames are passed through to targetSink.
*/
public boolean enabled = true;
/**
* Facing: 0 back, 1 front. Used when starting native camera pipeline.
*/
private int facing = 1;
public RVMNcnn(EglBase eglBase) {
this.eglBase = eglBase;
this.surfaceTextureHelper = SurfaceTextureHelper.create("RVMNcnn", eglBase.getEglBaseContext());
this.outputSurface = new Surface(surfaceTextureHelper.getSurfaceTexture());
}
@Override
public void onCapturerStarted(boolean started) {
if (started) {
// Listen to frames produced from the output surface (rendered by native),
// and forward to target sink.
surfaceTextureHelper.stopListening();
surfaceTextureHelper.startListening(frame -> {
VideoSink sink = targetSink;
if (sink != null) {
sink.onFrame(frame);
}
});
if (enabled) {
// Direct native to render into our SurfaceTextureHelper's surface
setOutputWindow(outputSurface);
// Start native camera pipeline (cpp will process and render)
openCamera(facing);
}
}
}
@Override
public void onCapturerStopped() {
// Stop Java-side listening and shutdown native pipeline
surfaceTextureHelper.stopListening();
closeCamera();
}
@Override
public void onFrameCaptured(VideoFrame frame) {
// If disabled, pass-through original frames.
if (!enabled) {
VideoSink sink = targetSink;
if (sink != null) {
sink.onFrame(frame);
}
return;
}
// Enabled: Java does not process pixels nor forward original frames.
// Native renders processed frames into outputSurface, which we already forward above.
// Drop the incoming frame here.
}
@Override
public void setSink(VideoSink sink) {
this.targetSink = sink;
}
/**
* Update facing and restart native pipeline if needed.
* 0 = back, 1 = front.
*/
public void setFacing(int facing) {
this.facing = facing == 0 ? 0 : 1;
if (enabled) {
// If running, restart native camera with new facing
closeCamera();
openCamera(this.facing);
}
}
/**
* Update the background image used by native processor.
* Pass null to clear.
*/
public boolean updateBackgroundImage(Bitmap bitmap) {
return setBackgroundImage(bitmap);
}
/**
* Call when disposing the processor.
*/
public void dispose() {
surfaceTextureHelper.stopListening();
closeCamera();
outputSurface.release();
surfaceTextureHelper.dispose();
}
}
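For reference, a minimal Kotlin lifecycle sketch that exercises only the API defined above; attaching the processor to a LiveKit video track is unchanged from the sample's MainViewModel and not repeated here. The helper name is hypothetical.
import android.content.Context
import android.graphics.Bitmap
import io.livekit.android.track.processing.video.RVMNcnn
import livekit.org.webrtc.EglBase

// Construct, load, and configure an RVMNcnn processor; values mirror the MainViewModel setup above.
fun createRvmProcessor(context: Context, background: Bitmap?): Pair<RVMNcnn, EglBase> {
    val eglBase = EglBase.create()
    val rvm = RVMNcnn(eglBase)
    // mobilenetv3, target size 640, fast post-processing, CPU inference
    rvm.loadModel(context.assets, /* modelid */ 0, /* sizeid */ 6, /* intrainterid */ 0, /* postprocid */ 1, /* cpugpu */ 0)
    rvm.setFacing(1)                      // 1 = front camera for the native pipeline
    rvm.updateBackgroundImage(background) // null clears the virtual background
    return rvm to eglBase
}

// On teardown: rvm.dispose(); eglBase.release()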
... ...
... ... @@ -142,7 +142,24 @@ void MyNdkCamera::on_image_render(cv::Mat& rgb) const
}
}
// overlay fps
draw_fps(rgb);
// enforce target output resolution 640x640 and 180-degree rotation
{
// resize to 640x640 if needed
if (rgb.cols != 640 || rgb.rows != 640)
{
cv::Mat resized;
cv::resize(rgb, resized, cv::Size(640, 640), 0, 0, cv::INTER_LINEAR);
resized.copyTo(rgb);
}
// rotate 180 degrees
cv::Mat rotated;
cv::rotate(rgb, rotated, cv::ROTATE_180);
rotated.copyTo(rgb);
}
}
static MyNdkCamera* g_camera = 0;
... ... @@ -299,6 +316,9 @@ JNIEXPORT jboolean JNICALL Java_io_livekit_android_track_processing_video_RVMNcn
__android_log_print(ANDROID_LOG_DEBUG, "ncnn", "setOutputWindow %p", win);
// Set buffer geometry to 640x640, keep current format (0)
ANativeWindow_setBuffersGeometry(win, 640, 640, 0);
g_camera->set_window(win);
return JNI_TRUE;
... ...