xuning

Resolve new issues: successfully hook up the ncnn library and implement the OpenCV inverted (upside-down) picture

... ... @@ -33,7 +33,7 @@ import io.livekit.android.room.track.CameraPosition
import io.livekit.android.room.track.LocalVideoTrack
import io.livekit.android.room.track.LocalVideoTrackOptions
import io.livekit.android.room.track.video.CameraCapturerUtils
import io.livekit.android.track.processing.video.RVMNcnn
import io.livekit.android.track.processing.video.OpencvVideoProcessor
import io.livekit.android.util.LoggingLevel
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.asExecutor
... ... @@ -42,14 +42,12 @@ import livekit.org.webrtc.EglBase
@OptIn(ExperimentalCamera2Interop::class)
class MainViewModel(application: Application) : AndroidViewModel(application) {
val eglBase = EglBase.create()
private val processor = RVMNcnn(eglBase)
init {
LiveKit.loggingLevel = LoggingLevel.INFO
}
val eglBase = EglBase.create()
val room = LiveKit.create(
application,
overrides = LiveKitOverrides(
... ... @@ -57,25 +55,14 @@ class MainViewModel(application: Application) : AndroidViewModel(application) {
),
)
private val virtualBackground = (AppCompatResources.getDrawable(application, R.drawable.background) as BitmapDrawable).bitmap
private var blur = 16f
private val processor = OpencvVideoProcessor()
private var cameraProvider: CameraCapturerUtils.CameraProvider? = null
private var imageAnalysis = ImageAnalysis.Builder()
.setResolutionSelector(
ResolutionSelector.Builder()
// LocalVideoTrack defaults to a 16:9 aspect ratio (VideoPreset169.H720),
// while CameraX ImageAnalysis defaults to 4:3, so prefer 16:9 here
.setAspectRatioStrategy(AspectRatioStrategy.RATIO_16_9_FALLBACK_AUTO_STRATEGY)
.build(),
)
.build()
init {
CameraXHelper.createCameraProvider(ProcessLifecycleOwner.get(), arrayOf(imageAnalysis)).let {
CameraXHelper.createCameraProvider(ProcessLifecycleOwner.get(), arrayOf()).let {
if (it.isSupported(application)) {
CameraCapturerUtils.registerCameraProvider(it)
cameraProvider = it
... ... @@ -99,46 +86,28 @@ class MainViewModel(application: Application) : AndroidViewModel(application) {
super.onCleared()
track.value?.stopCapture()
room.release()
processor.dispose()
cameraProvider?.let {
CameraCapturerUtils.unregisterCameraProvider(it)
}
}
fun toggleProcessor(): Boolean {
val newState = !processor.enabled
processor.enabled = newState
return newState
// OpencvVideoProcessor has no enable/disable switch; return true as a placeholder
return true
}
fun decreaseBlur() {
// RVMNcnn does not support adjusting blur; keep this method for sample-UI compatibility and just log a no-op
blur = maxOf(0f, blur - 5)
android.util.Log.e("MainViewModel", "RVMNcnn: decreaseBlur noop, current blur=$blur, enabled=${processor.enabled}")
// No-op: OpencvVideoProcessor does not support blur
}
fun increaseBlur() {
// RVMNcnn does not support adjusting blur; keep this method for sample-UI compatibility and just log a no-op
blur = minOf(50f, blur + 5)
android.util.Log.e("MainViewModel", "RVMNcnn: increaseBlur noop, current blur=$blur, enabled=${processor.enabled}")
// No-op: OpencvVideoProcessor does not support blur
}
fun toggleVirtualBackground(): Boolean {
// Use RVMNcnn's background-image API
// Returns true if a background was set, false if it was cleared
val videoTrack = track.value
return if (videoTrack != null) {
// Simple toggle: set the background if none is set, otherwise clear it
// The native state cannot be read directly here, so a boolean toggle driven by the UI state is used
val set = processor.updateBackgroundImage(virtualBackground)
if (!set) {
processor.updateBackgroundImage(null)
}
set
} else {
// Can be set directly before capture has started
processor.updateBackgroundImage(virtualBackground)
}
// No-op: OpencvVideoProcessor does not support background images; return false
return false
}
fun flipCamera() {
... ...
package io.livekit.android.track.processing.video;
import androidx.annotation.Nullable;
import io.livekit.android.room.track.video.NoDropVideoProcessor;
import java.nio.ByteBuffer;
import livekit.org.webrtc.JavaI420Buffer;
import livekit.org.webrtc.VideoFrame;
import livekit.org.webrtc.VideoSink;
import livekit.org.webrtc.VideoFrame.I420Buffer;
/**
* OpencvVideoProcessor
* - Extends NoDropVideoProcessor
* - Delegates all pixel processing to native (cpp) via processI420ToI420
* - Java avoids OpenCV, only handles buffers and frame plumbing
* - Output frame rotation is unified to 180
*/
public class OpencvVideoProcessor extends NoDropVideoProcessor {
@Nullable
private VideoSink targetSink;
// Reusable direct buffers for output I420
private ByteBuffer outY;
private ByteBuffer outU;
private ByteBuffer outV;
private int outYCapacity;
private int outUCapacity;
private int outVCapacity;
static {
try {
System.loadLibrary("rvmncnn");
android.util.Log.d("OpencvVideoProcessor", "System.loadLibrary(rvmncnn) success");
} catch (Throwable t) {
android.util.Log.e("OpencvVideoProcessor", "System.loadLibrary(rvmncnn) failed", t);
}
}
// Core native that processes I420 in/out fully in cpp
private static native boolean processI420ToI420(
ByteBuffer y, int yStride,
ByteBuffer u, int uStride,
ByteBuffer v, int vStride,
int width, int height, int rotation,
ByteBuffer outY, int outYStride,
ByteBuffer outU, int outUStride,
ByteBuffer outV, int outVStride
);
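// Contract: every buffer must be a direct ByteBuffer (the native side uses
// GetDirectBufferAddress), and the caller passes the input strides as the
// output strides so rows can be copied back without repacking.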
@Override
public void setSink(@Nullable VideoSink sink) {
this.targetSink = sink;
}
@Override
public void onCapturerStarted(boolean started) {
// No GL or Surface path here.
}
@Override
public void onCapturerStopped() {
// No-op
}
@Override
public void onFrameCaptured(VideoFrame frame) {
final VideoSink sink = targetSink;
if (sink == null) return;
I420Buffer i420 = frame.getBuffer().toI420();
try {
final int width = i420.getWidth();
final int height = i420.getHeight();
final ByteBuffer y = i420.getDataY();
final ByteBuffer u = i420.getDataU();
final ByteBuffer v = i420.getDataV();
final int yStride = i420.getStrideY();
final int uStride = i420.getStrideU();
final int vStride = i420.getStrideV();
// Ensure output buffers capacity (match input strides)
final int needY = yStride * height;
final int needU = uStride * (height / 2);
final int needV = vStride * (height / 2);
ensureOutBuffers(needY, needU, needV);
// JNI: cpp processes fully and writes to out buffers
final boolean ok = processI420ToI420(
y, yStride,
u, uStride,
v, vStride,
width, height, frame.getRotation(),
outY, yStride,
outU, uStride,
outV, vStride
);
if (!ok) {
// Fallback passthrough
sink.onFrame(frame);
return;
}
// Copy processed planes into a freshly-allocated WebRTC buffer to avoid lifecycle issues
outY.position(0);
outU.position(0);
outV.position(0);
JavaI420Buffer outBuf = JavaI420Buffer.allocate(width, height);
try {
// Copy Y
ByteBuffer dstY = outBuf.getDataY();
int dstYStride = outBuf.getStrideY();
for (int r = 0; r < height; r++) {
int srcPos = r * yStride;
int dstPos = r * dstYStride;
// copy min(width, stride) bytes per row; strides are expected to be >= width
int copy = Math.min(width, yStride);
byte[] row = new byte[copy];
outY.position(srcPos);
outY.get(row, 0, copy);
dstY.position(dstPos);
dstY.put(row, 0, copy);
}
// Copy U
int h2 = height / 2;
int w2 = width / 2;
ByteBuffer dstU = outBuf.getDataU();
int dstUStride = outBuf.getStrideU();
for (int r = 0; r < h2; r++) {
int srcPos = r * uStride;
int dstPos = r * dstUStride;
int copy = Math.min(w2, uStride);
byte[] row = new byte[copy];
outU.position(srcPos);
outU.get(row, 0, copy);
dstU.position(dstPos);
dstU.put(row, 0, copy);
}
// Copy V
ByteBuffer dstV = outBuf.getDataV();
int dstVStride = outBuf.getStrideV();
for (int r = 0; r < h2; r++) {
int srcPos = r * vStride;
int dstPos = r * dstVStride;
int copy = Math.min(w2, vStride);
byte[] row = new byte[copy];
outV.position(srcPos);
outV.get(row, 0, copy);
dstV.position(dstPos);
dstV.put(row, 0, copy);
}
// Unify rotation to 180 via frame metadata
VideoFrame outFrame = new VideoFrame(outBuf, 180, frame.getTimestampNs());
sink.onFrame(outFrame);
} finally {
// Sinks retain frames they need beyond onFrame(), so release our reference here;
// this also frees the buffer on the exception path and avoids leaking native memory.
outBuf.release();
}
} finally {
i420.release();
}
}
private void ensureOutBuffers(int needY, int needU, int needV) {
if (outY == null || outYCapacity < needY) {
outYCapacity = roundUp(needY, 64);
outY = ByteBuffer.allocateDirect(outYCapacity);
}
if (outU == null || outUCapacity < needU) {
outUCapacity = roundUp(needU, 64);
outU = ByteBuffer.allocateDirect(outUCapacity);
}
if (outV == null || outVCapacity < needV) {
outVCapacity = roundUp(needV, 64);
outV = ByteBuffer.allocateDirect(outVCapacity);
}
outY.limit(needY).position(0);
outU.limit(needU).position(0);
outV.limit(needV).position(0);
}
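// Worked sizing example (illustrative, assuming plane strides equal the plane widths):
// for a 1280x720 frame, needY = 1280 * 720 = 921,600 bytes and
// needU = needV = 640 * 360 = 230,400 bytes; both are already multiples of 64,
// so roundUp() leaves them unchanged and the direct buffers are allocated once and reused.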
private static int roundUp(int x, int a) {
return ((x + a - 1) / a) * a;
}
}
\ No newline at end of file
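A minimal smoke-test sketch (not part of the commit) that feeds one synthetic I420 frame through the processor and logs what the sink receives; it assumes the rvmncnn native library is available on the device and uses only the WebRTC Java types already imported above:
// Illustrative harness (assumption: the rvmncnn native library loads successfully).
OpencvVideoProcessor processor = new OpencvVideoProcessor();
processor.setSink(processed -> {
// Rotation is forced to 180, so the rotated size equals the buffer size.
android.util.Log.d("OpencvSmokeTest", "got " + processed.getRotatedWidth() + "x" + processed.getRotatedHeight() + " rotation=" + processed.getRotation());
});
// 640x360 keeps both dimensions even, as I420 requires.
JavaI420Buffer buffer = JavaI420Buffer.allocate(640, 360);
VideoFrame input = new VideoFrame(buffer, 0, System.nanoTime());
processor.onFrameCaptured(input);
input.release(); // the processor takes and releases its own I420 reference internally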
... ...
package io.livekit.android.track.processing.video;
import android.content.res.AssetManager;
import android.graphics.Bitmap;
import android.view.Surface;
import io.livekit.android.room.track.video.NoDropVideoProcessor;
import livekit.org.webrtc.EglBase;
import livekit.org.webrtc.SurfaceTextureHelper;
import livekit.org.webrtc.VideoFrame;
import livekit.org.webrtc.VideoSink;
/**
* RVMNcnn processor that delegates all pixel processing to native (cpp) and
* renders processed frames directly into a Surface provided by SurfaceTextureHelper.
* Java does not perform any image processing.
*/
public class RVMNcnn extends NoDropVideoProcessor {
// Native JNI hooks
public native boolean loadModel(AssetManager mgr, int modelid, int sizeid, int intrainterid, int postprocid, int cpugpu);
public native boolean openCamera(int facing);
public native boolean closeCamera();
public native boolean setOutputWindow(Surface surface);
public native boolean setBackgroundImage(Bitmap bitmap);
public native boolean processFrame();
static {
try {
System.loadLibrary("rvmncnn");
android.util.Log.d("RVMNcnn", "System.loadLibrary(rvmncnn) success");
} catch (Throwable t) {
android.util.Log.e("RVMNcnn", "System.loadLibrary(rvmncnn) failed", t);
}
}
private final EglBase eglBase;
private final SurfaceTextureHelper surfaceTextureHelper;
private final Surface outputSurface;
private VideoSink targetSink;
/**
* Controls whether the native virtual background is enabled.
* When enabled, native renders to outputSurface and Java forwards those frames.
* When disabled, incoming frames are passed through to targetSink.
*/
public boolean enabled = true;
/**
* Facing: 0 back, 1 front. Used when starting native camera pipeline.
*/
private int facing = 1;
public RVMNcnn(EglBase eglBase) {
this.eglBase = eglBase;
this.surfaceTextureHelper = SurfaceTextureHelper.create("RVMNcnn", eglBase.getEglBaseContext());
this.outputSurface = new Surface(surfaceTextureHelper.getSurfaceTexture());
}
@Override
public void onCapturerStarted(boolean started) {
if (started) {
surfaceTextureHelper.setTextureSize(640, 640);
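// Assumption: 640x640 matches the resolution the native pipeline renders into the
// surface; adjust the texture size if the model or renderer uses a different one.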
android.util.Log.d("RVMNcnn", "onCapturerStarted: setOutputWindow + openCamera");
// Listen to frames produced from the output surface (rendered by native),
// and forward to target sink.
surfaceTextureHelper.stopListening();
surfaceTextureHelper.startListening(frame -> {
VideoSink sink = targetSink;
if (sink != null) {
sink.onFrame(frame);
}
});
if (enabled) {
// Direct native to render into our SurfaceTextureHelper's surface
setOutputWindow(outputSurface);
// Start native camera pipeline (cpp will process and render)
openCamera(facing);
}
}
}
@Override
public void onCapturerStopped() {
// Stop Java-side listening and shutdown native pipeline
surfaceTextureHelper.stopListening();
closeCamera();
}
@Override
public void onFrameCaptured(VideoFrame frame) {
// If disabled, pass-through original frames.
if (!enabled) {
VideoSink sink = targetSink;
if (sink != null) {
sink.onFrame(frame);
}
return;
}
// Enabled: Java does not process pixels nor forward original frames.
// Native renders processed frames into outputSurface, which we already forward above.
// Drop the incoming frame here.
}
@Override
public void setSink(VideoSink sink) {
this.targetSink = sink;
}
/**
* Update facing and restart native pipeline if needed.
* 0 = back, 1 = front.
*/
public void setFacing(int facing) {
this.facing = facing == 0 ? 0 : 1;
if (enabled) {
// If running, restart native camera with new facing
closeCamera();
openCamera(this.facing);
}
}
/**
* Update the background image used by native processor.
* Pass null to clear.
*/
public boolean updateBackgroundImage(Bitmap bitmap) {
return setBackgroundImage(bitmap);
}
/**
* Call when disposing the processor.
*/
public void dispose() {
surfaceTextureHelper.stopListening();
closeCamera();
outputSurface.release();
surfaceTextureHelper.dispose();
}
}
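A rough wiring sketch for RVMNcnn on its own (the `context` and `background` variables are hypothetical, and the model/size/thread ids are placeholders whose meaning is defined on the native side):
// Illustrative setup; assumed call order, placeholder ids.
EglBase eglBase = EglBase.create();
RVMNcnn rvm = new RVMNcnn(eglBase);
rvm.loadModel(context.getAssets(), 0, 0, 0, 0, 0); // modelid, sizeid, intrainterid, postprocid, cpugpu
rvm.setFacing(1); // 1 = front camera
rvm.updateBackgroundImage(background); // Bitmap background, or null to clear
// ... attach as the video track's processor while capturing ...
rvm.dispose(); // stops the native pipeline, releases the Surface and SurfaceTextureHelper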
... ... @@ -23,6 +23,7 @@
#include <string>
#include <vector>
#include <cstring>
#include <platform.h>
#include <benchmark.h>
... ... @@ -446,4 +447,95 @@ JNIEXPORT jboolean JNICALL Java_io_livekit_android_track_processing_video_RVMNcn
return JNI_FALSE;
}
// process I420 in/out without Java-side OpenCV
// signature: Java_io_livekit_android_track_processing_video_OpencvVideoProcessor_processI420ToI420
JNIEXPORT jboolean JNICALL Java_io_livekit_android_track_processing_video_OpencvVideoProcessor_processI420ToI420(
JNIEnv* env, jclass,
jobject yBuf, jint yStride,
jobject uBuf, jint uStride,
jobject vBuf, jint vStride,
jint width, jint height, jint rotation,
jobject outYBuf, jint outYStride,
jobject outUBuf, jint outUStride,
jobject outVBuf, jint outVStride)
{
if (!yBuf || !uBuf || !vBuf || !outYBuf || !outUBuf || !outVBuf || width <= 0 || height <= 0)
return JNI_FALSE;
uint8_t* yPtr = (uint8_t*)env->GetDirectBufferAddress(yBuf);
uint8_t* uPtr = (uint8_t*)env->GetDirectBufferAddress(uBuf);
uint8_t* vPtr = (uint8_t*)env->GetDirectBufferAddress(vBuf);
uint8_t* outYPtr = (uint8_t*)env->GetDirectBufferAddress(outYBuf);
uint8_t* outUPtr = (uint8_t*)env->GetDirectBufferAddress(outUBuf);
uint8_t* outVPtr = (uint8_t*)env->GetDirectBufferAddress(outVBuf);
if (!yPtr || !uPtr || !vPtr || !outYPtr || !outUPtr || !outVPtr)
return JNI_FALSE;
// Pack input planes with stride into a contiguous I420 buffer
const int yH = height;
const int uvH = height / 2;
const int yW = width;
const int uvW = width / 2;
const int ySize = yW * yH;
const int uSize = uvW * uvH;
const int vSize = uvW * uvH;
std::vector<uint8_t> i420_in(ySize + uSize + vSize);
uint8_t* inY = i420_in.data();
uint8_t* inU = inY + ySize;
uint8_t* inV = inU + uSize;
for (int r = 0; r < yH; ++r) {
memcpy(inY + r * yW, yPtr + r * yStride, yW);
}
for (int r = 0; r < uvH; ++r) {
memcpy(inU + r * uvW, uPtr + r * uStride, uvW);
memcpy(inV + r * uvW, vPtr + r * vStride, uvW);
}
// Wrap as a single-channel Mat (H + H/2) x W and convert to BGR
cv::Mat i420_mat(height + height / 2, width, CV_8UC1, i420_in.data());
cv::Mat bgr;
cv::cvtColor(i420_mat, bgr, cv::COLOR_YUV2BGR_I420);
// Process with RVM
{
ncnn::MutexLockGuard g(lock);
if (g_rvm) {
cv::Mat fgr, pha, seg;
g_rvm->detect(bgr, g_feats, fgr, pha, seg);
g_rvm->draw(bgr, fgr, pha, seg);
} else {
draw_unsupported(bgr);
}
}
// Convert back to I420
cv::Mat i420_out;
cv::cvtColor(bgr, i420_out, cv::COLOR_BGR2YUV_I420);
if (i420_out.empty() || i420_out.cols != width || i420_out.rows != height + height / 2)
return JNI_FALSE;
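// cv::cvtColor writes into a freshly allocated, continuous Mat, so the output planes
// can be addressed by plain offsets: Y occupies width*height bytes, followed by the
// U and V planes at (width/2)*(height/2) bytes each.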
const uint8_t* outBase = i420_out.ptr<uint8_t>(0);
const uint8_t* srcY = outBase;
const uint8_t* srcU = srcY + ySize;
const uint8_t* srcV = srcU + uSize;
// Write back to output planes honoring strides
for (int r = 0; r < yH; ++r) {
memcpy(outYPtr + r * outYStride, srcY + r * yW, yW);
}
for (int r = 0; r < uvH; ++r) {
memcpy(outUPtr + r * outUStride, srcU + r * uvW, uvW);
memcpy(outVPtr + r * outVStride, srcV + r * uvW, uvW);
}
// We ignore input 'rotation' here and unify to 180 at Java metadata level
(void)rotation;
return JNI_TRUE;
}
}
... ...