引入yolo8

xuning
Commit b55f96188776290ac5c29c4a38a10a8b7a5cd732 b55f9618 1 parent f8c7719d
livekit-android-track-processors/src/main/jni/yolov8.cpp
livekit-android-track-processors/src/main/jni/yolov8.h
livekit-android-track-processors/src/main/jni/yolov8_cls.cpp
livekit-android-track-processors/src/main/jni/yolov8_det.cpp
livekit-android-track-processors/src/main/jni/yolov8_obb.cpp
livekit-android-track-processors/src/main/jni/yolov8_pose.cpp
livekit-android-track-processors/src/main/jni/yolov8_seg.cpp
livekit-android-track-processors/src/main/jni/yolov8ncnn.cpp
--- a/livekit-android-track-processors/src/main/jni/yolov8.cpp 0 → 100644
查看文件 @b55f961
+++ b/livekit-android-track-processors/src/main/jni/yolov8.cpp 0 → 100644
查看文件 @b55f961
+ // Tencent is pleased to support the open source community by making ncnn available.
+ //
+ // Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
+ //
+ // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+ // in compliance with the License. You may obtain a copy of the License at
+ //
+ // https://opensource.org/licenses/BSD-3-Clause
+ //
+ // Unless required by applicable law or agreed to in writing, software distributed
+ // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ // specific language governing permissions and limitations under the License.
+ 
+ #include "yolov8.h"
+ 
+ // Destructor. No explicit cleanup is performed here; the members are torn
+ // down by their own destructors.
+ //
+ // Note: det_target_size must not be "initialised" in this function. A store
+ // made in the destructor can never be observed, so it would be a dead write
+ // and would not give the member a starting value.
+ YOLOv8::~YOLOv8()
+ {
+ }
+ 
+ // Loads the network from a param file and a model (weights) file given by path.
+ //
+ // parampath  forwarded to ncnn::Net::load_param
+ // modelpath  forwarded to ncnn::Net::load_model
+ // use_gpu    requests Vulkan compute; only honoured when built with NCNN_VULKAN
+ //
+ // Returns 0 on success. If either file fails to load, the non-zero status
+ // reported by ncnn is returned and the model file is not attempted after a
+ // failed param load.
+ int YOLOv8::load(const char* parampath, const char* modelpath, bool use_gpu)
+ {
+     // drop any previously loaded network and start again from default options
+     yolov8.clear();
+ 
+     yolov8.opt = ncnn::Option();
+ 
+ #if NCNN_VULKAN
+     yolov8.opt.use_vulkan_compute = use_gpu;
+ #endif
+ 
+     const int param_ret = yolov8.load_param(parampath);
+     if (param_ret != 0)
+         return param_ret;
+ 
+     return yolov8.load_model(modelpath);
+ }
+ 
+ // Loads the network from a param asset and a model (weights) asset.
+ //
+ // mgr        asset manager forwarded to ncnn together with each asset path
+ // parampath  forwarded to ncnn::Net::load_param
+ // modelpath  forwarded to ncnn::Net::load_model
+ // use_gpu    requests Vulkan compute; only honoured when built with NCNN_VULKAN
+ //
+ // Returns 0 on success. If either asset fails to load, the non-zero status
+ // reported by ncnn is returned and the model asset is not attempted after a
+ // failed param load.
+ int YOLOv8::load(AAssetManager* mgr, const char* parampath, const char* modelpath, bool use_gpu)
+ {
+     // drop any previously loaded network and start again from default options
+     yolov8.clear();
+ 
+     yolov8.opt = ncnn::Option();
+ 
+ #if NCNN_VULKAN
+     yolov8.opt.use_vulkan_compute = use_gpu;
+ #endif
+ 
+     const int param_ret = yolov8.load_param(mgr, parampath);
+     if (param_ret != 0)
+         return param_ret;
+ 
+     return yolov8.load_model(mgr, modelpath);
+ }
+ 
+ // Stores the requested target size in det_target_size. The value is kept as
+ // given; no validation is applied.
+ void YOLOv8::set_det_target_size(int target_size)
+ {
+     this->det_target_size = target_size;
+ }
--- a/livekit-android-track-processors/src/main/jni/yolov8.h 0 → 100644
查看文件 @b55f961
+++ b/livekit-android-track-processors/src/main/jni/yolov8.h 0 → 100644
查看文件 @b55f961
+ // Tencent is pleased to support the open source community by making ncnn available.
+ //
+ // Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
+ //
+ // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+ // in compliance with the License. You may obtain a copy of the License at
+ //
+ // https://opensource.org/licenses/BSD-3-Clause
+ //
+ // Unless required by applicable law or agreed to in writing, software distributed
+ // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ // specific language governing permissions and limitations under the License.
+ 
+ #ifndef YOLOV8_H
+ #define YOLOV8_H
+ 
+ #include <opencv2/core/core.hpp>
+ 
+ #include <net.h>
+ 
+ // A single 2-D keypoint attached to an Object (see Object::keypoints).
+ struct KeyPoint
+ {
+     cv::Point2f p; // keypoint position
+     float prob;    // score associated with this keypoint
+ };
+ 
+ // One result produced by a YOLOv8 task. Which fields are meaningful depends
+ // on the task that filled it in (e.g. classification only writes label and
+ // prob, detection writes rect, label and prob).
+ //
+ // The scalar members default to zero so that an Object declared with
+ // `Object obj;` never carries indeterminate values when it is copied into a
+ // vector with only some of its fields assigned.
+ struct Object
+ {
+     cv::Rect_<float> rect;           // axis-aligned bounding box
+     cv::RotatedRect rrect;           // rotated bounding box
+     int label = 0;                   // class index
+     float prob = 0.f;                // score of `label`
+     int gindex = 0;                  // NOTE(review): presumably the grid/prediction index - confirm against seg/pose code
+     cv::Mat mask;                    // per-object mask
+     std::vector<KeyPoint> keypoints; // per-object keypoints
+ };
+ 
+ // Abstract base for the YOLOv8 task wrappers. It owns the ncnn network and
+ // the model-loading code; each task subclass supplies detect() and draw().
+ class YOLOv8
+ {
+ public:
+     virtual ~YOLOv8();
+ 
+     // Load the network from files on disk. Returns an int status (0 = success).
+     int load(const char* parampath, const char* modelpath, bool use_gpu = false);
+     // Load the network from assets via an AAssetManager. Returns an int status (0 = success).
+     int load(AAssetManager* mgr, const char* parampath, const char* modelpath, bool use_gpu = false);
+ 
+     // Override the target size stored in det_target_size.
+     void set_det_target_size(int target_size);
+ 
+     // Run inference on `rgb` and write the results to `objects`.
+     virtual int detect(const cv::Mat& rgb, std::vector<Object>& objects) = 0;
+     // Render `objects` onto `rgb`.
+     virtual int draw(cv::Mat& rgb, const std::vector<Object>& objects) = 0;
+ 
+ protected:
+     ncnn::Net yolov8;
+ 
+     // Target input size read by detect() implementations. It is given a
+     // default here because the class has no constructor that sets it; without
+     // one, a detect() call made before set_det_target_size() would read an
+     // indeterminate value.
+     int det_target_size = 320;
+ };
+ 
+ // Detection task: provides detect() only. draw() stays pure virtual here and
+ // is supplied by the subclasses of this class.
+ class YOLOv8_det : public YOLOv8
+ {
+ public:
+     int detect(const cv::Mat& rgb, std::vector<Object>& objects) override;
+ };
+ 
+ // Detection variant that supplies draw() on top of YOLOv8_det::detect().
+ // NOTE(review): presumably renders COCO class names - confirm in yolov8_det.cpp.
+ class YOLOv8_det_coco : public YOLOv8_det
+ {
+ public:
+     int draw(cv::Mat& rgb, const std::vector<Object>& objects) override;
+ };
+ 
+ // Detection variant that supplies draw() on top of YOLOv8_det::detect().
+ // NOTE(review): presumably renders Open Images V7 class names - confirm in yolov8_det.cpp.
+ class YOLOv8_det_oiv7 : public YOLOv8_det
+ {
+ public:
+     int draw(cv::Mat& rgb, const std::vector<Object>& objects) override;
+ };
+ 
+ // Task wrapper declaring both detect() and draw(); implemented in yolov8_seg.cpp.
+ class YOLOv8_seg : public YOLOv8
+ {
+ public:
+     int detect(const cv::Mat& rgb, std::vector<Object>& objects) override;
+     int draw(cv::Mat& rgb, const std::vector<Object>& objects) override;
+ };
+ 
+ // Task wrapper declaring both detect() and draw(); implemented in yolov8_pose.cpp.
+ class YOLOv8_pose : public YOLOv8
+ {
+ public:
+     int detect(const cv::Mat& rgb, std::vector<Object>& objects) override;
+     int draw(cv::Mat& rgb, const std::vector<Object>& objects) override;
+ };
+ 
+ // Classification task: detect() returns the top-scoring classes and draw()
+ // prints them as text; implemented in yolov8_cls.cpp.
+ class YOLOv8_cls : public YOLOv8
+ {
+ public:
+     int detect(const cv::Mat& rgb, std::vector<Object>& objects) override;
+     int draw(cv::Mat& rgb, const std::vector<Object>& objects) override;
+ };
+ 
+ // Task wrapper declaring both detect() and draw(); implemented in yolov8_obb.cpp.
+ class YOLOv8_obb : public YOLOv8
+ {
+ public:
+     int detect(const cv::Mat& rgb, std::vector<Object>& objects) override;
+     int draw(cv::Mat& rgb, const std::vector<Object>& objects) override;
+ };
+ 
+ #endif // YOLOV8_H
--- a/livekit-android-track-processors/src/main/jni/yolov8_cls.cpp 0 → 100644
查看文件 @b55f961
+++ b/livekit-android-track-processors/src/main/jni/yolov8_cls.cpp 0 → 100644
查看文件 @b55f961
+ // Tencent is pleased to support the open source community by making ncnn available.
+ //
+ // Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
+ //
+ // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+ // in compliance with the License. You may obtain a copy of the License at
+ //
+ // https://opensource.org/licenses/BSD-3-Clause
+ //
+ // Unless required by applicable law or agreed to in writing, software distributed
+ // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ // specific language governing permissions and limitations under the License.
+ 
+ // 1. install
+ //      pip3 install -U ultralytics pnnx ncnn
+ // 2. export yolov8-cls torchscript
+ //      yolo export model=yolov8n-cls.pt format=torchscript
+ // 3. convert torchscript with static shape
+ //      pnnx yolov8n-cls.torchscript
+ // 4. now you get ncnn model files
+ //      yolov8n_cls.ncnn.param
+ //      yolov8n_cls.ncnn.bin
+ 
+ #include "yolov8.h"
+ 
+ #include <opencv2/core/core.hpp>
+ #include <opencv2/imgproc/imgproc.hpp>
+ 
+ #include <float.h>
+ #include <stdio.h>
+ #include <vector>
+ 
+ // Fills `objects` with the `topk` highest-scoring entries of `cls_scores`,
+ // ordered by descending score. Only Object::label (the score's index) and
+ // Object::prob (the score) are written.
+ //
+ // `topk` is clamped to [0, cls_scores.w]: asking for more entries than the
+ // score vector holds yields fewer results instead of sorting past the end of
+ // the buffer.
+ static void get_topk(const ncnn::Mat& cls_scores, int topk, std::vector<Object>& objects)
+ {
+     const int size = cls_scores.w;
+ 
+     if (topk > size)
+         topk = size;
+     if (topk < 0)
+         topk = 0;
+ 
+     // pair every score with its index so the index survives the sort
+     std::vector<std::pair<float, int> > vec(size);
+     for (int i = 0; i < size; i++)
+     {
+         vec[i] = std::make_pair(cls_scores[i], i);
+     }
+ 
+     // only the first topk positions need to be ordered
+     std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(),
+                       std::greater<std::pair<float, int> >());
+ 
+     objects.resize(topk);
+     for (int i = 0; i < topk; i++)
+     {
+         objects[i].label = vec[i].second;
+         objects[i].prob = vec[i].first;
+     }
+ }
+ 
+ // Classifies `rgb` and stores up to five best classes in `objects`
+ // (label = class index, prob = score), best first.
+ //
+ // The image is resized so its longer side is 224, padded to 224x224 with the
+ // value 114, scaled by 1/255 and fed to blob "in0"; scores are read from
+ // blob "out0".
+ //
+ // Returns 0 on success. Returns -1 and leaves `objects` empty when the input
+ // image is empty or the network cannot produce an output.
+ int YOLOv8_cls::detect(const cv::Mat& rgb, std::vector<Object>& objects)
+ {
+     const int target_size = 224;
+     const int topk = 5;
+ 
+     objects.clear();
+ 
+     if (rgb.empty())
+         return -1;
+ 
+     int img_w = rgb.cols;
+     int img_h = rgb.rows;
+ 
+     // letterbox pad
+     int w = img_w;
+     int h = img_h;
+     float scale = 1.f;
+     if (w > h)
+     {
+         scale = (float)target_size / w;
+         w = target_size;
+         h = h * scale;
+     }
+     else
+     {
+         scale = (float)target_size / h;
+         h = target_size;
+         w = w * scale;
+     }
+ 
+     // an extreme aspect ratio can round the short side down to zero
+     if (w < 1)
+         w = 1;
+     if (h < 1)
+         h = 1;
+ 
+     ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgb.data, ncnn::Mat::PIXEL_RGB, img_w, img_h, w, h);
+ 
+     // letterbox pad to target_size rectangle
+     int wpad = target_size - w;
+     int hpad = target_size - h;
+     ncnn::Mat in_pad;
+     ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f);
+ 
+     const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
+     in_pad.substract_mean_normalize(0, norm_vals);
+ 
+     ncnn::Extractor ex = yolov8.create_extractor();
+ 
+     if (ex.input("in0", in_pad) != 0)
+         return -1;
+ 
+     ncnn::Mat out;
+     if (ex.extract("out0", out) != 0 || out.empty())
+         return -1;
+ 
+     // return top-5, or fewer when the model has fewer than five classes
+     get_topk(out, out.w < topk ? out.w : topk, objects);
+ 
+     return 0;
+ }
+ 
+ // Draws one text line per entry of `objects` in the top-left corner of `rgb`,
+ // formatted as "<percent>% <class name>", black text on a white box.
+ //
+ // A label outside the class_names table is rendered as "unknown" instead of
+ // indexing past the end of the table. Always returns 0.
+ int YOLOv8_cls::draw(cv::Mat& rgb, const std::vector<Object>& objects)
+ {
+     static const char* class_names[] = {
+         "tench", "goldfish", "great white shark", "tiger shark", "hammerhead", "electric ray", "stingray", "cock",
+         "hen", "ostrich", "brambling", "goldfinch", "house finch", "junco", "indigo bunting", "robin", "bulbul",
+         "jay", "magpie", "chickadee", "water ouzel", "kite", "bald eagle", "vulture", "great grey owl",
+         "European fire salamander", "common newt", "eft", "spotted salamander", "axolotl", "bullfrog", "tree frog",
+         "tailed frog", "loggerhead", "leatherback turtle", "mud turtle", "terrapin", "box turtle", "banded gecko",
+         "common iguana", "American chameleon", "whiptail", "agama", "frilled lizard", "alligator lizard",
+         "Gila monster", "green lizard", "African chameleon", "Komodo dragon", "African crocodile",
+         "American alligator", "triceratops", "thunder snake", "ringneck snake", "hognose snake", "green snake",
+         "king snake", "garter snake", "water snake", "vine snake", "night snake", "boa constrictor", "rock python",
+         "Indian cobra", "green mamba", "sea snake", "horned viper", "diamondback", "sidewinder", "trilobite",
+         "harvestman", "scorpion", "black and gold garden spider", "barn spider", "garden spider", "black widow",
+         "tarantula", "wolf spider", "tick", "centipede", "black grouse", "ptarmigan", "ruffed grouse",
+         "prairie chicken", "peacock", "quail", "partridge", "African grey", "macaw", "sulphur-crested cockatoo",
+         "lorikeet", "coucal", "bee eater", "hornbill", "hummingbird", "jacamar", "toucan", "drake",
+         "red-breasted merganser", "goose", "black swan", "tusker", "echidna", "platypus", "wallaby", "koala",
+         "wombat", "jellyfish", "sea anemone", "brain coral", "flatworm", "nematode", "conch", "snail", "slug",
+         "sea slug", "chiton", "chambered nautilus", "Dungeness crab", "rock crab", "fiddler crab", "king crab",
+         "American lobster", "spiny lobster", "crayfish", "hermit crab", "isopod", "white stork", "black stork",
+         "spoonbill", "flamingo", "little blue heron", "American egret", "bittern", "crane (bird)", "limpkin",
+         "European gallinule", "American coot", "bustard", "ruddy turnstone", "red-backed sandpiper", "redshank",
+         "dowitcher", "oystercatcher", "pelican", "king penguin", "albatross", "grey whale", "killer whale",
+         "dugong", "sea lion", "Chihuahua", "Japanese spaniel", "Maltese dog", "Pekinese", "Shih-Tzu",
+         "Blenheim spaniel", "papillon", "toy terrier", "Rhodesian ridgeback", "Afghan hound", "basset", "beagle",
+         "bloodhound", "bluetick", "black-and-tan coonhound", "Walker hound", "English foxhound", "redbone",
+         "borzoi", "Irish wolfhound", "Italian greyhound", "whippet", "Ibizan hound", "Norwegian elkhound",
+         "otterhound", "Saluki", "Scottish deerhound", "Weimaraner", "Staffordshire bullterrier",
+         "American Staffordshire terrier", "Bedlington terrier", "Border terrier", "Kerry blue terrier",
+         "Irish terrier", "Norfolk terrier", "Norwich terrier", "Yorkshire terrier", "wire-haired fox terrier",
+         "Lakeland terrier", "Sealyham terrier", "Airedale", "cairn", "Australian terrier", "Dandie Dinmont",
+         "Boston bull", "miniature schnauzer", "giant schnauzer", "standard schnauzer", "Scotch terrier",
+         "Tibetan terrier", "silky terrier", "soft-coated wheaten terrier", "West Highland white terrier",
+         "Lhasa", "flat-coated retriever", "curly-coated retriever", "golden retriever", "Labrador retriever",
+         "Chesapeake Bay retriever", "German short-haired pointer", "vizsla", "English setter", "Irish setter",
+         "Gordon setter", "Brittany spaniel", "clumber", "English springer", "Welsh springer spaniel",
+         "cocker spaniel", "Sussex spaniel", "Irish water spaniel", "kuvasz", "schipperke", "groenendael",
+         "malinois", "briard", "kelpie", "komondor", "Old English sheepdog", "Shetland sheepdog", "collie",
+         "Border collie", "Bouvier des Flandres", "Rottweiler", "German shepherd", "Doberman",
+         "miniature pinscher", "Greater Swiss Mountain dog", "Bernese mountain dog", "Appenzeller", "EntleBucher",
+         "boxer", "bull mastiff", "Tibetan mastiff", "French bulldog", "Great Dane", "Saint Bernard",
+         "Eskimo dog", "malamute", "Siberian husky", "dalmatian", "affenpinscher", "basenji", "pug", "Leonberg",
+         "Newfoundland", "Great Pyrenees", "Samoyed", "Pomeranian", "chow", "keeshond", "Brabancon griffon",
+         "Pembroke", "Cardigan", "toy poodle", "miniature poodle", "standard poodle", "Mexican hairless",
+         "timber wolf", "white wolf", "red wolf", "coyote", "dingo", "dhole", "African hunting dog", "hyena",
+         "red fox", "kit fox", "Arctic fox", "grey fox", "tabby", "tiger cat", "Persian cat", "Siamese cat",
+         "Egyptian cat", "cougar", "lynx", "leopard", "snow leopard", "jaguar", "lion", "tiger", "cheetah",
+         "brown bear", "American black bear", "ice bear", "sloth bear", "mongoose", "meerkat", "tiger beetle",
+         "ladybug", "ground beetle", "long-horned beetle", "leaf beetle", "dung beetle", "rhinoceros beetle",
+         "weevil", "fly", "bee", "ant", "grasshopper", "cricket", "walking stick", "cockroach", "mantis",
+         "cicada", "leafhopper", "lacewing", "dragonfly", "damselfly", "admiral", "ringlet", "monarch",
+         "cabbage butterfly", "sulphur butterfly", "lycaenid", "starfish", "sea urchin", "sea cucumber",
+         "wood rabbit", "hare", "Angora", "hamster", "porcupine", "fox squirrel", "marmot", "beaver",
+         "guinea pig", "sorrel", "zebra", "hog", "wild boar", "warthog", "hippopotamus", "ox", "water buffalo",
+         "bison", "ram", "bighorn", "ibex", "hartebeest", "impala", "gazelle", "Arabian camel", "llama",
+         "weasel", "mink", "polecat", "black-footed ferret", "otter", "skunk", "badger", "armadillo",
+         "three-toed sloth", "orangutan", "gorilla", "chimpanzee", "gibbon", "siamang", "guenon", "patas",
+         "baboon", "macaque", "langur", "colobus", "proboscis monkey", "marmoset", "capuchin", "howler monkey",
+         "titi", "spider monkey", "squirrel monkey", "Madagascar cat", "indri", "Indian elephant",
+         "African elephant", "lesser panda", "giant panda", "barracouta", "eel", "coho", "rock beauty",
+         "anemone fish", "sturgeon", "gar", "lionfish", "puffer", "abacus", "abaya", "academic gown",
+         "accordion", "acoustic guitar", "aircraft carrier", "airliner", "airship", "altar", "ambulance",
+         "amphibian", "analog clock", "apiary", "apron", "ashcan", "assault rifle", "backpack", "bakery",
+         "balance beam", "balloon", "ballpoint", "Band Aid", "banjo", "bannister", "barbell", "barber chair",
+         "barbershop", "barn", "barometer", "barrel", "barrow", "baseball", "basketball", "bassinet", "bassoon",
+         "bathing cap", "bath towel", "bathtub", "beach wagon", "beacon", "beaker", "bearskin", "beer bottle",
+         "beer glass", "bell cote", "bib", "bicycle-built-for-two", "bikini", "binder", "binoculars",
+         "birdhouse", "boathouse", "bobsled", "bolo tie", "bonnet", "bookcase", "bookshop", "bottlecap", "bow",
+         "bow tie", "brass", "brassiere", "breakwater", "breastplate", "broom", "bucket", "buckle",
+         "bulletproof vest", "bullet train", "butcher shop", "cab", "caldron", "candle", "cannon", "canoe",
+         "can opener", "cardigan", "car mirror", "carousel", "carpenter's kit", "carton", "car wheel",
+         "cash machine", "cassette", "cassette player", "castle", "catamaran", "CD player", "cello",
+         "cellular telephone", "chain", "chainlink fence", "chain mail", "chain saw", "chest", "chiffonier",
+         "chime", "china cabinet", "Christmas stocking", "church", "cinema", "cleaver", "cliff dwelling",
+         "cloak", "clog", "cocktail shaker", "coffee mug", "coffeepot", "coil", "combination lock",
+         "computer keyboard", "confectionery", "container ship", "convertible", "corkscrew", "cornet",
+         "cowboy boot", "cowboy hat", "cradle", "crane (machine)", "crash helmet", "crate", "crib",
+         "Crock Pot", "croquet ball", "crutch", "cuirass", "dam", "desk", "desktop computer", "dial telephone",
+         "diaper", "digital clock", "digital watch", "dining table", "dishrag", "dishwasher", "disk brake",
+         "dock", "dogsled", "dome", "doormat", "drilling platform", "drum", "drumstick", "dumbbell",
+         "Dutch oven", "electric fan", "electric guitar", "electric locomotive", "entertainment center",
+         "envelope", "espresso maker", "face powder", "feather boa", "file", "fireboat", "fire engine",
+         "fire screen", "flagpole", "flute", "folding chair", "football helmet", "forklift", "fountain",
+         "fountain pen", "four-poster", "freight car", "French horn", "frying pan", "fur coat", "garbage truck",
+         "gasmask", "gas pump", "goblet", "go-kart", "golf ball", "golfcart", "gondola", "gong", "gown",
+         "grand piano", "greenhouse", "grille", "grocery store", "guillotine", "hair slide", "hair spray",
+         "half track", "hammer", "hamper", "hand blower", "hand-held computer", "handkerchief", "hard disc",
+         "harmonica", "harp", "harvester", "hatchet", "holster", "home theater", "honeycomb", "hook",
+         "hoopskirt", "horizontal bar", "horse cart", "hourglass", "iPod", "iron", "jack-o'-lantern", "jean",
+         "jeep", "jersey", "jigsaw puzzle", "jinrikisha", "joystick", "kimono", "knee pad", "knot", "lab coat",
+         "ladle", "lampshade", "laptop", "lawn mower", "lens cap", "letter opener", "library", "lifeboat",
+         "lighter", "limousine", "liner", "lipstick", "Loafer", "lotion", "loudspeaker", "loupe", "lumbermill",
+         "magnetic compass", "mailbag", "mailbox", "maillot (tights)", "maillot (tank suit)", "manhole cover",
+         "maraca", "marimba", "mask", "matchstick", "maypole", "maze", "measuring cup", "medicine chest",
+         "megalith", "microphone", "microwave", "military uniform", "milk can", "minibus", "miniskirt",
+         "minivan", "missile", "mitten", "mixing bowl", "mobile home", "Model T", "modem", "monastery",
+         "monitor", "moped", "mortar", "mortarboard", "mosque", "mosquito net", "motor scooter", "mountain bike",
+         "mountain tent", "mouse", "mousetrap", "moving van", "muzzle", "nail", "neck brace", "necklace",
+         "nipple", "notebook", "obelisk", "oboe", "ocarina", "odometer", "oil filter", "organ", "oscilloscope",
+         "overskirt", "oxcart", "oxygen mask", "packet", "paddle", "paddlewheel", "padlock", "paintbrush",
+         "pajama", "palace", "panpipe", "paper towel", "parachute", "parallel bars", "park bench",
+         "parking meter", "passenger car", "patio", "pay-phone", "pedestal", "pencil box", "pencil sharpener",
+         "perfume", "Petri dish", "photocopier", "pick", "pickelhaube", "picket fence", "pickup", "pier",
+         "piggy bank", "pill bottle", "pillow", "ping-pong ball", "pinwheel", "pirate", "pitcher", "plane",
+         "planetarium", "plastic bag", "plate rack", "plow", "plunger", "Polaroid camera", "pole",
+         "police van", "poncho", "pool table", "pop bottle", "pot", "potter's wheel", "power drill",
+         "prayer rug", "printer", "prison", "projectile", "projector", "puck", "punching bag", "purse",
+         "quill", "quilt", "racer", "racket", "radiator", "radio", "radio telescope", "rain barrel",
+         "recreational vehicle", "reel", "reflex camera", "refrigerator", "remote control", "restaurant",
+         "revolver", "rifle", "rocking chair", "rotisserie", "rubber eraser", "rugby ball", "rule",
+         "running shoe", "safe", "safety pin", "saltshaker", "sandal", "sarong", "sax", "scabbard", "scale",
+         "school bus", "schooner", "scoreboard", "screen", "screw", "screwdriver", "seat belt", "sewing machine",
+         "shield", "shoe shop", "shoji", "shopping basket", "shopping cart", "shovel", "shower cap",
+         "shower curtain", "ski", "ski mask", "sleeping bag", "slide rule", "sliding door", "slot", "snorkel",
+         "snowmobile", "snowplow", "soap dispenser", "soccer ball", "sock", "solar dish", "sombrero",
+         "soup bowl", "space bar", "space heater", "space shuttle", "spatula", "speedboat", "spider web",
+         "spindle", "sports car", "spotlight", "stage", "steam locomotive", "steel arch bridge", "steel drum",
+         "stethoscope", "stole", "stone wall", "stopwatch", "stove", "strainer", "streetcar", "stretcher",
+         "studio couch", "stupa", "submarine", "suit", "sundial", "sunglass", "sunglasses", "sunscreen",
+         "suspension bridge", "swab", "sweatshirt", "swimming trunks", "swing", "switch", "syringe",
+         "table lamp", "tank", "tape player", "teapot", "teddy", "television", "tennis ball", "thatch",
+         "theater curtain", "thimble", "thresher", "throne", "tile roof", "toaster", "tobacco shop",
+         "toilet seat", "torch", "totem pole", "tow truck", "toyshop", "tractor", "trailer truck", "tray",
+         "trench coat", "tricycle", "trimaran", "tripod", "triumphal arch", "trolleybus", "trombone", "tub",
+         "turnstile", "typewriter keyboard", "umbrella", "unicycle", "upright", "vacuum", "vase", "vault",
+         "velvet", "vending machine", "vestment", "viaduct", "violin", "volleyball", "waffle iron", "wall clock",
+         "wallet", "wardrobe", "warplane", "washbasin", "washer", "water bottle", "water jug", "water tower",
+         "whiskey jug", "whistle", "wig", "window screen", "window shade", "Windsor tie", "wine bottle", "wing",
+         "wok", "wooden spoon", "wool", "worm fence", "wreck", "yawl", "yurt", "web site", "comic book",
+         "crossword puzzle", "street sign", "traffic light", "book jacket", "menu", "plate", "guacamole",
+         "consomme", "hot pot", "trifle", "ice cream", "ice lolly", "French loaf", "bagel", "pretzel",
+         "cheeseburger", "hotdog", "mashed potato", "head cabbage", "broccoli", "cauliflower", "zucchini",
+         "spaghetti squash", "acorn squash", "butternut squash", "cucumber", "artichoke", "bell pepper",
+         "cardoon", "mushroom", "Granny Smith", "strawberry", "orange", "lemon", "fig", "pineapple", "banana",
+         "jackfruit", "custard apple", "pomegranate", "hay", "carbonara", "chocolate sauce", "dough",
+         "meat loaf", "pizza", "potpie", "burrito", "red wine", "espresso", "cup", "eggnog", "alp", "bubble",
+         "cliff", "coral reef", "geyser", "lakeside", "promontory", "sandbar", "seashore", "valley", "volcano",
+         "ballplayer", "groom", "scuba diver", "rapeseed", "daisy", "yellow lady's slipper", "corn", "acorn",
+         "hip", "buckeye", "coral fungus", "agaric", "gyromitra", "stinkhorn", "earthstar", "hen-of-the-woods",
+         "bolete", "ear", "toilet tissue"
+     };
+ 
+     const int num_class_names = (int)(sizeof(class_names) / sizeof(class_names[0]));
+ 
+     int y_offset = 0;
+     for (size_t i = 0; i < objects.size(); i++)
+     {
+         const Object& obj = objects[i];
+ 
+         // fprintf(stderr, "%d = %.5f\n", obj.label, obj.prob);
+ 
+         // the label comes from the model output, so it is not guaranteed to fit the table
+         const bool label_known = obj.label >= 0 && obj.label < num_class_names;
+         const char* class_name = label_known ? class_names[obj.label] : "unknown";
+ 
+         char text[256];
+         snprintf(text, sizeof(text), "%4.1f%% %s", obj.prob * 100, class_name);
+ 
+         int baseLine = 0;
+         cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
+ 
+         int x = 0;
+         int y = y_offset;
+ 
+         // white background box, then black text on top of it
+         cv::rectangle(rgb, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
+                       cv::Scalar(255, 255, 255), -1);
+ 
+         cv::putText(rgb, text, cv::Point(x, y + label_size.height),
+                     cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
+ 
+         // advance by the text height only (the box drawn above is baseLine taller)
+         y_offset += label_size.height;
+     }
+ 
+     return 0;
+ }
--- a/livekit-android-track-processors/src/main/jni/yolov8_det.cpp 0 → 100644
查看文件 @b55f961
+++ b/livekit-android-track-processors/src/main/jni/yolov8_det.cpp 0 → 100644
查看文件 @b55f961
+ // Tencent is pleased to support the open source community by making ncnn available.
+ //
+ // Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
+ //
+ // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+ // in compliance with the License. You may obtain a copy of the License at
+ //
+ // https://opensource.org/licenses/BSD-3-Clause
+ //
+ // Unless required by applicable law or agreed to in writing, software distributed
+ // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ // specific language governing permissions and limitations under the License.
+ 
+ // 1. install
+ //      pip3 install -U ultralytics pnnx ncnn
+ // 2. export yolov8 torchscript
+ //      yolo export model=yolov8n.pt format=torchscript
+ // 3. convert torchscript with static shape
+ //      pnnx yolov8n.torchscript
+ // 4. modify yolov8n_pnnx.py for dynamic shape inference
+ //      A. modify reshape to support dynamic image sizes
+ //      B. permute tensor before concat and adjust concat axis
+ //      C. drop post-process part
+ //      before:
+ //          v_165 = v_142.view(1, 144, 6400)
+ //          v_166 = v_153.view(1, 144, 1600)
+ //          v_167 = v_164.view(1, 144, 400)
+ //          v_168 = torch.cat((v_165, v_166, v_167), dim=2)
+ //          ...
+ //      after:
+ //          v_165 = v_142.view(1, 144, -1).transpose(1, 2)
+ //          v_166 = v_153.view(1, 144, -1).transpose(1, 2)
+ //          v_167 = v_164.view(1, 144, -1).transpose(1, 2)
+ //          v_168 = torch.cat((v_165, v_166, v_167), dim=1)
+ //          return v_168
+ // 5. re-export yolov8 torchscript
+ //      python3 -c 'import yolov8n_pnnx; yolov8n_pnnx.export_torchscript()'
+ // 6. convert new torchscript with dynamic shape
+ //      pnnx yolov8n_pnnx.py.pt inputshape=[1,3,640,640] inputshape2=[1,3,320,320]
+ // 7. now you get ncnn model files
+ //      mv yolov8n_pnnx.py.ncnn.param yolov8n.ncnn.param
+ //      mv yolov8n_pnnx.py.ncnn.bin yolov8n.ncnn.bin
+ 
+ // the out blob would be a 2-dim tensor with w=144 h=8400
+ //
+ //        | bbox-reg 16 x 4       | per-class scores(80) |
+ //        +-----+-----+-----+-----+----------------------+
+ //        | dx0 | dy0 | dx1 | dy1 |0.1 0.0 0.0 0.5 ......|
+ //   all /|     |     |     |     |           .          |
+ //  boxes |  .. |  .. |  .. |  .. |0.0 0.9 0.0 0.0 ......|
+ //  (8400)|     |     |     |     |           .          |
+ //       \|     |     |     |     |           .          |
+ //        +-----+-----+-----+-----+----------------------+
+ //
+ 
+ #include "yolov8.h"
+ 
+ #include <opencv2/core/core.hpp>
+ #include <opencv2/imgproc/imgproc.hpp>
+ 
+ // Area of the overlap between the rects of two objects (0 when they do not overlap).
+ static inline float intersection_area(const Object& a, const Object& b)
+ {
+     return (a.rect & b.rect).area();
+ }
+ 
+ // Recursive quicksort of objects[left..right] (both bounds inclusive) by
+ // descending prob. The pivot is the prob of the middle element.
+ static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right)
+ {
+     const float pivot = objects[(left + right) / 2].prob;
+ 
+     int lo = left;
+     int hi = right;
+ 
+     while (lo <= hi)
+     {
+         // skip elements that are already on the correct side of the pivot
+         while (objects[lo].prob > pivot)
+             lo++;
+ 
+         while (objects[hi].prob < pivot)
+             hi--;
+ 
+         if (lo > hi)
+             break;
+ 
+         std::swap(objects[lo], objects[hi]);
+         lo++;
+         hi--;
+     }
+ 
+     // sort the two partitions; each call is skipped when its range is empty
+     if (left < hi)
+         qsort_descent_inplace(objects, left, hi);
+ 
+     if (lo < right)
+         qsort_descent_inplace(objects, lo, right);
+ }
+ 
+ // Sorts the whole vector by descending prob; a no-op for an empty vector.
+ static void qsort_descent_inplace(std::vector<Object>& objects)
+ {
+     if (!objects.empty())
+     {
+         const int last = static_cast<int>(objects.size() - 1);
+         qsort_descent_inplace(objects, 0, last);
+     }
+ }
+ 
+ // Greedy non-maximum suppression over `objects`, which the caller must
+ // already have sorted by descending prob.
+ //
+ // objects        candidate boxes, visited in order
+ // picked         output: indices into `objects` of the boxes that are kept
+ // nms_threshold  a box is dropped when its IoU with an already kept box
+ //                exceeds this value
+ // agnostic       when false, only boxes with the same label suppress each
+ //                other; when true, labels are ignored
+ static void nms_sorted_bboxes(const std::vector<Object>& objects, std::vector<int>& picked, float nms_threshold, bool agnostic = false)
+ {
+     picked.clear();
+ 
+     const int n = objects.size();
+ 
+     // cache the areas; each one is needed for every IoU the box takes part in
+     std::vector<float> areas(n);
+     for (int i = 0; i < n; i++)
+     {
+         areas[i] = objects[i].rect.area();
+     }
+ 
+     for (int i = 0; i < n; i++)
+     {
+         const Object& a = objects[i];
+ 
+         int keep = 1;
+         for (int j = 0; j < (int)picked.size(); j++)
+         {
+             const Object& b = objects[picked[j]];
+ 
+             if (!agnostic && a.label != b.label)
+                 continue;
+ 
+             // intersection over union
+             float inter_area = intersection_area(a, b);
+             float union_area = areas[i] + areas[picked[j]] - inter_area;
+             // float IoU = inter_area / union_area
+             if (inter_area / union_area > nms_threshold)
+             {
+                 keep = 0;
+                 // one overlap is enough; the remaining kept boxes cannot change the outcome
+                 break;
+             }
+         }
+ 
+         if (keep)
+             picked.push_back(i);
+     }
+ }
+ 
+ // Logistic function: maps x to 1 / (1 + e^-x).
+ static inline float sigmoid(float x)
+ {
+     const float neg_exp = expf(-x);
+     return 1.0f / (1.0f + neg_exp);
+ }
+ 
+ // Decodes the predictions of ONE stride level into candidate boxes.
+ //
+ // pred            one row per grid cell (row index = y * num_grid_x + x); each
+ //                 row holds 16 x 4 box-regression bins followed by the
+ //                 per-class scores
+ // stride          stride of this level; the grid is in_pad.w / stride by
+ //                 in_pad.h / stride cells
+ // in_pad          padded network input, used only for its width and height
+ // prob_threshold  minimum sigmoid(best class score) for a cell to be emitted
+ // objects         output: candidates are appended; rect, label and prob are
+ //                 set, with rect in in_pad pixel coordinates
+ //
+ // The Softmax layer used to decode the box bins is created once per call and
+ // reused for every candidate, rather than being rebuilt inside the grid loop.
+ // Note that the softmax runs in place on the rows of `pred`.
+ static void generate_proposals(const ncnn::Mat& pred, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects)
+ {
+     const int w = in_pad.w;
+     const int h = in_pad.h;
+ 
+     const int num_grid_x = w / stride;
+     const int num_grid_y = h / stride;
+ 
+     const int reg_max_1 = 16;
+     const int num_class = pred.w - reg_max_1 * 4; // number of classes. 80 for COCO
+ 
+     // set up the shared softmax layer
+     ncnn::Option softmax_opt;
+     softmax_opt.num_threads = 1;
+     softmax_opt.use_packing_layout = false;
+ 
+     ncnn::Layer* softmax = ncnn::create_layer("Softmax");
+     {
+         ncnn::ParamDict pd;
+         pd.set(0, 1); // axis
+         pd.set(1, 1);
+         softmax->load_param(pd);
+     }
+     softmax->create_pipeline(softmax_opt);
+ 
+     for (int y = 0; y < num_grid_y; y++)
+     {
+         for (int x = 0; x < num_grid_x; x++)
+         {
+             const ncnn::Mat pred_grid = pred.row_range(y * num_grid_x + x, 1);
+ 
+             // find label with max score
+             int label = -1;
+             float score = -FLT_MAX;
+             {
+                 const ncnn::Mat pred_score = pred_grid.range(reg_max_1 * 4, num_class);
+ 
+                 for (int k = 0; k < num_class; k++)
+                 {
+                     float s = pred_score[k];
+                     if (s > score)
+                     {
+                         label = k;
+                         score = s;
+                     }
+                 }
+ 
+                 // only the winning raw score needs the sigmoid
+                 score = sigmoid(score);
+             }
+ 
+             if (score < prob_threshold)
+                 continue;
+ 
+             // view the 64 regression values as 4 rows (l, t, r, b) of 16 bins
+             ncnn::Mat pred_bbox = pred_grid.range(0, reg_max_1 * 4).reshape(reg_max_1, 4);
+ 
+             softmax->forward_inplace(pred_bbox, softmax_opt);
+ 
+             // each distance is the expected bin index, scaled by the stride
+             float pred_ltrb[4];
+             for (int k = 0; k < 4; k++)
+             {
+                 float dis = 0.f;
+                 const float* dis_after_sm = pred_bbox.row(k);
+                 for (int l = 0; l < reg_max_1; l++)
+                 {
+                     dis += l * dis_after_sm[l];
+                 }
+ 
+                 pred_ltrb[k] = dis * stride;
+             }
+ 
+             // distances are measured from the centre of the grid cell
+             float pb_cx = (x + 0.5f) * stride;
+             float pb_cy = (y + 0.5f) * stride;
+ 
+             float x0 = pb_cx - pred_ltrb[0];
+             float y0 = pb_cy - pred_ltrb[1];
+             float x1 = pb_cx + pred_ltrb[2];
+             float y1 = pb_cy + pred_ltrb[3];
+ 
+             Object obj;
+             obj.rect.x = x0;
+             obj.rect.y = y0;
+             obj.rect.width = x1 - x0;
+             obj.rect.height = y1 - y0;
+             obj.label = label;
+             obj.prob = score;
+ 
+             objects.push_back(obj);
+         }
+     }
+ 
+     softmax->destroy_pipeline(softmax_opt);
+ 
+     delete softmax;
+ }
+ 
+ // Decode every detection head contained in the concatenated output blob.
+ //
+ // The rows of pred are consumed head after head in the order of strides; a
+ // head with stride s takes (in_pad.w / s) * (in_pad.h / s) rows.
+ // Proposals from all heads are appended to objects.
+ static void generate_proposals(const ncnn::Mat& pred, const std::vector<int>& strides, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects)
+ {
+     const int w = in_pad.w;
+     const int h = in_pad.h;
+ 
+     int pred_row_offset = 0;
+     for (size_t i = 0; i < strides.size(); i++)
+     {
+         const int stride = strides[i];
+ 
+         // a non-positive stride would divide by zero below
+         if (stride <= 0)
+             continue;
+ 
+         const int num_grid_x = w / stride;
+         const int num_grid_y = h / stride;
+         const int num_grid = num_grid_x * num_grid_y;
+ 
+         // stop rather than read past the end of the blob when it holds fewer
+         // rows than the grid derived from in_pad requires
+         if (pred_row_offset + num_grid > pred.h)
+             break;
+ 
+         generate_proposals(pred.row_range(pred_row_offset, num_grid), stride, in_pad, prob_threshold, objects);
+         pred_row_offset += num_grid;
+     }
+ }
+ 
+ // Detect objects in an RGB image with the loaded YOLOv8 model.
+ //
+ // rgb      input image; its pixel buffer is handed to ncnn as PIXEL_RGB
+ // objects  replaced with the detections, in original image coordinates,
+ //          ordered by box area from largest to smallest
+ // returns  0 on success, -1 if the image is empty or the output blob
+ //          cannot be extracted
+ int YOLOv8_det::detect(const cv::Mat& rgb, std::vector<Object>& objects)
+ {
+     objects.clear();
+ 
+     // an empty image would make the scale computation divide by zero
+     if (rgb.empty())
+         return -1;
+ 
+     const int target_size = det_target_size;//640;
+     const float prob_threshold = 0.25f;
+     const float nms_threshold = 0.45f;
+ 
+     int img_w = rgb.cols;
+     int img_h = rgb.rows;
+ 
+     // ultralytics/cfg/models/v8/yolov8.yaml
+     std::vector<int> strides(3);
+     strides[0] = 8;
+     strides[1] = 16;
+     strides[2] = 32;
+     const int max_stride = 32;
+ 
+     // resize so that the longer side equals target_size, keeping aspect ratio
+     int w = img_w;
+     int h = img_h;
+     float scale = 1.f;
+     if (w > h)
+     {
+         scale = (float)target_size / w;
+         w = target_size;
+         h = h * scale;
+     }
+     else
+     {
+         scale = (float)target_size / h;
+         h = target_size;
+         w = w * scale;
+     }
+ 
+     // a very elongated image can round its short side down to zero
+     if (w < 1)
+         w = 1;
+     if (h < 1)
+         h = 1;
+ 
+     ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgb.data, ncnn::Mat::PIXEL_RGB, img_w, img_h, w, h);
+ 
+     // letterbox pad each side up to the next multiple of max_stride
+     int wpad = (w + max_stride - 1) / max_stride * max_stride - w;
+     int hpad = (h + max_stride - 1) / max_stride * max_stride - h;
+     ncnn::Mat in_pad;
+     ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f);
+ 
+     // scale pixel values from 0..255 to 0..1
+     const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
+     in_pad.substract_mean_normalize(0, norm_vals);
+ 
+     ncnn::Extractor ex = yolov8.create_extractor();
+ 
+     ex.input("in0", in_pad);
+ 
+     // do not decode an output blob that was never filled
+     ncnn::Mat out;
+     if (ex.extract("out0", out) != 0)
+         return -1;
+ 
+     std::vector<Object> proposals;
+     generate_proposals(out, strides, in_pad, prob_threshold, proposals);
+ 
+     // sort all proposals by score from highest to lowest
+     qsort_descent_inplace(proposals);
+ 
+     // apply nms with nms_threshold
+     std::vector<int> picked;
+     nms_sorted_bboxes(proposals, picked, nms_threshold);
+ 
+     int count = picked.size();
+ 
+     objects.resize(count);
+     for (int i = 0; i < count; i++)
+     {
+         objects[i] = proposals[picked[i]];
+ 
+         // undo the letterbox: remove the leading pad, then the resize scale
+         float x0 = (objects[i].rect.x - (wpad / 2)) / scale;
+         float y0 = (objects[i].rect.y - (hpad / 2)) / scale;
+         float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;
+         float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;
+ 
+         // clip to the original image
+         x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);
+         y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);
+         x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);
+         y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);
+ 
+         objects[i].rect.x = x0;
+         objects[i].rect.y = y0;
+         objects[i].rect.width = x1 - x0;
+         objects[i].rect.height = y1 - y0;
+     }
+ 
+     // sort objects by area
+     struct
+     {
+         bool operator()(const Object& a, const Object& b) const
+         {
+             return a.rect.area() > b.rect.area();
+         }
+     } objects_area_greater;
+     std::sort(objects.begin(), objects.end(), objects_area_greater);
+ 
+     return 0;
+ }
+ 
+ // Draw the detections onto rgb using the COCO class names.
+ //
+ // Each object gets a box outline in a colour chosen by its position in the
+ // list, plus a white label box with "<class name> <score>%" placed above the
+ // box. A label index outside the name table is printed as a number instead
+ // of indexing past the table. Always returns 0.
+ int YOLOv8_det_coco::draw(cv::Mat& rgb, const std::vector<Object>& objects)
+ {
+     static const char* class_names[] = {
+         "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
+         "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
+         "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
+         "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
+         "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
+         "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
+         "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
+         "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
+         "hair drier", "toothbrush"
+     };
+ 
+     static cv::Scalar colors[] = {
+         cv::Scalar( 67,  54, 244),
+         cv::Scalar( 30,  99, 233),
+         cv::Scalar( 39, 176, 156),
+         cv::Scalar( 58, 183, 103),
+         cv::Scalar( 81, 181,  63),
+         cv::Scalar(150, 243,  33),
+         cv::Scalar(169, 244,   3),
+         cv::Scalar(188, 212,   0),
+         cv::Scalar(150, 136,   0),
+         cv::Scalar(175,  80,  76),
+         cv::Scalar(195,  74, 139),
+         cv::Scalar(220,  57, 205),
+         cv::Scalar(235,  59, 255),
+         cv::Scalar(193,   7, 255),
+         cv::Scalar(152,   0, 255),
+         cv::Scalar( 87,  34, 255),
+         cv::Scalar( 85,  72, 121),
+         cv::Scalar(158, 158, 158),
+         cv::Scalar(125, 139,  96)
+     };
+ 
+     // derive the table sizes from the tables so they cannot drift apart
+     const int num_class_names = sizeof(class_names) / sizeof(class_names[0]);
+     const size_t num_colors = sizeof(colors) / sizeof(colors[0]);
+ 
+     for (size_t i = 0; i < objects.size(); i++)
+     {
+         const Object& obj = objects[i];
+ 
+         const cv::Scalar& color = colors[i % num_colors];
+ 
+         // fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
+                 // obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);
+ 
+         cv::rectangle(rgb, obj.rect, color);
+ 
+         // a model trained on another class list can report labels this
+         // table does not cover; show the raw index in that case
+         char text[256];
+         if (obj.label >= 0 && obj.label < num_class_names)
+             snprintf(text, sizeof(text), "%s %.1f%%", class_names[obj.label], obj.prob * 100);
+         else
+             snprintf(text, sizeof(text), "%d %.1f%%", obj.label, obj.prob * 100);
+ 
+         int baseLine = 0;
+         cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
+ 
+         // put the label just above the box, kept inside the top and right edges
+         int x = obj.rect.x;
+         int y = obj.rect.y - label_size.height - baseLine;
+         if (y < 0)
+             y = 0;
+         if (x + label_size.width > rgb.cols)
+             x = rgb.cols - label_size.width;
+ 
+         cv::rectangle(rgb, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
+                       cv::Scalar(255, 255, 255), -1);
+ 
+         cv::putText(rgb, text, cv::Point(x, y + label_size.height),
+                     cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
+     }
+ 
+     return 0;
+ }
+ 
+ // Draw the detections onto rgb using the Open Images V7 class names.
+ //
+ // Each object gets a box outline in a colour chosen by its position in the
+ // list, plus a white label box with "<class name> <score>%" placed above the
+ // box. A label index outside the name table is printed as a number instead
+ // of indexing past the table. Always returns 0.
+ int YOLOv8_det_oiv7::draw(cv::Mat& rgb, const std::vector<Object>& objects)
+ {
+     static const char* class_names[] = {
+         "Accordion", "Adhesive tape", "Aircraft", "Airplane", "Alarm clock", "Alpaca", "Ambulance", "Animal",
+         "Ant", "Antelope", "Apple", "Armadillo", "Artichoke", "Auto part", "Axe", "Backpack", "Bagel",
+         "Baked goods", "Balance beam", "Ball", "Balloon", "Banana", "Band-aid", "Banjo", "Barge", "Barrel",
+         "Baseball bat", "Baseball glove", "Bat (Animal)", "Bathroom accessory", "Bathroom cabinet", "Bathtub",
+         "Beaker", "Bear", "Bed", "Bee", "Beehive", "Beer", "Beetle", "Bell pepper", "Belt", "Bench", "Bicycle",
+         "Bicycle helmet", "Bicycle wheel", "Bidet", "Billboard", "Billiard table", "Binoculars", "Bird",
+         "Blender", "Blue jay", "Boat", "Bomb", "Book", "Bookcase", "Boot", "Bottle", "Bottle opener",
+         "Bow and arrow", "Bowl", "Bowling equipment", "Box", "Boy", "Brassiere", "Bread", "Briefcase",
+         "Broccoli", "Bronze sculpture", "Brown bear", "Building", "Bull", "Burrito", "Bus", "Bust", "Butterfly",
+         "Cabbage", "Cabinetry", "Cake", "Cake stand", "Calculator", "Camel", "Camera", "Can opener", "Canary",
+         "Candle", "Candy", "Cannon", "Canoe", "Cantaloupe", "Car", "Carnivore", "Carrot", "Cart", "Cassette deck",
+         "Castle", "Cat", "Cat furniture", "Caterpillar", "Cattle", "Ceiling fan", "Cello", "Centipede",
+         "Chainsaw", "Chair", "Cheese", "Cheetah", "Chest of drawers", "Chicken", "Chime", "Chisel", "Chopsticks",
+         "Christmas tree", "Clock", "Closet", "Clothing", "Coat", "Cocktail", "Cocktail shaker", "Coconut",
+         "Coffee", "Coffee cup", "Coffee table", "Coffeemaker", "Coin", "Common fig", "Common sunflower",
+         "Computer keyboard", "Computer monitor", "Computer mouse", "Container", "Convenience store", "Cookie",
+         "Cooking spray", "Corded phone", "Cosmetics", "Couch", "Countertop", "Cowboy hat", "Crab", "Cream",
+         "Cricket ball", "Crocodile", "Croissant", "Crown", "Crutch", "Cucumber", "Cupboard", "Curtain",
+         "Cutting board", "Dagger", "Dairy Product", "Deer", "Desk", "Dessert", "Diaper", "Dice", "Digital clock",
+         "Dinosaur", "Dishwasher", "Dog", "Dog bed", "Doll", "Dolphin", "Door", "Door handle", "Doughnut",
+         "Dragonfly", "Drawer", "Dress", "Drill (Tool)", "Drink", "Drinking straw", "Drum", "Duck", "Dumbbell",
+         "Eagle", "Earrings", "Egg (Food)", "Elephant", "Envelope", "Eraser", "Face powder", "Facial tissue holder",
+         "Falcon", "Fashion accessory", "Fast food", "Fax", "Fedora", "Filing cabinet", "Fire hydrant",
+         "Fireplace", "Fish", "Flag", "Flashlight", "Flower", "Flowerpot", "Flute", "Flying disc", "Food",
+         "Food processor", "Football", "Football helmet", "Footwear", "Fork", "Fountain", "Fox", "French fries",
+         "French horn", "Frog", "Fruit", "Frying pan", "Furniture", "Garden Asparagus", "Gas stove", "Giraffe",
+         "Girl", "Glasses", "Glove", "Goat", "Goggles", "Goldfish", "Golf ball", "Golf cart", "Gondola",
+         "Goose", "Grape", "Grapefruit", "Grinder", "Guacamole", "Guitar", "Hair dryer", "Hair spray", "Hamburger",
+         "Hammer", "Hamster", "Hand dryer", "Handbag", "Handgun", "Harbor seal", "Harmonica", "Harp",
+         "Harpsichord", "Hat", "Headphones", "Heater", "Hedgehog", "Helicopter", "Helmet", "High heels",
+         "Hiking equipment", "Hippopotamus", "Home appliance", "Honeycomb", "Horizontal bar", "Horse", "Hot dog",
+         "House", "Houseplant", "Human arm", "Human beard", "Human body", "Human ear", "Human eye", "Human face",
+         "Human foot", "Human hair", "Human hand", "Human head", "Human leg", "Human mouth", "Human nose",
+         "Humidifier", "Ice cream", "Indoor rower", "Infant bed", "Insect", "Invertebrate", "Ipod", "Isopod",
+         "Jacket", "Jacuzzi", "Jaguar (Animal)", "Jeans", "Jellyfish", "Jet ski", "Jug", "Juice", "Kangaroo",
+         "Kettle", "Kitchen & dining room table", "Kitchen appliance", "Kitchen knife", "Kitchen utensil",
+         "Kitchenware", "Kite", "Knife", "Koala", "Ladder", "Ladle", "Ladybug", "Lamp", "Land vehicle",
+         "Lantern", "Laptop", "Lavender (Plant)", "Lemon", "Leopard", "Light bulb", "Light switch", "Lighthouse",
+         "Lily", "Limousine", "Lion", "Lipstick", "Lizard", "Lobster", "Loveseat", "Luggage and bags", "Lynx",
+         "Magpie", "Mammal", "Man", "Mango", "Maple", "Maracas", "Marine invertebrates", "Marine mammal",
+         "Measuring cup", "Mechanical fan", "Medical equipment", "Microphone", "Microwave oven", "Milk",
+         "Miniskirt", "Mirror", "Missile", "Mixer", "Mixing bowl", "Mobile phone", "Monkey", "Moths and butterflies",
+         "Motorcycle", "Mouse", "Muffin", "Mug", "Mule", "Mushroom", "Musical instrument", "Musical keyboard",
+         "Nail (Construction)", "Necklace", "Nightstand", "Oboe", "Office building", "Office supplies", "Orange",
+         "Organ (Musical Instrument)", "Ostrich", "Otter", "Oven", "Owl", "Oyster", "Paddle", "Palm tree",
+         "Pancake", "Panda", "Paper cutter", "Paper towel", "Parachute", "Parking meter", "Parrot", "Pasta",
+         "Pastry", "Peach", "Pear", "Pen", "Pencil case", "Pencil sharpener", "Penguin", "Perfume", "Person",
+         "Personal care", "Personal flotation device", "Piano", "Picnic basket", "Picture frame", "Pig",
+         "Pillow", "Pineapple", "Pitcher (Container)", "Pizza", "Pizza cutter", "Plant", "Plastic bag", "Plate",
+         "Platter", "Plumbing fixture", "Polar bear", "Pomegranate", "Popcorn", "Porch", "Porcupine", "Poster",
+         "Potato", "Power plugs and sockets", "Pressure cooker", "Pretzel", "Printer", "Pumpkin", "Punching bag",
+         "Rabbit", "Raccoon", "Racket", "Radish", "Ratchet (Device)", "Raven", "Rays and skates", "Red panda",
+         "Refrigerator", "Remote control", "Reptile", "Rhinoceros", "Rifle", "Ring binder", "Rocket",
+         "Roller skates", "Rose", "Rugby ball", "Ruler", "Salad", "Salt and pepper shakers", "Sandal",
+         "Sandwich", "Saucer", "Saxophone", "Scale", "Scarf", "Scissors", "Scoreboard", "Scorpion",
+         "Screwdriver", "Sculpture", "Sea lion", "Sea turtle", "Seafood", "Seahorse", "Seat belt", "Segway",
+         "Serving tray", "Sewing machine", "Shark", "Sheep", "Shelf", "Shellfish", "Shirt", "Shorts",
+         "Shotgun", "Shower", "Shrimp", "Sink", "Skateboard", "Ski", "Skirt", "Skull", "Skunk", "Skyscraper",
+         "Slow cooker", "Snack", "Snail", "Snake", "Snowboard", "Snowman", "Snowmobile", "Snowplow",
+         "Soap dispenser", "Sock", "Sofa bed", "Sombrero", "Sparrow", "Spatula", "Spice rack", "Spider",
+         "Spoon", "Sports equipment", "Sports uniform", "Squash (Plant)", "Squid", "Squirrel", "Stairs",
+         "Stapler", "Starfish", "Stationary bicycle", "Stethoscope", "Stool", "Stop sign", "Strawberry",
+         "Street light", "Stretcher", "Studio couch", "Submarine", "Submarine sandwich", "Suit", "Suitcase",
+         "Sun hat", "Sunglasses", "Surfboard", "Sushi", "Swan", "Swim cap", "Swimming pool", "Swimwear",
+         "Sword", "Syringe", "Table", "Table tennis racket", "Tablet computer", "Tableware", "Taco", "Tank",
+         "Tap", "Tart", "Taxi", "Tea", "Teapot", "Teddy bear", "Telephone", "Television", "Tennis ball",
+         "Tennis racket", "Tent", "Tiara", "Tick", "Tie", "Tiger", "Tin can", "Tire", "Toaster", "Toilet",
+         "Toilet paper", "Tomato", "Tool", "Toothbrush", "Torch", "Tortoise", "Towel", "Tower", "Toy",
+         "Traffic light", "Traffic sign", "Train", "Training bench", "Treadmill", "Tree", "Tree house",
+         "Tripod", "Trombone", "Trousers", "Truck", "Trumpet", "Turkey", "Turtle", "Umbrella", "Unicycle",
+         "Van", "Vase", "Vegetable", "Vehicle", "Vehicle registration plate", "Violin", "Volleyball (Ball)",
+         "Waffle", "Waffle iron", "Wall clock", "Wardrobe", "Washing machine", "Waste container", "Watch",
+         "Watercraft", "Watermelon", "Weapon", "Whale", "Wheel", "Wheelchair", "Whisk", "Whiteboard", "Willow",
+         "Window", "Window blind", "Wine", "Wine glass", "Wine rack", "Winter melon", "Wok", "Woman",
+         "Wood-burning stove", "Woodpecker", "Worm", "Wrench", "Zebra", "Zucchini"
+     };
+ 
+     static cv::Scalar colors[] = {
+         cv::Scalar( 67,  54, 244),
+         cv::Scalar( 30,  99, 233),
+         cv::Scalar( 39, 176, 156),
+         cv::Scalar( 58, 183, 103),
+         cv::Scalar( 81, 181,  63),
+         cv::Scalar(150, 243,  33),
+         cv::Scalar(169, 244,   3),
+         cv::Scalar(188, 212,   0),
+         cv::Scalar(150, 136,   0),
+         cv::Scalar(175,  80,  76),
+         cv::Scalar(195,  74, 139),
+         cv::Scalar(220,  57, 205),
+         cv::Scalar(235,  59, 255),
+         cv::Scalar(193,   7, 255),
+         cv::Scalar(152,   0, 255),
+         cv::Scalar( 87,  34, 255),
+         cv::Scalar( 85,  72, 121),
+         cv::Scalar(158, 158, 158),
+         cv::Scalar(125, 139,  96)
+     };
+ 
+     // derive the table sizes from the tables so they cannot drift apart
+     const int num_class_names = sizeof(class_names) / sizeof(class_names[0]);
+     const size_t num_colors = sizeof(colors) / sizeof(colors[0]);
+ 
+     for (size_t i = 0; i < objects.size(); i++)
+     {
+         const Object& obj = objects[i];
+ 
+         const cv::Scalar& color = colors[i % num_colors];
+ 
+         // fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
+                 // obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);
+ 
+         cv::rectangle(rgb, obj.rect, color);
+ 
+         // a model trained on another class list can report labels this
+         // table does not cover; show the raw index in that case
+         char text[256];
+         if (obj.label >= 0 && obj.label < num_class_names)
+             snprintf(text, sizeof(text), "%s %.1f%%", class_names[obj.label], obj.prob * 100);
+         else
+             snprintf(text, sizeof(text), "%d %.1f%%", obj.label, obj.prob * 100);
+ 
+         int baseLine = 0;
+         cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
+ 
+         // put the label just above the box, kept inside the top and right edges
+         int x = obj.rect.x;
+         int y = obj.rect.y - label_size.height - baseLine;
+         if (y < 0)
+             y = 0;
+         if (x + label_size.width > rgb.cols)
+             x = rgb.cols - label_size.width;
+ 
+         cv::rectangle(rgb, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
+                       cv::Scalar(255, 255, 255), -1);
+ 
+         cv::putText(rgb, text, cv::Point(x, y + label_size.height),
+                     cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
+     }
+ 
+     return 0;
+ }
--- a/livekit-android-track-processors/src/main/jni/yolov8_obb.cpp 0 → 100644
查看文件 @b55f961
+++ b/livekit-android-track-processors/src/main/jni/yolov8_obb.cpp 0 → 100644
查看文件 @b55f961
+ // Tencent is pleased to support the open source community by making ncnn available.
+ //
+ // Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
+ //
+ // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+ // in compliance with the License. You may obtain a copy of the License at
+ //
+ // https://opensource.org/licenses/BSD-3-Clause
+ //
+ // Unless required by applicable law or agreed to in writing, software distributed
+ // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ // specific language governing permissions and limitations under the License.
+ 
+ // 1. install
+ //      pip3 install -U ultralytics pnnx ncnn
+ // 2. export yolov8-obb torchscript
+ //      yolo export model=yolov8n-obb.pt format=torchscript
+ // 3. convert torchscript with static shape
+ //      pnnx yolov8n-obb.torchscript
+ // 4. modify yolov8n_obb_pnnx.py for dynamic shape inference
+ //      A. modify reshape to support dynamic image sizes
+ //      B. permute tensor before concat and adjust concat axis
+ //      C. drop post-process part
+ //      before:
+ //          v_137 = v_136.view(1, 1, 16384)
+ //          v_143 = v_142.view(1, 1, 4096)
+ //          v_149 = v_148.view(1, 1, 1024)
+ //          v_150 = torch.cat((v_137, v_143, v_149), dim=2)
+ //          ...
+ //          v_186 = v_163.view(1, 79, 16384)
+ //          v_187 = v_174.view(1, 79, 4096)
+ //          v_188 = v_185.view(1, 79, 1024)
+ //          v_189 = torch.cat((v_186, v_187, v_188), dim=2)
+ //          ...
+ //      after:
+ //          v_137 = v_136.view(1, 1, -1).transpose(1, 2)
+ //          v_143 = v_142.view(1, 1, -1).transpose(1, 2)
+ //          v_149 = v_148.view(1, 1, -1).transpose(1, 2)
+ //          v_150 = torch.cat((v_137, v_143, v_149), dim=1)
+ //          ...
+ //          v_186 = v_163.view(1, 79, -1).transpose(1, 2)
+ //          v_187 = v_174.view(1, 79, -1).transpose(1, 2)
+ //          v_188 = v_185.view(1, 79, -1).transpose(1, 2)
+ //          v_189 = torch.cat((v_186, v_187, v_188), dim=1)
+ //          return v_189, v_150
+ // 5. re-export yolov8-obb torchscript
+ //      python3 -c 'import yolov8n_obb_pnnx; yolov8n_obb_pnnx.export_torchscript()'
+ // 6. convert new torchscript with dynamic shape
+ //      pnnx yolov8n_obb_pnnx.py.pt inputshape=[1,3,1024,1024] inputshape2=[1,3,512,512]
+ // 7. now you get ncnn model files
+ //      mv yolov8n_obb_pnnx.py.ncnn.param yolov8n_obb.ncnn.param
+ //      mv yolov8n_obb_pnnx.py.ncnn.bin yolov8n_obb.ncnn.bin
+ 
+ // the first out blob (out0) would be a 2-dim tensor with w=79 h=21504
+ //
+ //        | bbox-reg 16 x 4       |score(15)|
+ //        +-----+-----+-----+-----+---------+
+ //        | dx0 | dy0 | dx1 | dy1 | 0.1 ... |
+ //   all /|     |     |     |     |     ... |
+ //  boxes |  .. |  .. |  .. |  .. | 0.0 ... |
+ // (21504)|     |     |     |     |  .  ... |
+ //       \|     |     |     |     |  .  ... |
+ //        +-----+-----+-----+-----+---------+
+ //
+ 
+ // the second out blob (out1) would be a 2-dim tensor with w=1 h=21504
+ //
+ //        | degree(1)|
+ //        +----------+
+ //        |    0.1   |
+ //   all /|          |
+ //  boxes |    0.0   |
+ // (21504)|     .    |
+ //       \|     .    |
+ //        +----------+
+ //
+ 
+ #include "yolov8.h"
+ 
+ #include "layer.h"
+ 
+ #include <opencv2/core/core.hpp>
+ #include <opencv2/imgproc/imgproc.hpp>
+ 
+ #include <float.h>
+ #include <stdio.h>
+ #include <vector>
+ 
+ // Area of the overlap between the rotated rectangles of a and b.
+ // Returns 0 when OpenCV reports no intersection points.
+ static inline float intersection_area(const Object& a, const Object& b)
+ {
+     std::vector<cv::Point2f> overlap_pts;
+     cv::rotatedRectangleIntersection(a.rrect, b.rrect, overlap_pts);
+ 
+     // the intersection polygon is given by its vertices; its area is the overlap
+     return overlap_pts.empty() ? 0.f : cv::contourArea(overlap_pts);
+ }
+ 
+ // Quicksort objects[left..right] (both inclusive) in place by descending prob.
+ static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right)
+ {
+     // pivot on the score of the middle element
+     const float pivot = objects[(left + right) / 2].prob;
+ 
+     int lo = left;
+     int hi = right;
+ 
+     while (lo <= hi)
+     {
+         // skip entries that already sit on the correct side of the pivot
+         while (objects[lo].prob > pivot)
+             lo++;
+ 
+         while (objects[hi].prob < pivot)
+             hi--;
+ 
+         // the cursors crossed, partitioning is complete
+         if (lo > hi)
+             break;
+ 
+         std::swap(objects[lo], objects[hi]);
+ 
+         lo++;
+         hi--;
+     }
+ 
+     // sort the two partitions; the higher-scoring one first
+     if (left < hi)
+         qsort_descent_inplace(objects, left, hi);
+ 
+     if (lo < right)
+         qsort_descent_inplace(objects, lo, right);
+ }
+ 
+ // Sort the whole vector in place by descending prob; an empty vector is left untouched.
+ static void qsort_descent_inplace(std::vector<Object>& objects)
+ {
+     if (!objects.empty())
+     {
+         const int last = objects.size() - 1;
+         qsort_descent_inplace(objects, 0, last);
+     }
+ }
+ 
+ // Greedy non-maximum suppression over rotated boxes.
+ //
+ // objects        candidates, visited in index order so earlier entries win;
+ //                the caller sorts them by descending prob beforehand
+ // picked         receives the indices into objects of the boxes that are kept
+ // nms_threshold  a box is dropped when its IoU with an already kept box
+ //                exceeds this value
+ // agnostic       when false, only boxes with the same label suppress each other
+ static void nms_sorted_bboxes(const std::vector<Object>& objects, std::vector<int>& picked, float nms_threshold, bool agnostic = false)
+ {
+     picked.clear();
+ 
+     const int n = objects.size();
+ 
+     // box areas are reused for every pair, so compute them once
+     std::vector<float> areas(n);
+     for (int i = 0; i < n; i++)
+     {
+         areas[i] = objects[i].rrect.size.area();
+     }
+ 
+     for (int i = 0; i < n; i++)
+     {
+         const Object& a = objects[i];
+ 
+         int keep = 1;
+         for (int j = 0; j < (int)picked.size(); j++)
+         {
+             const Object& b = objects[picked[j]];
+ 
+             if (!agnostic && a.label != b.label)
+                 continue;
+ 
+             // intersection over union
+             float inter_area = intersection_area(a, b);
+             float union_area = areas[i] + areas[picked[j]] - inter_area;
+             // float IoU = inter_area / union_area;
+             if (inter_area / union_area > nms_threshold)
+             {
+                 // one overlap is enough to reject the box; skip the remaining
+                 // rotated-intersection computations
+                 keep = 0;
+                 break;
+             }
+         }
+ 
+         if (keep)
+             picked.push_back(i);
+     }
+ }
+ 
+ // Logistic function 1 / (1 + e^-x), evaluated in single precision.
+ static inline float sigmoid(float x)
+ {
+     const float e = expf(-x);
+     return 1.0f / (1.0f + e);
+ }
+ 
+ // Decode one detection head (a single stride level) into rotated box proposals.
+ //
+ // pred            one row per grid cell, row-major over the grid; each row is
+ //                 reg_max_1 * 4 box-distribution logits followed by the class logits
+ // pred_angle      one row per grid cell; the first value of the row is the angle logit
+ // stride          size in input pixels of one grid cell of this head
+ // in_pad          padded network input; only its w and h are read
+ // prob_threshold  minimum sigmoid class score required to emit a proposal
+ // objects         proposals are appended here, in in_pad pixel coordinates
+ static void generate_proposals(const ncnn::Mat& pred, const ncnn::Mat& pred_angle, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects)
+ {
+     const int w = in_pad.w;
+     const int h = in_pad.h;
+ 
+     const int num_grid_x = w / stride;
+     const int num_grid_y = h / stride;
+ 
+     const int reg_max_1 = 16;
+     const int num_class = pred.w - reg_max_1 * 4; // number of classes. 15 for DOTAv1
+ 
+     // without at least one class column there is no label to report
+     if (num_class <= 0)
+         return;
+ 
+     // build the softmax layer once and reuse it for every cell instead of
+     // creating and destroying it for each accepted proposal
+     ncnn::Layer* softmax = ncnn::create_layer("Softmax");
+     if (!softmax)
+         return;
+ 
+     ncnn::ParamDict pd;
+     pd.set(0, 1); // axis
+     pd.set(1, 1);
+     softmax->load_param(pd);
+ 
+     ncnn::Option opt;
+     opt.num_threads = 1;
+     opt.use_packing_layout = false;
+ 
+     softmax->create_pipeline(opt);
+ 
+     for (int y = 0; y < num_grid_y; y++)
+     {
+         for (int x = 0; x < num_grid_x; x++)
+         {
+             const ncnn::Mat pred_grid = pred.row_range(y * num_grid_x + x, 1);
+ 
+             // find label with max score
+             int label = -1;
+             float score = -FLT_MAX;
+             {
+                 const ncnn::Mat pred_score = pred_grid.range(reg_max_1 * 4, num_class);
+ 
+                 for (int k = 0; k < num_class; k++)
+                 {
+                     float s = pred_score[k];
+                     if (s > score)
+                     {
+                         label = k;
+                         score = s;
+                     }
+                 }
+ 
+                 // sigmoid is monotonic, so it is applied to the winning logit only
+                 score = sigmoid(score);
+             }
+ 
+             if (score >= prob_threshold)
+             {
+                 // private copy: the in-place softmax must not touch the const blob
+                 ncnn::Mat pred_bbox = pred_grid.range(0, reg_max_1 * 4).reshape(reg_max_1, 4).clone();
+ 
+                 softmax->forward_inplace(pred_bbox, opt);
+ 
+                 // each side distance is the expected bin index of its
+                 // distribution, converted from grid units to pixels
+                 float pred_ltrb[4];
+                 for (int k = 0; k < 4; k++)
+                 {
+                     float dis = 0.f;
+                     const float* dis_after_sm = pred_bbox.row(k);
+                     for (int l = 0; l < reg_max_1; l++)
+                     {
+                         dis += l * dis_after_sm[l];
+                     }
+ 
+                     pred_ltrb[k] = dis * stride;
+                 }
+ 
+                 // distances are measured from the centre of the grid cell
+                 float pb_cx = (x + 0.5f) * stride;
+                 float pb_cy = (y + 0.5f) * stride;
+ 
+                 // sigmoid gives (0, 1); after the shift the angle spans
+                 // (-0.25, 0.75) half-turns, i.e. (-45, 135) degrees
+                 const float angle = sigmoid(pred_angle.row(y * num_grid_x + x)[0]) - 0.25f;
+ 
+                 const float angle_rad = angle * 3.14159265358979323846f;
+                 const float angle_degree = angle * 180.f;
+ 
+                 // named so they do not shadow the cos/sin math functions
+                 const float cos_a = cosf(angle_rad);
+                 const float sin_a = sinf(angle_rad);
+ 
+                 // offset of the box centre from the cell centre in the box
+                 // frame, rotated into image axes
+                 const float xx = (pred_ltrb[2] - pred_ltrb[0]) * 0.5f;
+                 const float yy = (pred_ltrb[3] - pred_ltrb[1]) * 0.5f;
+                 const float xr = xx * cos_a - yy * sin_a;
+                 const float yr = xx * sin_a + yy * cos_a;
+                 const float cx = pb_cx + xr;
+                 const float cy = pb_cy + yr;
+                 const float ww = pred_ltrb[2] + pred_ltrb[0];
+                 const float hh = pred_ltrb[3] + pred_ltrb[1];
+ 
+                 Object obj;
+                 obj.rrect = cv::RotatedRect(cv::Point2f(cx, cy), cv::Size_<float>(ww, hh), angle_degree);
+                 obj.label = label;
+                 obj.prob = score;
+ 
+                 objects.push_back(obj);
+             }
+         }
+     }
+ 
+     softmax->destroy_pipeline(opt);
+ 
+     delete softmax;
+ }
+ 
+ // Decode every detection head contained in the concatenated output blobs.
+ //
+ // The rows of pred and pred_angle are consumed head after head in the order
+ // of strides; a head with stride s takes (in_pad.w / s) * (in_pad.h / s) rows
+ // of each blob. Proposals from all heads are appended to objects.
+ static void generate_proposals(const ncnn::Mat& pred, const ncnn::Mat& pred_angle, const std::vector<int>& strides, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects)
+ {
+     const int w = in_pad.w;
+     const int h = in_pad.h;
+ 
+     int pred_row_offset = 0;
+     for (size_t i = 0; i < strides.size(); i++)
+     {
+         const int stride = strides[i];
+ 
+         // a non-positive stride would divide by zero below
+         if (stride <= 0)
+             continue;
+ 
+         const int num_grid_x = w / stride;
+         const int num_grid_y = h / stride;
+         const int num_grid = num_grid_x * num_grid_y;
+ 
+         // stop rather than read past the end of either blob when it holds
+         // fewer rows than the grid derived from in_pad requires
+         if (pred_row_offset + num_grid > pred.h || pred_row_offset + num_grid > pred_angle.h)
+             break;
+ 
+         generate_proposals(pred.row_range(pred_row_offset, num_grid), pred_angle.row_range(pred_row_offset, num_grid), stride, in_pad, prob_threshold, objects);
+ 
+         pred_row_offset += num_grid;
+     }
+ }
+ 
+ // Detect oriented (rotated) boxes in an RGB image with the loaded model.
+ //
+ // rgb      input image; its pixel buffer is handed to ncnn as PIXEL_RGB
+ // objects  replaced with the detections, in original image coordinates,
+ //          ordered by descending score; empty when nothing is found
+ // returns  0 on success, -1 if the image is empty or an output blob
+ //          cannot be extracted
+ int YOLOv8_obb::detect(const cv::Mat& rgb, std::vector<Object>& objects)
+ {
+     // always start from an empty result so that a frame without detections
+     // does not leave the caller holding boxes from an earlier call
+     objects.clear();
+ 
+     // an empty image would make the scale computation divide by zero
+     if (rgb.empty())
+         return -1;
+ 
+     const int target_size = det_target_size;//1024;
+     const float prob_threshold = 0.25f;
+     const float nms_threshold = 0.45f;
+ 
+     int img_w = rgb.cols;
+     int img_h = rgb.rows;
+ 
+     // ultralytics/cfg/models/v8/yolov8.yaml
+     std::vector<int> strides(3);
+     strides[0] = 8;
+     strides[1] = 16;
+     strides[2] = 32;
+     const int max_stride = 32;
+ 
+     // resize so that the longer side equals target_size, keeping aspect ratio
+     int w = img_w;
+     int h = img_h;
+     float scale = 1.f;
+     if (w > h)
+     {
+         scale = (float)target_size / w;
+         w = target_size;
+         h = h * scale;
+     }
+     else
+     {
+         scale = (float)target_size / h;
+         h = target_size;
+         w = w * scale;
+     }
+ 
+     // a very elongated image can round its short side down to zero
+     if (w < 1)
+         w = 1;
+     if (h < 1)
+         h = 1;
+ 
+     ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgb.data, ncnn::Mat::PIXEL_RGB, img_w, img_h, w, h);
+ 
+     // letterbox pad each side up to the next multiple of max_stride
+     int wpad = (w + max_stride - 1) / max_stride * max_stride - w;
+     int hpad = (h + max_stride - 1) / max_stride * max_stride - h;
+     ncnn::Mat in_pad;
+     ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f);
+ 
+     // scale pixel values from 0..255 to 0..1
+     const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
+     in_pad.substract_mean_normalize(0, norm_vals);
+ 
+     ncnn::Extractor ex = yolov8.create_extractor();
+ 
+     ex.input("in0", in_pad);
+ 
+     // do not decode output blobs that were never filled
+     ncnn::Mat out;
+     if (ex.extract("out0", out) != 0)
+         return -1;
+ 
+     ncnn::Mat out_angle;
+     if (ex.extract("out1", out_angle) != 0)
+         return -1;
+ 
+     std::vector<Object> proposals;
+     generate_proposals(out, out_angle, strides, in_pad, prob_threshold, proposals);
+ 
+     // sort all proposals by score from highest to lowest
+     qsort_descent_inplace(proposals);
+ 
+     // apply nms with nms_threshold
+     std::vector<int> picked;
+     nms_sorted_bboxes(proposals, picked, nms_threshold);
+ 
+     const int count = picked.size();
+ 
+     objects.resize(count);
+     for (int i = 0; i < count; i++)
+     {
+         Object obj = proposals[picked[i]];
+ 
+         // undo the letterbox: remove the leading pad, then the resize scale;
+         // the rotation angle is unaffected by a uniform scale
+         obj.rrect.center.x = (obj.rrect.center.x - (wpad / 2)) / scale;
+         obj.rrect.center.y = (obj.rrect.center.y - (hpad / 2)) / scale;
+         obj.rrect.size.width = (obj.rrect.size.width) / scale;
+         obj.rrect.size.height = (obj.rrect.size.height) / scale;
+ 
+         objects[i] = obj;
+     }
+ 
+     return 0;
+ }
+ 
+ // Draws oriented detections onto an image.
+ //
+ // Pass 1 outlines every rotated box with a color chosen by its label. Pass 2 prints
+ // "<class name> <probability>%" on a white background centered on each box,
+ // moved where necessary so the text origin stays inside the image.
+ //
+ // rgb      image drawn on in place
+ // objects  detections to render; rrect, label and prob are read
+ // returns  always 0
+ int YOLOv8_obb::draw(cv::Mat& rgb, const std::vector<Object>& objects)
+ {
+     static const char* class_names[] = {
+         "plane", "ship", "storage tank", "baseball diamond", "tennis court",
+         "basketball court", "ground track field", "harbor", "bridge", "large vehicle",
+         "small vehicle", "helicopter", "roundabout", "soccer ball field", "swimming pool"
+     };
+ 
+     static const cv::Scalar colors[] = {
+         cv::Scalar( 39, 176, 156),
+         cv::Scalar( 58, 183, 103),
+         cv::Scalar( 81, 181,  63),
+         cv::Scalar(150, 243,  33),
+         cv::Scalar(169, 244,   3),
+         cv::Scalar(188, 212,   0),
+         cv::Scalar(150, 136,   0),
+         cv::Scalar(175,  80,  76),
+         cv::Scalar(195,  74, 139),
+         cv::Scalar(220,  57, 205),
+         cv::Scalar(235,  59, 255),
+         cv::Scalar(193,   7, 255),
+         cv::Scalar(152,   0, 255),
+         cv::Scalar( 87,  34, 255),
+         cv::Scalar( 85,  72, 121),
+         cv::Scalar(158, 158, 158),
+         cv::Scalar(125, 139,  96)
+     };
+ 
+     const int num_class_names = sizeof(class_names) / sizeof(class_names[0]);
+     const int num_colors = sizeof(colors) / sizeof(colors[0]);
+ 
+     // pass 1: box outlines
+     for (size_t i = 0; i < objects.size(); i++)
+     {
+         const Object& obj = objects[i];
+ 
+         // wrap the label so an unexpected class id cannot index past the color table
+         const cv::Scalar& color = colors[obj.label >= 0 ? obj.label % num_colors : 0];
+ 
+         cv::Point2f corners[4];
+         obj.rrect.points(corners);
+ 
+         // connect each corner to the next one, closing the loop from 3 back to 0
+         for (int k = 0; k < 4; k++)
+             cv::line(rgb, corners[k], corners[(k + 1) % 4], color);
+     }
+ 
+     // pass 2: text labels, drawn last so that no outline covers them
+     for (size_t i = 0; i < objects.size(); i++)
+     {
+         const Object& obj = objects[i];
+ 
+         // fall back to a placeholder rather than reading past the name table
+         const bool known_class = obj.label >= 0 && obj.label < num_class_names;
+         const char* class_name = known_class ? class_names[obj.label] : "unknown";
+ 
+         char text[256];
+         snprintf(text, sizeof(text), "%s %.1f%%", class_name, obj.prob * 100);
+ 
+         int baseLine = 0;
+         cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
+ 
+         // center the label on the box, then pull it back inside the image
+         int x = obj.rrect.center.x - label_size.width / 2;
+         int y = obj.rrect.center.y - label_size.height / 2 - baseLine;
+         if (y < 0)
+             y = 0;
+         if (y + label_size.height > rgb.rows)
+             y = rgb.rows - label_size.height;
+         if (x < 0)
+             x = 0;
+         if (x + label_size.width > rgb.cols)
+             x = rgb.cols - label_size.width;
+ 
+         cv::rectangle(rgb, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
+                       cv::Scalar(255, 255, 255), -1);
+ 
+         cv::putText(rgb, text, cv::Point(x, y + label_size.height),
+                     cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
+     }
+ 
+     return 0;
+ }
--- a/livekit-android-track-processors/src/main/jni/yolov8_pose.cpp 0 → 100644
查看文件 @b55f961
+++ b/livekit-android-track-processors/src/main/jni/yolov8_pose.cpp 0 → 100644
查看文件 @b55f961
+ // Tencent is pleased to support the open source community by making ncnn available.
+ //
+ // Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
+ //
+ // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+ // in compliance with the License. You may obtain a copy of the License at
+ //
+ // https://opensource.org/licenses/BSD-3-Clause
+ //
+ // Unless required by applicable law or agreed to in writing, software distributed
+ // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ // specific language governing permissions and limitations under the License.
+ 
+ // 1. install
+ //      pip3 install -U ultralytics pnnx ncnn
+ // 2. export yolov8-pose torchscript
+ //      yolo export model=yolov8n-pose.pt format=torchscript
+ // 3. convert torchscript with static shape
+ //      pnnx yolov8n-pose.torchscript
+ // 4. modify yolov8n_pose_pnnx.py for dynamic shape inference
+ //      A. modify reshape to support dynamic image sizes
+ //      B. permute tensor before concat and adjust concat axis
+ //      C. drop post-process part
+ //      before:
+ //          v_137 = v_136.view(1, 51, 6400)
+ //          v_143 = v_142.view(1, 51, 1600)
+ //          v_149 = v_148.view(1, 51, 400)
+ //          v_150 = torch.cat((v_137, v_143, v_149), dim=-1)
+ //          ...
+ //          v_184 = v_161.view(1, 65, 6400)
+ //          v_185 = v_172.view(1, 65, 1600)
+ //          v_186 = v_183.view(1, 65, 400)
+ //          v_187 = torch.cat((v_184, v_185, v_186), dim=2)
+ //          ...
+ //      after:
+ //          v_137 = v_136.view(1, 51, -1).transpose(1, 2)
+ //          v_143 = v_142.view(1, 51, -1).transpose(1, 2)
+ //          v_149 = v_148.view(1, 51, -1).transpose(1, 2)
+ //          v_150 = torch.cat((v_137, v_143, v_149), dim=1)
+ //          ...
+ //          v_184 = v_161.view(1, 65, -1).transpose(1, 2)
+ //          v_185 = v_172.view(1, 65, -1).transpose(1, 2)
+ //          v_186 = v_183.view(1, 65, -1).transpose(1, 2)
+ //          v_187 = torch.cat((v_184, v_185, v_186), dim=1)
+ //          return v_187, v_150
+ // 5. re-export yolov8-pose torchscript
+ //      python3 -c 'import yolov8n_pose_pnnx; yolov8n_pose_pnnx.export_torchscript()'
+ // 6. convert new torchscript with dynamic shape
+ //      pnnx yolov8n_pose_pnnx.py.pt inputshape=[1,3,640,640] inputshape2=[1,3,320,320]
+ // 7. now you get ncnn model files
+ //      mv yolov8n_pose_pnnx.py.ncnn.param yolov8n_pose.ncnn.param
+ //      mv yolov8n_pose_pnnx.py.ncnn.bin yolov8n_pose.ncnn.bin
+ 
+ // the out blob would be a 2-dim tensor with w=65 h=8400
+ //
+ //        | bbox-reg 16 x 4       |score(1)|
+ //        +-----+-----+-----+-----+--------+
+ //        | dx0 | dy0 | dx1 | dy1 |   0.1  |
+ //   all /|     |     |     |     |        |
+ //  boxes |  .. |  .. |  .. |  .. |   0.0  |
+ //  (8400)|     |     |     |     |   .    |
+ //       \|     |     |     |     |   .    |
+ //        +-----+-----+-----+-----+--------+
+ //
+ 
+ //
+ //        | pose (51) |
+ //        +-----------+
+ //        |0.1........|
+ //   all /|           |
+ //  boxes |0.0........|
+ //  (8400)|     .     |
+ //       \|     .     |
+ //        +-----------+
+ //
+ 
+ #include "yolov8.h"
+ 
+ #include "layer.h"
+ 
+ #include <opencv2/core/core.hpp>
+ #include <opencv2/imgproc/imgproc.hpp>
+ 
+ #include <float.h>
+ #include <stdio.h>
+ #include <vector>
+ 
+ // Returns the area of the rectangle obtained by intersecting a.rect with b.rect.
+ static inline float intersection_area(const Object& a, const Object& b)
+ {
+     const cv::Rect_<float> overlap = a.rect & b.rect;
+ 
+     return overlap.area();
+ }
+ 
+ // Quicksort of objects[left..right] (both ends inclusive) by descending prob.
+ // The pivot is the prob of the middle element; each side is sorted recursively.
+ static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right)
+ {
+     int lo = left;
+     int hi = right;
+     const float pivot = objects[(left + right) / 2].prob;
+ 
+     while (!(lo > hi))
+     {
+         // advance past elements already on the correct side of the pivot
+         for (; objects[lo].prob > pivot; ++lo) {}
+         for (; objects[hi].prob < pivot; --hi) {}
+ 
+         // cursors crossed: partitioning is complete
+         if (lo > hi)
+             break;
+ 
+         std::swap(objects[lo], objects[hi]);
+         ++lo;
+         --hi;
+     }
+ 
+     // sort what ended up on each side of the split
+     if (hi > left)
+         qsort_descent_inplace(objects, left, hi);
+ 
+     if (right > lo)
+         qsort_descent_inplace(objects, lo, right);
+ }
+ 
+ // Sorts all objects by descending prob; does nothing for an empty vector.
+ static void qsort_descent_inplace(std::vector<Object>& objects)
+ {
+     if (!objects.empty())
+     {
+         const int last = objects.size() - 1;
+         qsort_descent_inplace(objects, 0, last);
+     }
+ }
+ 
+ // Greedy non-maximum suppression over the axis-aligned rect of each object.
+ //
+ // Objects are visited in index order (the caller sorts them by descending prob first).
+ // An object is kept unless its IoU with an already kept object exceeds nms_threshold.
+ //
+ // objects        candidate detections
+ // picked         cleared, then filled with the indices of the kept objects, ascending
+ // nms_threshold  IoU above which a candidate is suppressed
+ // agnostic       when false, only objects with the same label can suppress each other
+ static void nms_sorted_bboxes(const std::vector<Object>& objects, std::vector<int>& picked, float nms_threshold, bool agnostic = false)
+ {
+     picked.clear();
+ 
+     const int n = objects.size();
+ 
+     // box areas are reused for every pair, so compute them once
+     std::vector<float> areas(n);
+     for (int i = 0; i < n; i++)
+     {
+         areas[i] = objects[i].rect.area();
+     }
+ 
+     for (int i = 0; i < n; i++)
+     {
+         const Object& a = objects[i];
+ 
+         bool keep = true;
+         for (size_t j = 0; j < picked.size(); j++)
+         {
+             const int kept = picked[j];
+             const Object& b = objects[kept];
+ 
+             if (!agnostic && a.label != b.label)
+                 continue;
+ 
+             // intersection over union
+             const float inter_area = intersection_area(a, b);
+             const float union_area = areas[i] + areas[kept] - inter_area;
+             if (inter_area / union_area > nms_threshold)
+             {
+                 // one overlapping kept box decides the outcome; skip the remaining ones
+                 keep = false;
+                 break;
+             }
+         }
+ 
+         if (keep)
+             picked.push_back(i);
+     }
+ }
+ 
+ // Logistic function 1 / (1 + e^-x).
+ static inline float sigmoid(float x)
+ {
+     const float decay = expf(-x);
+ 
+     return 1.0f / (1.0f + decay);
+ }
+ 
+ // Decodes the predictions of one stride level into pose proposals.
+ //
+ // pred            one row per grid cell, read as 16 x 4 box-distance logits followed by
+ //                 one score logit
+ // pred_points     one row per grid cell, read as (x, y, score) triplets, one per keypoint
+ // stride          size of one grid cell in in_pad pixels
+ // in_pad          network input; only its w and h are read
+ // prob_threshold  minimum sigmoid score for a cell to produce a proposal
+ // objects         proposals are appended; existing entries are left untouched
+ static void generate_proposals(const ncnn::Mat& pred, const ncnn::Mat& pred_points, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects)
+ {
+     const int w = in_pad.w;
+     const int h = in_pad.h;
+ 
+     const int num_grid_x = w / stride;
+     const int num_grid_y = h / stride;
+ 
+     const int reg_max_1 = 16;
+     const int num_points = pred_points.w / 3;
+ 
+     // The softmax layer depends only on constants, so it is built once for the whole
+     // grid instead of once per accepted cell.
+     ncnn::Option opt;
+     opt.num_threads = 1;
+     opt.use_packing_layout = false;
+ 
+     ncnn::Layer* softmax = ncnn::create_layer("Softmax");
+     {
+         ncnn::ParamDict pd;
+         pd.set(0, 1); // axis
+         pd.set(1, 1);
+         softmax->load_param(pd);
+     }
+     softmax->create_pipeline(opt);
+ 
+     for (int y = 0; y < num_grid_y; y++)
+     {
+         for (int x = 0; x < num_grid_x; x++)
+         {
+             const int grid_index = y * num_grid_x + x;
+             const ncnn::Mat pred_grid = pred.row_range(grid_index, 1);
+ 
+             // single score per cell, stored right after the box logits; label is always 0
+             const int label = 0;
+             const float score = sigmoid(pred_grid[reg_max_1 * 4]);
+ 
+             // written as a negated >= so that a NaN score is rejected as well
+             if (!(score >= prob_threshold))
+                 continue;
+ 
+             // softmax over the 16 bins of each of the 4 box sides, on a private copy
+             ncnn::Mat pred_bbox = pred_grid.range(0, reg_max_1 * 4).reshape(reg_max_1, 4).clone();
+             softmax->forward_inplace(pred_bbox, opt);
+ 
+             // expected bin index per side, scaled to pixels
+             float pred_ltrb[4];
+             for (int k = 0; k < 4; k++)
+             {
+                 float dis = 0.f;
+                 const float* dis_after_sm = pred_bbox.row(k);
+                 for (int l = 0; l < reg_max_1; l++)
+                 {
+                     dis += l * dis_after_sm[l];
+                 }
+ 
+                 pred_ltrb[k] = dis * stride;
+             }
+ 
+             // distances are measured from the center of the grid cell
+             float pb_cx = (x + 0.5f) * stride;
+             float pb_cy = (y + 0.5f) * stride;
+ 
+             float x0 = pb_cx - pred_ltrb[0];
+             float y0 = pb_cy - pred_ltrb[1];
+             float x1 = pb_cx + pred_ltrb[2];
+             float y1 = pb_cy + pred_ltrb[3];
+ 
+             // the keypoint view is only needed for accepted cells
+             const ncnn::Mat pred_points_grid = pred_points.row_range(grid_index, 1).reshape(3, num_points);
+ 
+             std::vector<KeyPoint> keypoints;
+             keypoints.reserve(num_points);
+             for (int k = 0; k < num_points; k++)
+             {
+                 const float* point = pred_points_grid.row(k);
+ 
+                 KeyPoint keypoint;
+                 keypoint.p.x = (x + point[0] * 2) * stride;
+                 keypoint.p.y = (y + point[1] * 2) * stride;
+                 keypoint.prob = sigmoid(point[2]);
+                 keypoints.push_back(keypoint);
+             }
+ 
+             Object obj;
+             obj.rect.x = x0;
+             obj.rect.y = y0;
+             obj.rect.width = x1 - x0;
+             obj.rect.height = y1 - y0;
+             obj.label = label;
+             obj.prob = score;
+             obj.keypoints = keypoints;
+ 
+             objects.push_back(obj);
+         }
+     }
+ 
+     softmax->destroy_pipeline(opt);
+ 
+     delete softmax;
+ }
+ 
+ // Decodes every stride level in turn and appends all proposals to objects.
+ // Rows of pred and pred_points are consumed level by level, in the order of strides;
+ // a level with stride s takes (in_pad.w / s) * (in_pad.h / s) rows.
+ static void generate_proposals(const ncnn::Mat& pred, const ncnn::Mat& pred_points, const std::vector<int>& strides, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects)
+ {
+     int first_row = 0;
+ 
+     for (const int stride : strides)
+     {
+         const int level_rows = (in_pad.w / stride) * (in_pad.h / stride);
+ 
+         const ncnn::Mat level_pred = pred.row_range(first_row, level_rows);
+         const ncnn::Mat level_points = pred_points.row_range(first_row, level_rows);
+ 
+         generate_proposals(level_pred, level_points, stride, in_pad, prob_threshold, objects);
+ 
+         first_row += level_rows;
+     }
+ }
+ 
+ // Runs pose detection on an image.
+ //
+ // The image is resized so that its longer side equals det_target_size, padded to a
+ // multiple of the largest stride, scaled by 1/255 and fed to the network. Proposals that
+ // pass the score threshold are sorted, filtered with NMS, mapped back to the coordinates
+ // of the original image and finally ordered by box area, largest first.
+ //
+ // rgb      input image, read as packed RGB pixels
+ // objects  receives the detections; any previous content is discarded
+ // returns  always 0
+ int YOLOv8_pose::detect(const cv::Mat& rgb, std::vector<Object>& objects)
+ {
+     // never hand back results of an earlier call, even when nothing is detected
+     objects.clear();
+ 
+     // an empty image would make the scale computation below divide by zero
+     if (rgb.empty())
+         return 0;
+ 
+     const int target_size = det_target_size;//640;
+     const float prob_threshold = 0.25f;
+     const float nms_threshold = 0.45f;
+ 
+     int img_w = rgb.cols;
+     int img_h = rgb.rows;
+ 
+     // ultralytics/cfg/models/v8/yolov8.yaml
+     std::vector<int> strides(3);
+     strides[0] = 8;
+     strides[1] = 16;
+     strides[2] = 32;
+     const int max_stride = 32;
+ 
+     // resize so that the longer side becomes target_size, keeping the aspect ratio
+     int w = img_w;
+     int h = img_h;
+     float scale = 1.f;
+     if (w > h)
+     {
+         scale = (float)target_size / w;
+         w = target_size;
+         h = h * scale;
+     }
+     else
+     {
+         scale = (float)target_size / h;
+         h = target_size;
+         w = w * scale;
+     }
+ 
+     ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgb.data, ncnn::Mat::PIXEL_RGB, img_w, img_h, w, h);
+ 
+     // letterbox pad both sides up to the next multiple of max_stride
+     int wpad = (w + max_stride - 1) / max_stride * max_stride - w;
+     int hpad = (h + max_stride - 1) / max_stride * max_stride - h;
+     ncnn::Mat in_pad;
+     ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f);
+ 
+     const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
+     in_pad.substract_mean_normalize(0, norm_vals);
+ 
+     ncnn::Extractor ex = yolov8.create_extractor();
+ 
+     ex.input("in0", in_pad);
+ 
+     ncnn::Mat out;
+     ex.extract("out0", out);
+ 
+     ncnn::Mat out_points;
+     ex.extract("out1", out_points);
+ 
+     std::vector<Object> proposals;
+     generate_proposals(out, out_points, strides, in_pad, prob_threshold, proposals);
+ 
+     // sort all proposals by score from highest to lowest
+     qsort_descent_inplace(proposals);
+ 
+     // apply nms with nms_threshold
+     std::vector<int> picked;
+     nms_sorted_bboxes(proposals, picked, nms_threshold);
+ 
+     int count = picked.size();
+     if (count == 0)
+         return 0;
+ 
+     objects.resize(count);
+     for (int i = 0; i < count; i++)
+     {
+         objects[i] = proposals[picked[i]];
+ 
+         // adjust offset to original unpadded
+         float x0 = (objects[i].rect.x - (wpad / 2)) / scale;
+         float y0 = (objects[i].rect.y - (hpad / 2)) / scale;
+         float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;
+         float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;
+ 
+         // keypoints get the same un-padding and un-scaling, but are not clipped
+         std::vector<KeyPoint>& keypoints = objects[i].keypoints;
+         for (size_t j = 0; j < keypoints.size(); j++)
+         {
+             keypoints[j].p.x = (keypoints[j].p.x - (wpad / 2)) / scale;
+             keypoints[j].p.y = (keypoints[j].p.y - (hpad / 2)) / scale;
+         }
+ 
+         // clip
+         x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);
+         y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);
+         x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);
+         y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);
+ 
+         objects[i].rect.x = x0;
+         objects[i].rect.y = y0;
+         objects[i].rect.width = x1 - x0;
+         objects[i].rect.height = y1 - y0;
+     }
+ 
+     // sort objects by area
+     struct
+     {
+         bool operator()(const Object& a, const Object& b) const
+         {
+             return a.rect.area() > b.rect.area();
+         }
+     } objects_area_greater;
+     std::sort(objects.begin(), objects.end(), objects_area_greater);
+ 
+     return 0;
+ }
+ 
+ // Draws pose detections: skeleton bones, joints, bounding box and a text label.
+ //
+ // rgb      image drawn on in place
+ // objects  detections to render; rect, label, prob and keypoints are read
+ // returns  always 0
+ int YOLOv8_pose::draw(cv::Mat& rgb, const std::vector<Object>& objects)
+ {
+     static const char* class_names[] = {"person"};
+ 
+     static const cv::Scalar colors[] = {
+         cv::Scalar( 67,  54, 244),
+         cv::Scalar( 30,  99, 233),
+         cv::Scalar( 39, 176, 156),
+         cv::Scalar( 58, 183, 103),
+         cv::Scalar( 81, 181,  63),
+         cv::Scalar(150, 243,  33),
+         cv::Scalar(169, 244,   3),
+         cv::Scalar(188, 212,   0),
+         cv::Scalar(150, 136,   0),
+         cv::Scalar(175,  80,  76),
+         cv::Scalar(195,  74, 139),
+         cv::Scalar(220,  57, 205),
+         cv::Scalar(235,  59, 255),
+         cv::Scalar(193,   7, 255),
+         cv::Scalar(152,   0, 255),
+         cv::Scalar( 87,  34, 255),
+         cv::Scalar( 85,  72, 121),
+         cv::Scalar(158, 158, 158),
+         cv::Scalar(125, 139,  96)
+     };
+ 
+     // pairs of keypoint indices that are connected by a bone
+     static const int joint_pairs[16][2] = {
+         {0, 1}, {1, 3}, {0, 2}, {2, 4}, {5, 6}, {5, 7}, {7, 9}, {6, 8}, {8, 10}, {5, 11}, {6, 12}, {11, 12}, {11, 13}, {12, 14}, {13, 15}, {14, 16}
+     };
+ 
+     // one color per entry of joint_pairs
+     static const cv::Scalar bone_colors[] = {
+         cv::Scalar(  0,   0, 255),
+         cv::Scalar(  0,   0, 255),
+         cv::Scalar(  0,   0, 255),
+         cv::Scalar(  0,   0, 255),
+         cv::Scalar(  0, 255, 128),
+         cv::Scalar(  0, 255, 128),
+         cv::Scalar(  0, 255, 128),
+         cv::Scalar(  0, 255, 128),
+         cv::Scalar(  0, 255, 128),
+         cv::Scalar(255, 255,  51),
+         cv::Scalar(255, 255,  51),
+         cv::Scalar(255, 255,  51),
+         cv::Scalar(255,  51, 153),
+         cv::Scalar(255,  51, 153),
+         cv::Scalar(255,  51, 153),
+         cv::Scalar(255,  51, 153),
+     };
+ 
+     // table sizes are derived from the tables so they cannot drift apart
+     const size_t num_colors = sizeof(colors) / sizeof(colors[0]);
+     const int num_class_names = sizeof(class_names) / sizeof(class_names[0]);
+     const int num_bones = sizeof(joint_pairs) / sizeof(joint_pairs[0]);
+ 
+     for (size_t i = 0; i < objects.size(); i++)
+     {
+         const Object& obj = objects[i];
+ 
+         // colors cycle per detection, not per class
+         const cv::Scalar& color = colors[i % num_colors];
+ 
+         // draw bone
+         for (int j = 0; j < num_bones; j++)
+         {
+             const size_t first = joint_pairs[j][0];
+             const size_t second = joint_pairs[j][1];
+ 
+             // skip bones whose joints are not present instead of reading past the vector
+             if (first >= obj.keypoints.size() || second >= obj.keypoints.size())
+                 continue;
+ 
+             const KeyPoint& p1 = obj.keypoints[first];
+             const KeyPoint& p2 = obj.keypoints[second];
+ 
+             if (p1.prob < 0.2f || p2.prob < 0.2f)
+                 continue;
+ 
+             cv::line(rgb, p1.p, p2.p, bone_colors[j], 2);
+         }
+ 
+         // draw joint
+         for (size_t j = 0; j < obj.keypoints.size(); j++)
+         {
+             const KeyPoint& keypoint = obj.keypoints[j];
+ 
+             if (keypoint.prob < 0.2f)
+                 continue;
+ 
+             cv::circle(rgb, keypoint.p, 3, color, -1);
+         }
+ 
+         cv::rectangle(rgb, obj.rect, color);
+ 
+         // fall back to a placeholder rather than reading past the name table
+         const bool known_class = obj.label >= 0 && obj.label < num_class_names;
+         const char* class_name = known_class ? class_names[obj.label] : "unknown";
+ 
+         char text[256];
+         snprintf(text, sizeof(text), "%s %.1f%%", class_name, obj.prob * 100);
+ 
+         int baseLine = 0;
+         cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
+ 
+         // put the label just above the box, pulled back inside the image if needed
+         int x = obj.rect.x;
+         int y = obj.rect.y - label_size.height - baseLine;
+         if (y < 0)
+             y = 0;
+         if (x + label_size.width > rgb.cols)
+             x = rgb.cols - label_size.width;
+ 
+         cv::rectangle(rgb, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
+                       cv::Scalar(255, 255, 255), -1);
+ 
+         cv::putText(rgb, text, cv::Point(x, y + label_size.height),
+                     cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
+     }
+ 
+     return 0;
+ }
--- a/livekit-android-track-processors/src/main/jni/yolov8_seg.cpp 0 → 100644
查看文件 @b55f961
+++ b/livekit-android-track-processors/src/main/jni/yolov8_seg.cpp 0 → 100644
查看文件 @b55f961
+ // Tencent is pleased to support the open source community by making ncnn available.
+ //
+ // Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
+ //
+ // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+ // in compliance with the License. You may obtain a copy of the License at
+ //
+ // https://opensource.org/licenses/BSD-3-Clause
+ //
+ // Unless required by applicable law or agreed to in writing, software distributed
+ // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ // specific language governing permissions and limitations under the License.
+ 
+ // 1. install
+ //      pip3 install -U ultralytics pnnx ncnn
+ // 2. export yolov8-seg torchscript
+ //      yolo export model=yolov8n-seg.pt format=torchscript
+ // 3. convert torchscript with static shape
+ //      pnnx yolov8n-seg.torchscript
+ // 4. modify yolov8n_seg_pnnx.py for dynamic shape inference
+ //      A. modify reshape to support dynamic image sizes
+ //      B. permute tensor before concat and adjust concat axis
+ //      C. drop post-process part
+ //      before:
+ //          v_144 = v_143.view(1, 32, 6400)
+ //          v_150 = v_149.view(1, 32, 1600)
+ //          v_156 = v_155.view(1, 32, 400)
+ //          v_157 = torch.cat((v_144, v_150, v_156), dim=2)
+ //          ...
+ //          v_191 = v_168.view(1, 144, 6400)
+ //          v_192 = v_179.view(1, 144, 1600)
+ //          v_193 = v_190.view(1, 144, 400)
+ //          v_194 = torch.cat((v_191, v_192, v_193), dim=2)
+ //          ...
+ //          v_215 = (v_214, v_138, )
+ //          return v_215
+ //      after:
+ //          v_144 = v_143.view(1, 32, -1).transpose(1, 2)
+ //          v_150 = v_149.view(1, 32, -1).transpose(1, 2)
+ //          v_156 = v_155.view(1, 32, -1).transpose(1, 2)
+ //          v_157 = torch.cat((v_144, v_150, v_156), dim=1)
+ //          ...
+ //          v_191 = v_168.view(1, 144, -1).transpose(1, 2)
+ //          v_192 = v_179.view(1, 144, -1).transpose(1, 2)
+ //          v_193 = v_190.view(1, 144, -1).transpose(1, 2)
+ //          v_194 = torch.cat((v_191, v_192, v_193), dim=1)
+ //          return v_194, v_157, v_138
+ // 5. re-export yolov8-seg torchscript
+ //      python3 -c 'import yolov8n_seg_pnnx; yolov8n_seg_pnnx.export_torchscript()'
+ // 6. convert new torchscript with dynamic shape
+ //      pnnx yolov8n_seg_pnnx.py.pt inputshape=[1,3,640,640] inputshape2=[1,3,320,320]
+ // 7. now you get ncnn model files
+ //      mv yolov8n_seg_pnnx.py.ncnn.param yolov8n_seg.ncnn.param
+ //      mv yolov8n_seg_pnnx.py.ncnn.bin yolov8n_seg.ncnn.bin
+ 
+ // the out blob would be a 2-dim tensor with w=144 h=8400
+ //
+ //        | bbox-reg 16 x 4       | per-class scores(80) |
+ //        +-----+-----+-----+-----+----------------------+
+ //        | dx0 | dy0 | dx1 | dy1 |0.1 0.0 0.0 0.5 ......|
+ //   all /|     |     |     |     |           .          |
+ //  boxes |  .. |  .. |  .. |  .. |0.0 0.9 0.0 0.0 ......|
+ //  (8400)|     |     |     |     |           .          |
+ //       \|     |     |     |     |           .          |
+ //        +-----+-----+-----+-----+----------------------+
+ //
+ 
+ //
+ //        | mask (32) |
+ //        +-----------+
+ //        |0.1........|
+ //   all /|           |
+ //  boxes |0.0........|
+ //  (8400)|     .     |
+ //       \|     .     |
+ //        +-----------+
+ //
+ 
+ #include "yolov8.h"
+ 
+ #include "layer.h"
+ 
+ #include <opencv2/core/core.hpp>
+ #include <opencv2/imgproc/imgproc.hpp>
+ 
+ #include <float.h>
+ #include <stdio.h>
+ #include <vector>
+ 
+ // Returns the area of the rectangle obtained by intersecting a.rect with b.rect.
+ static inline float intersection_area(const Object& a, const Object& b)
+ {
+     const cv::Rect_<float> overlap = a.rect & b.rect;
+ 
+     return overlap.area();
+ }
+ 
+ // Quicksort of objects[left..right] (both ends inclusive) by descending prob.
+ // The pivot is the prob of the middle element; each side is sorted recursively.
+ static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right)
+ {
+     int lo = left;
+     int hi = right;
+     const float pivot = objects[(left + right) / 2].prob;
+ 
+     while (!(lo > hi))
+     {
+         // advance past elements already on the correct side of the pivot
+         for (; objects[lo].prob > pivot; ++lo) {}
+         for (; objects[hi].prob < pivot; --hi) {}
+ 
+         // cursors crossed: partitioning is complete
+         if (lo > hi)
+             break;
+ 
+         std::swap(objects[lo], objects[hi]);
+         ++lo;
+         --hi;
+     }
+ 
+     // sort what ended up on each side of the split
+     if (hi > left)
+         qsort_descent_inplace(objects, left, hi);
+ 
+     if (right > lo)
+         qsort_descent_inplace(objects, lo, right);
+ }
+ 
+ // Sorts all objects by descending prob; does nothing for an empty vector.
+ static void qsort_descent_inplace(std::vector<Object>& objects)
+ {
+     if (!objects.empty())
+     {
+         const int last = objects.size() - 1;
+         qsort_descent_inplace(objects, 0, last);
+     }
+ }
+ 
+ // Greedy non-maximum suppression over the axis-aligned rect of each object.
+ //
+ // Objects are visited in index order (the caller sorts them by descending prob first).
+ // An object is kept unless its IoU with an already kept object exceeds nms_threshold.
+ //
+ // objects        candidate detections
+ // picked         cleared, then filled with the indices of the kept objects, ascending
+ // nms_threshold  IoU above which a candidate is suppressed
+ // agnostic       when false, only objects with the same label can suppress each other
+ static void nms_sorted_bboxes(const std::vector<Object>& objects, std::vector<int>& picked, float nms_threshold, bool agnostic = false)
+ {
+     picked.clear();
+ 
+     const int n = objects.size();
+ 
+     // box areas are reused for every pair, so compute them once
+     std::vector<float> areas(n);
+     for (int i = 0; i < n; i++)
+     {
+         areas[i] = objects[i].rect.area();
+     }
+ 
+     for (int i = 0; i < n; i++)
+     {
+         const Object& a = objects[i];
+ 
+         bool keep = true;
+         for (size_t j = 0; j < picked.size(); j++)
+         {
+             const int kept = picked[j];
+             const Object& b = objects[kept];
+ 
+             if (!agnostic && a.label != b.label)
+                 continue;
+ 
+             // intersection over union
+             const float inter_area = intersection_area(a, b);
+             const float union_area = areas[i] + areas[kept] - inter_area;
+             if (inter_area / union_area > nms_threshold)
+             {
+                 // one overlapping kept box decides the outcome; skip the remaining ones
+                 keep = false;
+                 break;
+             }
+         }
+ 
+         if (keep)
+             picked.push_back(i);
+     }
+ }
+ 
+ // Logistic function 1 / (1 + e^-x).
+ static inline float sigmoid(float x)
+ {
+     const float decay = expf(-x);
+ 
+     return 1.0f / (1.0f + decay);
+ }
+ 
+ // Decodes the predictions of one stride level into box proposals.
+ //
+ // pred            one row per grid cell, read as 16 x 4 box-distance logits followed by
+ //                 (pred.w - 64) per-class score logits
+ // stride          size of one grid cell in in_pad pixels
+ // in_pad          network input; only its w and h are read
+ // prob_threshold  minimum sigmoid score of the best class for a cell to produce a proposal
+ // objects         proposals are appended; gindex is the cell's row index within pred
+ static void generate_proposals(const ncnn::Mat& pred, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects)
+ {
+     const int w = in_pad.w;
+     const int h = in_pad.h;
+ 
+     const int num_grid_x = w / stride;
+     const int num_grid_y = h / stride;
+ 
+     const int reg_max_1 = 16;
+     const int num_class = pred.w - reg_max_1 * 4; // number of classes. 80 for COCO
+ 
+     // The softmax layer depends only on constants, so it is built once for the whole
+     // grid instead of once per accepted cell.
+     ncnn::Option opt;
+     opt.num_threads = 1;
+     opt.use_packing_layout = false;
+ 
+     ncnn::Layer* softmax = ncnn::create_layer("Softmax");
+     {
+         ncnn::ParamDict pd;
+         pd.set(0, 1); // axis
+         pd.set(1, 1);
+         softmax->load_param(pd);
+     }
+     softmax->create_pipeline(opt);
+ 
+     for (int y = 0; y < num_grid_y; y++)
+     {
+         for (int x = 0; x < num_grid_x; x++)
+         {
+             const int grid_index = y * num_grid_x + x;
+             const ncnn::Mat pred_grid = pred.row_range(grid_index, 1);
+ 
+             // find label with max score; sigmoid is monotonic, so compare raw logits
+             // and convert only the winner
+             int label = -1;
+             float score = -FLT_MAX;
+             {
+                 const ncnn::Mat pred_score = pred_grid.range(reg_max_1 * 4, num_class);
+ 
+                 for (int k = 0; k < num_class; k++)
+                 {
+                     float s = pred_score[k];
+                     if (s > score)
+                     {
+                         label = k;
+                         score = s;
+                     }
+                 }
+ 
+                 score = sigmoid(score);
+             }
+ 
+             // written as a negated >= so that a NaN score is rejected as well
+             if (!(score >= prob_threshold))
+                 continue;
+ 
+             // softmax over the 16 bins of each of the 4 box sides, on a private copy
+             ncnn::Mat pred_bbox = pred_grid.range(0, reg_max_1 * 4).reshape(reg_max_1, 4).clone();
+             softmax->forward_inplace(pred_bbox, opt);
+ 
+             // expected bin index per side, scaled to pixels
+             float pred_ltrb[4];
+             for (int k = 0; k < 4; k++)
+             {
+                 float dis = 0.f;
+                 const float* dis_after_sm = pred_bbox.row(k);
+                 for (int l = 0; l < reg_max_1; l++)
+                 {
+                     dis += l * dis_after_sm[l];
+                 }
+ 
+                 pred_ltrb[k] = dis * stride;
+             }
+ 
+             // distances are measured from the center of the grid cell
+             float pb_cx = (x + 0.5f) * stride;
+             float pb_cy = (y + 0.5f) * stride;
+ 
+             float x0 = pb_cx - pred_ltrb[0];
+             float y0 = pb_cy - pred_ltrb[1];
+             float x1 = pb_cx + pred_ltrb[2];
+             float y1 = pb_cy + pred_ltrb[3];
+ 
+             Object obj;
+             obj.rect.x = x0;
+             obj.rect.y = y0;
+             obj.rect.width = x1 - x0;
+             obj.rect.height = y1 - y0;
+             obj.label = label;
+             obj.prob = score;
+             obj.gindex = grid_index;
+ 
+             objects.push_back(obj);
+         }
+     }
+ 
+     softmax->destroy_pipeline(opt);
+ 
+     delete softmax;
+ }
+ 
+ // Decodes every stride level in turn and appends all proposals to objects.
+ //
+ // Rows of pred are consumed level by level, in the order of strides; a level with
+ // stride s takes (in_pad.w / s) * (in_pad.h / s) rows. The gindex of each new proposal
+ // is shifted by the first row of its level, so it indexes rows of the whole pred blob.
+ static void generate_proposals(const ncnn::Mat& pred, const std::vector<int>& strides, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects)
+ {
+     const int w = in_pad.w;
+     const int h = in_pad.h;
+ 
+     int pred_row_offset = 0;
+     for (size_t i = 0; i < strides.size(); i++)
+     {
+         const int stride = strides[i];
+         const int num_grid = (w / stride) * (h / stride);
+ 
+         // the per-stride overload appends to objects; remember where its output starts
+         // so no temporary vector and no extra Object copies are needed
+         const size_t first_new = objects.size();
+         generate_proposals(pred.row_range(pred_row_offset, num_grid), stride, in_pad, prob_threshold, objects);
+ 
+         // gindex is local to this stride level at this point; make it global
+         for (size_t j = first_new; j < objects.size(); j++)
+             objects[j].gindex += pred_row_offset;
+ 
+         pred_row_offset += num_grid;
+     }
+ }
+ 
+ // Runs the segmentation network on one RGB image.
+ //
+ // rgb      input image; its pixel buffer is handed to ncnn as PIXEL_RGB.
+ // objects  output. On return it holds exactly the detections that survived NMS,
+ //          with rects mapped back to the coordinates of `rgb`, each carrying a
+ //          CV_8UC1 mask the size of its box (1 = inside the instance, 0 = outside),
+ //          ordered by box area, largest first. It is emptied when nothing is detected.
+ //
+ // Always returns 0.
+ int YOLOv8_seg::detect(const cv::Mat& rgb, std::vector<Object>& objects)
+ {
+     const int target_size = det_target_size;//640;
+     const float prob_threshold = 0.25f;
+     const float nms_threshold = 0.45f;
+     const float mask_threshold = 0.5f;
+ 
+     int img_w = rgb.cols;
+     int img_h = rgb.rows;
+ 
+     // ultralytics/cfg/models/v8/yolov8.yaml
+     std::vector<int> strides(3);
+     strides[0] = 8;
+     strides[1] = 16;
+     strides[2] = 32;
+     const int max_stride = 32;
+ 
+     // scale the longer side to target_size, keeping the aspect ratio
+     int w = img_w;
+     int h = img_h;
+     float scale = 1.f;
+     if (w > h)
+     {
+         scale = (float)target_size / w;
+         w = target_size;
+         h = h * scale;
+     }
+     else
+     {
+         scale = (float)target_size / h;
+         h = target_size;
+         w = w * scale;
+     }
+ 
+     ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgb.data, ncnn::Mat::PIXEL_RGB, img_w, img_h, w, h);
+ 
+     // letterbox: pad width and height up to the next multiple of max_stride,
+     // splitting the padding between both sides and filling it with 114
+     int wpad = (w + max_stride - 1) / max_stride * max_stride - w;
+     int hpad = (h + max_stride - 1) / max_stride * max_stride - h;
+     ncnn::Mat in_pad;
+     ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f);
+ 
+     // scale pixel values by 1/255, no mean subtraction
+     const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
+     in_pad.substract_mean_normalize(0, norm_vals);
+ 
+     ncnn::Extractor ex = yolov8.create_extractor();
+ 
+     ex.input("in0", in_pad);
+ 
+     // out0 is the prediction blob decoded by generate_proposals
+     ncnn::Mat out;
+     ex.extract("out0", out);
+ 
+     std::vector<Object> proposals;
+     generate_proposals(out, strides, in_pad, prob_threshold, proposals);
+ 
+     // sort all proposals by score from highest to lowest
+     qsort_descent_inplace(proposals);
+ 
+     // apply nms with nms_threshold
+     std::vector<int> picked;
+     nms_sorted_bboxes(proposals, picked, nms_threshold);
+ 
+     int count = picked.size();
+     if (count == 0)
+     {
+         // nothing detected: drop whatever the caller's vector held so stale
+         // results from an earlier frame are never reported for this one
+         objects.clear();
+         return 0;
+     }
+ 
+     // out1: one row of mask coefficients per prediction, addressed by Object::gindex
+     ncnn::Mat mask_feat;
+     ex.extract("out1", mask_feat);
+ 
+     // out2: mask prototype maps (w x h x c)
+     ncnn::Mat mask_protos;
+     ex.extract("out2", mask_protos);
+ 
+     // coefficients of the picked objects only, one channel per object
+     ncnn::Mat objects_mask_feat(mask_feat.w, 1, count);
+ 
+     objects.resize(count);
+     for (int i = 0; i < count; i++)
+     {
+         objects[i] = proposals[picked[i]];
+ 
+         // adjust offset to original unpadded
+         float x0 = (objects[i].rect.x - (wpad / 2)) / scale;
+         float y0 = (objects[i].rect.y - (hpad / 2)) / scale;
+         float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;
+         float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;
+ 
+         // clip
+         x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);
+         y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);
+         x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);
+         y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);
+ 
+         objects[i].rect.x = x0;
+         objects[i].rect.y = y0;
+         objects[i].rect.width = x1 - x0;
+         objects[i].rect.height = y1 - y0;
+ 
+         // pick mask feat
+         memcpy(objects_mask_feat.channel(i), mask_feat.row(objects[i].gindex), mask_feat.w * sizeof(float));
+     }
+ 
+     // process mask
+     ncnn::Mat objects_mask;
+     {
+         // NOTE(review): presumably a (count x K) * (K x N) product of per-object
+         // coefficients with the flattened prototypes, K = mask_feat.w and
+         // N = mask_protos.w * mask_protos.h - confirm against the ncnn Gemm parameters
+         ncnn::Layer* gemm = ncnn::create_layer("Gemm");
+ 
+         ncnn::ParamDict pd;
+         pd.set(6, 1); // constantC
+         pd.set(7, count); // constantM
+         pd.set(8, mask_protos.w * mask_protos.h); // constantN
+         pd.set(9, mask_feat.w); // constantK
+         pd.set(10, -1); // constant_broadcast_type_C
+         pd.set(11, 1); // output_N1M
+         gemm->load_param(pd);
+ 
+         ncnn::Option opt;
+         opt.num_threads = 1;
+         opt.use_packing_layout = false;
+ 
+         gemm->create_pipeline(opt);
+ 
+         std::vector<ncnn::Mat> gemm_inputs(2);
+         gemm_inputs[0] = objects_mask_feat;
+         gemm_inputs[1] = mask_protos.reshape(mask_protos.w * mask_protos.h, 1, mask_protos.c);
+         std::vector<ncnn::Mat> gemm_outputs(1);
+         gemm->forward(gemm_inputs, gemm_outputs, opt);
+         // back to one prototype-sized map per object
+         objects_mask = gemm_outputs[0].reshape(mask_protos.w, mask_protos.h, count);
+ 
+         gemm->destroy_pipeline(opt);
+ 
+         delete gemm;
+     }
+     {
+         // squash the raw mask values in place with a Sigmoid layer
+         ncnn::Layer* sigmoid = ncnn::create_layer("Sigmoid");
+ 
+         ncnn::Option opt;
+         opt.num_threads = 1;
+         opt.use_packing_layout = false;
+ 
+         sigmoid->create_pipeline(opt);
+ 
+         sigmoid->forward_inplace(objects_mask, opt);
+ 
+         sigmoid->destroy_pipeline(opt);
+ 
+         delete sigmoid;
+     }
+ 
+     // resize mask map to the padded input expressed at original-image scale
+     {
+         ncnn::Mat objects_mask_resized;
+         ncnn::resize_bilinear(objects_mask, objects_mask_resized, in_pad.w / scale, in_pad.h / scale);
+         objects_mask = objects_mask_resized;
+     }
+ 
+     // create per-object mask
+     for (int i = 0; i < count; i++)
+     {
+         Object& obj = objects[i];
+ 
+         const ncnn::Mat mm = objects_mask.channel(i);
+ 
+         obj.mask = cv::Mat((int)obj.rect.height, (int)obj.rect.width, CV_8UC1);
+ 
+         // adjust offset to original unpadded and clip inside object box:
+         // the mask map still includes the letterbox border, so the border
+         // (converted to original pixels) is added to the box origin
+         for (int y = 0; y < (int)obj.rect.height; y++)
+         {
+             const float* pmm = mm.row((int)(hpad / 2 / scale + obj.rect.y + y)) + (int)(wpad / 2 / scale + obj.rect.x);
+             uchar* pmask = obj.mask.ptr<uchar>(y);
+             for (int x = 0; x < (int)obj.rect.width; x++)
+             {
+                 pmask[x] = pmm[x] > mask_threshold ? 1 : 0;
+             }
+         }
+     }
+ 
+     // sort objects by area, largest first
+     struct
+     {
+         bool operator()(const Object& a, const Object& b) const
+         {
+             return a.rect.area() > b.rect.area();
+         }
+     } objects_area_greater;
+     std::sort(objects.begin(), objects.end(), objects_area_greater);
+ 
+     return 0;
+ }
+ 
+ // Renders segmentation results onto `rgb`.
+ //
+ // For every object: blends its mask 50/50 with a palette color inside the box,
+ // outlines the box, and writes a "<class> <score>%" label on a white background
+ // above the box (kept inside the top and right image edges).
+ // Always returns 0.
+ int YOLOv8_seg::draw(cv::Mat& rgb, const std::vector<Object>& objects)
+ {
+     static const char* class_names[] = {
+         "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
+         "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
+         "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
+         "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
+         "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
+         "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
+         "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
+         "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
+         "hair drier", "toothbrush"
+     };
+ 
+     static cv::Scalar colors[] = {
+         cv::Scalar( 67,  54, 244),
+         cv::Scalar( 30,  99, 233),
+         cv::Scalar( 39, 176, 156),
+         cv::Scalar( 58, 183, 103),
+         cv::Scalar( 81, 181,  63),
+         cv::Scalar(150, 243,  33),
+         cv::Scalar(169, 244,   3),
+         cv::Scalar(188, 212,   0),
+         cv::Scalar(150, 136,   0),
+         cv::Scalar(175,  80,  76),
+         cv::Scalar(195,  74, 139),
+         cv::Scalar(220,  57, 205),
+         cv::Scalar(235,  59, 255),
+         cv::Scalar(193,   7, 255),
+         cv::Scalar(152,   0, 255),
+         cv::Scalar( 87,  34, 255),
+         cv::Scalar( 85,  72, 121),
+         cv::Scalar(158, 158, 158),
+         cv::Scalar(125, 139,  96)
+     };
+     // the palette is cycled by object index
+     const size_t palette_size = sizeof(colors) / sizeof(colors[0]);
+ 
+     for (size_t idx = 0; idx < objects.size(); idx++)
+     {
+         const Object& det = objects[idx];
+         const cv::Scalar& tint = colors[idx % palette_size];
+ 
+         // integer box geometry used to walk the mask and the image in lockstep
+         const int box_x = (int)det.rect.x;
+         const int box_y = (int)det.rect.y;
+         const int box_w = (int)det.rect.width;
+         const int box_h = (int)det.rect.height;
+ 
+         for (int row = 0; row < box_h; row++)
+         {
+             const uchar* mask_row = det.mask.ptr<const uchar>(row);
+             uchar* pixel = rgb.ptr<uchar>(box_y + row) + box_x * 3;
+             for (int col = 0; col < box_w; col++, pixel += 3)
+             {
+                 if (!mask_row[col])
+                     continue;
+ 
+                 // half image, half palette color on every masked pixel
+                 for (int c = 0; c < 3; c++)
+                 {
+                     pixel[c] = pixel[c] * 0.5 + tint[c] * 0.5;
+                 }
+             }
+         }
+ 
+         cv::rectangle(rgb, det.rect, tint);
+ 
+         char caption[256];
+         sprintf(caption, "%s %.1f%%", class_names[det.label], det.prob * 100);
+ 
+         int baseline = 0;
+         const cv::Size caption_size = cv::getTextSize(caption, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseline);
+ 
+         // place the caption just above the box, clamped to the top and right edges
+         int caption_x = det.rect.x;
+         int caption_y = det.rect.y - caption_size.height - baseline;
+         if (caption_y < 0)
+         {
+             caption_y = 0;
+         }
+         if (caption_x + caption_size.width > rgb.cols)
+         {
+             caption_x = rgb.cols - caption_size.width;
+         }
+ 
+         const cv::Point caption_origin(caption_x, caption_y);
+         const cv::Size background_size(caption_size.width, caption_size.height + baseline);
+         cv::rectangle(rgb, cv::Rect(caption_origin, background_size), cv::Scalar(255, 255, 255), -1);
+ 
+         cv::putText(rgb, caption, cv::Point(caption_x, caption_y + caption_size.height),
+                     cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
+     }
+ 
+     return 0;
+ }
--- a/livekit-android-track-processors/src/main/jni/yolov8ncnn.cpp 0 → 100644
查看文件 @b55f961
+++ b/livekit-android-track-processors/src/main/jni/yolov8ncnn.cpp 0 → 100644
查看文件 @b55f961
+ // Tencent is pleased to support the open source community by making ncnn available.
+ //
+ // Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
+ //
+ // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+ // in compliance with the License. You may obtain a copy of the License at
+ //
+ // https://opensource.org/licenses/BSD-3-Clause
+ //
+ // Unless required by applicable law or agreed to in writing, software distributed
+ // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ // specific language governing permissions and limitations under the License.
+ 
+ #include <android/asset_manager_jni.h>
+ #include <android/native_window_jni.h>
+ #include <android/native_window.h>
+ 
+ #include <android/log.h>
+ 
+ #include <jni.h>
+ 
+ #include <string>
+ #include <vector>
+ 
+ #include <platform.h>
+ #include <benchmark.h>
+ 
+ #include "yolov8.h"
+ 
+ #include "ndkcamera.h"
+ 
+ #include <opencv2/core/core.hpp>
+ #include <opencv2/imgproc/imgproc.hpp>
+ 
+ #if __ARM_NEON
+ #include <arm_neon.h>
+ #endif // __ARM_NEON
+ 
+ // Writes "unsupported" in black on a white box around the middle of `rgb`.
+ // Always returns 0.
+ static int draw_unsupported(cv::Mat& rgb)
+ {
+     const char message[] = "unsupported";
+     const int font_face = cv::FONT_HERSHEY_SIMPLEX;
+     const double font_scale = 1.0;
+ 
+     int baseline = 0;
+     const cv::Size message_size = cv::getTextSize(message, font_face, font_scale, 1, &baseline);
+ 
+     // top-left corner that centers the text extent in the image
+     const int left = (rgb.cols - message_size.width) / 2;
+     const int top = (rgb.rows - message_size.height) / 2;
+ 
+     const cv::Rect background(cv::Point(left, top),
+                               cv::Size(message_size.width, message_size.height + baseline));
+     cv::rectangle(rgb, background, cv::Scalar(255, 255, 255), -1);
+ 
+     cv::putText(rgb, message, cv::Point(left, top + message_size.height),
+                 font_face, font_scale, cv::Scalar(0, 0, 0));
+ 
+     return 0;
+ }
+ 
+ // Draws "FPS=<value>" in the top-right corner of `rgb`.
+ //
+ // The value is the mean of the last 10 per-call rates, each derived from the time
+ // elapsed since the previous call. Nothing is drawn until 10 samples have been
+ // collected. Always returns 0.
+ static int draw_fps(cv::Mat& rgb)
+ {
+     static double last_time = 0.f;       // timestamp of the previous call, 0 before the first one
+     static float history[10] = {0.f};    // most recent sample at index 0
+ 
+     const double now = ncnn::get_current_time();
+     if (last_time == 0.f)
+     {
+         // first call: only remember the timestamp
+         last_time = now;
+         return 0;
+     }
+ 
+     const float fps = 1000.f / (now - last_time);
+     last_time = now;
+ 
+     // push the new sample, dropping the oldest
+     for (int slot = 9; slot > 0; --slot)
+     {
+         history[slot] = history[slot - 1];
+     }
+     history[0] = fps;
+ 
+     // the window is not full yet
+     if (history[9] == 0.f)
+     {
+         return 0;
+     }
+ 
+     float avg_fps = 0.f;
+     for (int slot = 0; slot < 10; ++slot)
+     {
+         avg_fps += history[slot];
+     }
+     avg_fps /= 10.f;
+ 
+     char caption[32];
+     sprintf(caption, "FPS=%.2f", avg_fps);
+ 
+     int baseline = 0;
+     const cv::Size caption_size = cv::getTextSize(caption, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseline);
+ 
+     // right-aligned on the first rows of the image
+     const int left = rgb.cols - caption_size.width;
+     const int top = 0;
+ 
+     const cv::Rect background(cv::Point(left, top),
+                               cv::Size(caption_size.width, caption_size.height + baseline));
+     cv::rectangle(rgb, background, cv::Scalar(255, 255, 255), -1);
+ 
+     cv::putText(rgb, caption, cv::Point(left, top + caption_size.height),
+                 cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
+ 
+     return 0;
+ }
+ 
+ // Model currently in use, or null when none is loaded. Created and replaced by
+ // loadModel, used by MyNdkCamera::on_image_render, deleted in JNI_OnUnload.
+ static YOLOv8* g_yolov8 = 0;
+ // Held by every function in this file that reads or writes g_yolov8.
+ static ncnn::Mutex lock;
+ 
+ // Camera window whose per-frame hook runs the loaded YOLOv8 model on the frame
+ // and draws the results (see on_image_render below).
+ class MyNdkCamera : public NdkCameraWindow
+ {
+ public:
+     // Receives one RGB frame and draws onto it in place.
+     virtual void on_image_render(cv::Mat& rgb) const;
+ };
+ 
+ // Per-frame hook: runs detection and drawing with the loaded model, or paints
+ // an "unsupported" banner when no model is loaded, then overlays the FPS counter.
+ void MyNdkCamera::on_image_render(cv::Mat& rgb) const
+ {
+     {
+         // g_yolov8 may be swapped by loadModel concurrently; hold the lock
+         // for the whole detect + draw sequence
+         ncnn::MutexLockGuard guard(lock);
+ 
+         if (!g_yolov8)
+         {
+             draw_unsupported(rgb);
+         }
+         else
+         {
+             std::vector<Object> detections;
+             g_yolov8->detect(rgb, detections);
+ 
+             g_yolov8->draw(rgb, detections);
+         }
+     }
+ 
+     // the FPS overlay does not touch the model, so it runs outside the lock
+     draw_fps(rgb);
+ }
+ 
+ // Single camera instance: allocated in JNI_OnLoad, deleted in JNI_OnUnload,
+ // driven by the openCamera / closeCamera / setOutputWindow entry points.
+ static MyNdkCamera* g_camera = 0;
+ 
+ extern "C" {
+ 
+ // Library load hook: allocates the camera object, creates the ncnn GPU instance
+ // and reports JNI version 1.4 to the VM.
+ JNIEXPORT jint JNI_OnLoad(JavaVM* vm, void* reserved)
+ {
+     __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "JNI_OnLoad");
+ 
+     // released again in JNI_OnUnload
+     MyNdkCamera* camera = new MyNdkCamera;
+     g_camera = camera;
+ 
+     ncnn::create_gpu_instance();
+ 
+     const jint jni_version = JNI_VERSION_1_4;
+     return jni_version;
+ }
+ 
+ // Library unload hook: frees the model (under the lock), then the ncnn GPU
+ // instance, then the camera object.
+ JNIEXPORT void JNI_OnUnload(JavaVM* vm, void* reserved)
+ {
+     __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "JNI_OnUnload");
+ 
+     {
+         ncnn::MutexLockGuard guard(lock);
+ 
+         // detach the global first, then free what it pointed to
+         YOLOv8* model = g_yolov8;
+         g_yolov8 = 0;
+         delete model;
+     }
+ 
+     ncnn::destroy_gpu_instance();
+ 
+     MyNdkCamera* camera = g_camera;
+     g_camera = 0;
+     delete camera;
+ }
+ 
+ // public native boolean loadModel(AssetManager mgr, int taskid, int modelid, int cpugpu);
+ //
+ // Selects and, if needed, (re)loads the model used for every camera frame.
+ //
+ // taskid   0..5, picks the task suffix of the asset name and the YOLOv8 subclass
+ //          (0 det coco, 1 det oiv7, 2 seg, 3 pose, 4 cls, 5 obb).
+ // modelid  0..8; modelid % 3 picks the model size letter (n/s/m), modelid / 3 picks
+ //          the detection target size (320, 480, 640).
+ // cpugpu   0 = CPU, 1 = default Vulkan driver, 2 = "libvulkan_freedreno.so" driver.
+ //
+ // Returns JNI_FALSE when an argument is out of range, JNI_TRUE otherwise.
+ // The existing net is kept when task, size letter and backend are unchanged; in
+ // that case only the target size is updated.
+ JNIEXPORT jboolean JNICALL Java_com_tencent_yolov8ncnn_YOLOv8Ncnn_loadModel(JNIEnv* env, jobject thiz, jobject assetManager, jint taskid, jint modelid, jint cpugpu)
+ {
+     if (taskid < 0 || taskid > 5 || modelid < 0 || modelid > 8 || cpugpu < 0 || cpugpu > 2)
+     {
+         return JNI_FALSE;
+     }
+ 
+     AAssetManager* mgr = AAssetManager_fromJava(env, assetManager);
+ 
+     __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "loadModel %p", mgr);
+ 
+     const char* tasknames[6] =
+             {
+                     "",
+                     "_oiv7",
+                     "_seg",
+                     "_pose",
+                     "_cls",
+                     "_obb"
+             };
+ 
+     // three groups of n/s/m: the group index only changes the target size below
+     const char* modeltypes[9] =
+             {
+                     "n",
+                     "s",
+                     "m",
+                     "n",
+                     "s",
+                     "m",
+                     "n",
+                     "s",
+                     "m"
+             };
+ 
+     std::string parampath = std::string("yolov8") + modeltypes[(int)modelid] + tasknames[(int)taskid] + ".ncnn.param";
+     std::string modelpath = std::string("yolov8") + modeltypes[(int)modelid] + tasknames[(int)taskid] + ".ncnn.bin";
+     bool use_gpu = (int)cpugpu == 1;
+     bool use_turnip = (int)cpugpu == 2;
+ 
+     // reload
+     {
+         ncnn::MutexLockGuard g(lock);
+ 
+         {
+             static int old_taskid = 0;
+             static int old_modelid = 0;
+             static int old_cpugpu = 0;
+             if (taskid != old_taskid || (modelid % 3) != old_modelid || cpugpu != old_cpugpu)
+             {
+                 // taskid or model or cpugpu changed
+                 delete g_yolov8;
+                 g_yolov8 = 0;
+             }
+             old_taskid = taskid;
+             old_modelid = modelid % 3;
+             old_cpugpu = cpugpu;
+ 
+             if (!g_yolov8)
+             {
+                 // Recycle the GPU instance only here, where no net is alive.
+                 // Doing it unconditionally would tear the Vulkan device down
+                 // underneath a net that is being kept (same task/model/backend,
+                 // different target size) and leave it with a dangling device.
+                 ncnn::destroy_gpu_instance();
+ 
+                 if (use_turnip)
+                 {
+                     ncnn::create_gpu_instance("libvulkan_freedreno.so");
+                 }
+                 else if (use_gpu)
+                 {
+                     ncnn::create_gpu_instance();
+                 }
+ 
+                 if (taskid == 0) g_yolov8 = new YOLOv8_det_coco;
+                 if (taskid == 1) g_yolov8 = new YOLOv8_det_oiv7;
+                 if (taskid == 2) g_yolov8 = new YOLOv8_seg;
+                 if (taskid == 3) g_yolov8 = new YOLOv8_pose;
+                 if (taskid == 4) g_yolov8 = new YOLOv8_cls;
+                 if (taskid == 5) g_yolov8 = new YOLOv8_obb;
+ 
+                 g_yolov8->load(mgr, parampath.c_str(), modelpath.c_str(), use_gpu || use_turnip);
+             }
+ 
+             // the target size is applied on every call, also when the net was kept
+             int target_size = 320;
+             if ((int)modelid >= 3)
+                 target_size = 480;
+             if ((int)modelid >= 6)
+                 target_size = 640;
+             g_yolov8->set_det_target_size(target_size);
+         }
+     }
+ 
+     return JNI_TRUE;
+ }
+ 
+ // public native boolean openCamera(int facing);
+ //
+ // Opens the camera with the given facing value. Only 0 and 1 are accepted;
+ // anything else returns JNI_FALSE without touching the camera.
+ JNIEXPORT jboolean JNICALL Java_com_tencent_yolov8ncnn_YOLOv8Ncnn_openCamera(JNIEnv* env, jobject thiz, jint facing)
+ {
+     const bool facing_is_valid = (facing == 0 || facing == 1);
+     if (!facing_is_valid)
+     {
+         return JNI_FALSE;
+     }
+ 
+     __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "openCamera %d", facing);
+ 
+     const int camera_facing = (int)facing;
+     g_camera->open(camera_facing);
+ 
+     return JNI_TRUE;
+ }
+ 
+ // public native boolean closeCamera();
+ //
+ // Closes the camera held by g_camera. Always returns JNI_TRUE.
+ JNIEXPORT jboolean JNICALL Java_com_tencent_yolov8ncnn_YOLOv8Ncnn_closeCamera(JNIEnv* env, jobject thiz)
+ {
+     __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "closeCamera");
+ 
+     MyNdkCamera* camera = g_camera;
+     camera->close();
+ 
+     return JNI_TRUE;
+ }
+ 
+ // public native boolean setOutputWindow(Surface surface);
+ //
+ // Resolves the native window behind the Java Surface and hands it to the camera
+ // as its render target. Always returns JNI_TRUE.
+ JNIEXPORT jboolean JNICALL Java_com_tencent_yolov8ncnn_YOLOv8Ncnn_setOutputWindow(JNIEnv* env, jobject thiz, jobject surface)
+ {
+     ANativeWindow* native_window = ANativeWindow_fromSurface(env, surface);
+ 
+     __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "setOutputWindow %p", native_window);
+ 
+     g_camera->set_window(native_window);
+ 
+     return JNI_TRUE;
+ }
+ 
+ }