xuning

引入 YOLOv8

// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "yolov8.h"
// Destructor. No explicit cleanup is performed here; members are destroyed
// by their own destructors.
//
// NOTE(review): the previous body assigned 320 to det_target_size. A store to
// a member inside the destructor can never be observed, so it has been
// removed. It looked like a misplaced construction-time default - confirm that
// det_target_size is given a value before the first detect() call.
YOLOv8::~YOLOv8()
{
}
// Load a network from files on disk.
//
//   parampath  forwarded to ncnn::Net::load_param()
//   modelpath  forwarded to ncnn::Net::load_model()
//   use_gpu    copied into opt.use_vulkan_compute when built with NCNN_VULKAN,
//              ignored otherwise
//
// Returns 0 on success. If either ncnn load call reports a non-zero status,
// that status is returned instead of being silently discarded.
int YOLOv8::load(const char* parampath, const char* modelpath, bool use_gpu)
{
    // drop whatever was loaded before and start again from default options
    yolov8.clear();
    yolov8.opt = ncnn::Option();

#if NCNN_VULKAN
    yolov8.opt.use_vulkan_compute = use_gpu;
#else
    (void)use_gpu;
#endif

    const int param_status = yolov8.load_param(parampath);
    if (param_status != 0)
        return param_status;

    return yolov8.load_model(modelpath);
}
// Load a network through an asset manager.
//
//   mgr        asset manager forwarded to the ncnn load calls
//   parampath  forwarded to ncnn::Net::load_param(mgr, ...)
//   modelpath  forwarded to ncnn::Net::load_model(mgr, ...)
//   use_gpu    copied into opt.use_vulkan_compute when built with NCNN_VULKAN,
//              ignored otherwise
//
// Returns 0 on success. If either ncnn load call reports a non-zero status,
// that status is returned instead of being silently discarded.
int YOLOv8::load(AAssetManager* mgr, const char* parampath, const char* modelpath, bool use_gpu)
{
    // drop whatever was loaded before and start again from default options
    yolov8.clear();
    yolov8.opt = ncnn::Option();

#if NCNN_VULKAN
    yolov8.opt.use_vulkan_compute = use_gpu;
#else
    (void)use_gpu;
#endif

    const int param_status = yolov8.load_param(mgr, parampath);
    if (param_status != 0)
        return param_status;

    return yolov8.load_model(mgr, modelpath);
}
void YOLOv8::set_det_target_size(int target_size)
{
det_target_size = target_size;
}
... ...
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef YOLOV8_H
#define YOLOV8_H
#include <opencv2/core/core.hpp>
#include <net.h>
// A single 2-D point with an associated score.
// NOTE(review): presumably filled in by the pose task; no code in view writes
// to it - confirm against the pose implementation.
struct KeyPoint
{
// point position (floating-point x/y)
cv::Point2f p;
// score attached to this point
float prob;
};
// One result entry produced by the detect() implementations.
// Which fields are meaningful depends on the task: the classification path
// only sets label and prob, the detection path sets rect, label and prob.
struct Object
{
// axis-aligned box (x, y, width, height); the detection path stores it in
// original-image coordinates after un-letterboxing
cv::Rect_<float> rect;
// rotated box - presumably used by the oriented-box task (not written by any
// code in view; TODO confirm)
cv::RotatedRect rrect;
// class index; the draw() implementations use it to index their class-name table
int label;
// score of the chosen class
float prob;
// NOTE(review): not referenced by any code in view - meaning to be confirmed
int gindex;
// presumably a per-object mask for the segmentation task - TODO confirm
cv::Mat mask;
// presumably per-object keypoints for the pose task - TODO confirm
std::vector<KeyPoint> keypoints;
};
// Abstract base class shared by the YOLOv8 task wrappers.
//
// It owns the ncnn network and the detection input size; subclasses provide
// detect() (run the network, fill `objects`) and draw() (render `objects`
// onto an image).
class YOLOv8
{
public:
    // Give det_target_size a defined value at construction time. Without this
    // the member is indeterminate until set_det_target_size() is called, and
    // YOLOv8_det::detect() would read it uninitialised. 320 is the value the
    // implementation file associates with this member.
    YOLOv8() : det_target_size(320) {}

    virtual ~YOLOv8();

    // Load the network from files on disk. Returns an int status (0 is the
    // success value used by the implementation).
    int load(const char* parampath, const char* modelpath, bool use_gpu = false);

    // Load the network through an asset manager. Same return convention.
    int load(AAssetManager* mgr, const char* parampath, const char* modelpath, bool use_gpu = false);

    // Set the input size used by the detection path.
    void set_det_target_size(int target_size);

    // Run inference on `rgb` and write the results into `objects`.
    virtual int detect(const cv::Mat& rgb, std::vector<Object>& objects) = 0;

    // Render `objects` onto `rgb`.
    virtual int draw(cv::Mat& rgb, const std::vector<Object>& objects) = 0;

protected:
    ncnn::Net yolov8;    // the loaded network
    int det_target_size; // input size read by YOLOv8_det::detect()
};
// Object-detection wrapper: implements detect() only. draw() is still pure
// virtual here, so this class stays abstract and is completed by subclasses
// that supply a class-name table.
class YOLOv8_det : public YOLOv8
{
public:
// Run detection on `rgb` and fill `objects` with the boxes that survive NMS.
virtual int detect(const cv::Mat& rgb, std::vector<Object>& objects);
};
// Detection wrapper whose draw() labels boxes with the 80-entry class-name
// table defined in its implementation ("person", "bicycle", "car", ...).
class YOLOv8_det_coco : public YOLOv8_det
{
public:
// Draw each object's box and a "<name> <score>%" label onto `rgb`.
virtual int draw(cv::Mat& rgb, const std::vector<Object>& objects);
};
// Detection wrapper whose draw() uses a different class-name table
// ("Accordion", "Adhesive tape", "Aircraft", ...).
// NOTE(review): "oiv7" presumably refers to the Open Images V7 label set - confirm.
class YOLOv8_det_oiv7 : public YOLOv8_det
{
public:
// Render `objects` onto `rgb` using this class's own name table.
virtual int draw(cv::Mat& rgb, const std::vector<Object>& objects);
};
// Wrapper that overrides both detect() and draw().
// NOTE(review): presumably the instance-segmentation task (fills Object::mask);
// the implementation is not in view - confirm.
class YOLOv8_seg : public YOLOv8
{
public:
// Run inference on `rgb` and fill `objects`.
virtual int detect(const cv::Mat& rgb, std::vector<Object>& objects);
// Render `objects` onto `rgb`.
virtual int draw(cv::Mat& rgb, const std::vector<Object>& objects);
};
// Wrapper that overrides both detect() and draw().
// NOTE(review): presumably the pose-estimation task (fills Object::keypoints);
// the implementation is not in view - confirm.
class YOLOv8_pose : public YOLOv8
{
public:
// Run inference on `rgb` and fill `objects`.
virtual int detect(const cv::Mat& rgb, std::vector<Object>& objects);
// Render `objects` onto `rgb`.
virtual int draw(cv::Mat& rgb, const std::vector<Object>& objects);
};
// Image-classification wrapper. detect() returns the highest-scoring classes
// as Object entries (only label and prob are set); draw() prints them as a
// list of text lines in the top-left corner of the image.
class YOLOv8_cls : public YOLOv8
{
public:
// Classify `rgb` and fill `objects` with the top-scoring classes.
virtual int detect(const cv::Mat& rgb, std::vector<Object>& objects);
// Write one "<score>% <name>" line per object onto `rgb`.
virtual int draw(cv::Mat& rgb, const std::vector<Object>& objects);
};
// Wrapper that overrides both detect() and draw().
// NOTE(review): presumably the oriented-bounding-box task (fills Object::rrect);
// the implementation is not in view - confirm.
class YOLOv8_obb : public YOLOv8
{
public:
// Run inference on `rgb` and fill `objects`.
virtual int detect(const cv::Mat& rgb, std::vector<Object>& objects);
// Render `objects` onto `rgb`.
virtual int draw(cv::Mat& rgb, const std::vector<Object>& objects);
};
#endif // YOLOV8_H
... ...
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
// 1. install
// pip3 install -U ultralytics pnnx ncnn
// 2. export yolov8-cls torchscript
// yolo export model=yolov8n-cls.pt format=torchscript
// 3. convert torchscript with static shape
// pnnx yolov8n-cls.torchscript
// 4. now you get ncnn model files
// yolov8n_cls.ncnn.param
// yolov8n_cls.ncnn.bin
#include "yolov8.h"
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <float.h>
#include <stdio.h>
#include <vector>
// Select the `topk` highest scores from `cls_scores` and store them in
// `objects`, best first.
//
//   cls_scores  1-D score blob; its width (w) is the number of classes
//   topk        number of entries wanted; clamped to [0, cls_scores.w] so a
//               model with fewer classes than requested (or an empty blob)
//               cannot push the partial sort past the end of the buffer
//   objects     resized to the clamped count; only label (class index) and
//               prob (score) of each entry are written
static void get_topk(const ncnn::Mat& cls_scores, int topk, std::vector<Object>& objects)
{
    const int size = cls_scores.w;

    if (topk > size)
        topk = size;
    if (topk < 0)
        topk = 0;

    // pair every score with its class index so the index survives sorting
    std::vector<std::pair<float, int> > vec;
    vec.resize(size);
    for (int i = 0; i < size; i++)
    {
        vec[i] = std::make_pair(cls_scores[i], i);
    }

    // only the first topk entries need to be in order
    std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(),
                      std::greater<std::pair<float, int> >());

    objects.resize(topk);
    for (int i = 0; i < topk; i++)
    {
        objects[i].label = vec[i].second;
        objects[i].prob = vec[i].first;
    }
}
// Classify an image and return the top-5 classes.
//
//   rgb      input image; its pixel buffer is handed to ncnn as PIXEL_RGB
//   objects  cleared, then filled with up to 5 entries (label + prob only),
//            highest score first
//
// Returns 0 on success and -1 when the image is empty or the network could not
// be run (input/extract reported an error or produced an empty blob). On
// failure `objects` is left empty.
int YOLOv8_cls::detect(const cv::Mat& rgb, std::vector<Object>& objects)
{
    const int target_size = 224;
    const int topk = 5;

    objects.clear();

    // an empty image would make the scale computation below divide by zero
    if (rgb.empty())
        return -1;

    const int img_w = rgb.cols;
    const int img_h = rgb.rows;

    // resize so the longer side equals target_size, keeping the aspect ratio
    int w = img_w;
    int h = img_h;
    float scale = 1.f;
    if (w > h)
    {
        scale = (float)target_size / w;
        w = target_size;
        h = h * scale;
    }
    else
    {
        scale = (float)target_size / h;
        h = target_size;
        w = w * scale;
    }

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgb.data, ncnn::Mat::PIXEL_RGB, img_w, img_h, w, h);

    // letterbox: pad the shorter side symmetrically up to a target_size square
    const int wpad = target_size - w;
    const int hpad = target_size - h;
    ncnn::Mat in_pad;
    ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f);

    // scale pixel values by 1/255, no mean subtraction
    const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
    in_pad.substract_mean_normalize(0, norm_vals);

    ncnn::Extractor ex = yolov8.create_extractor();

    if (ex.input("in0", in_pad) != 0)
        return -1;

    ncnn::Mat out;
    if (ex.extract("out0", out) != 0 || out.empty())
        return -1;

    // return top-5
    get_topk(out, topk, objects);

    return 0;
}
// Print the classification results as a stack of text lines in the top-left
// corner of the image.
//
//   rgb      image to draw on (modified in place)
//   objects  entries to print; label indexes the class-name table below and
//            prob is shown as a percentage
//
// Each line reads "<score>% <name>", black text on a filled white box. A label
// outside the table (for example from a model with a different class count) is
// printed as "unknown" instead of reading past the end of the table.
// Always returns 0.
int YOLOv8_cls::draw(cv::Mat& rgb, const std::vector<Object>& objects)
{
    static const char* class_names[] = {
        "tench", "goldfish", "great white shark", "tiger shark", "hammerhead", "electric ray", "stingray", "cock",
        "hen", "ostrich", "brambling", "goldfinch", "house finch", "junco", "indigo bunting", "robin", "bulbul",
        "jay", "magpie", "chickadee", "water ouzel", "kite", "bald eagle", "vulture", "great grey owl",
        "European fire salamander", "common newt", "eft", "spotted salamander", "axolotl", "bullfrog", "tree frog",
        "tailed frog", "loggerhead", "leatherback turtle", "mud turtle", "terrapin", "box turtle", "banded gecko",
        "common iguana", "American chameleon", "whiptail", "agama", "frilled lizard", "alligator lizard",
        "Gila monster", "green lizard", "African chameleon", "Komodo dragon", "African crocodile",
        "American alligator", "triceratops", "thunder snake", "ringneck snake", "hognose snake", "green snake",
        "king snake", "garter snake", "water snake", "vine snake", "night snake", "boa constrictor", "rock python",
        "Indian cobra", "green mamba", "sea snake", "horned viper", "diamondback", "sidewinder", "trilobite",
        "harvestman", "scorpion", "black and gold garden spider", "barn spider", "garden spider", "black widow",
        "tarantula", "wolf spider", "tick", "centipede", "black grouse", "ptarmigan", "ruffed grouse",
        "prairie chicken", "peacock", "quail", "partridge", "African grey", "macaw", "sulphur-crested cockatoo",
        "lorikeet", "coucal", "bee eater", "hornbill", "hummingbird", "jacamar", "toucan", "drake",
        "red-breasted merganser", "goose", "black swan", "tusker", "echidna", "platypus", "wallaby", "koala",
        "wombat", "jellyfish", "sea anemone", "brain coral", "flatworm", "nematode", "conch", "snail", "slug",
        "sea slug", "chiton", "chambered nautilus", "Dungeness crab", "rock crab", "fiddler crab", "king crab",
        "American lobster", "spiny lobster", "crayfish", "hermit crab", "isopod", "white stork", "black stork",
        "spoonbill", "flamingo", "little blue heron", "American egret", "bittern", "crane (bird)", "limpkin",
        "European gallinule", "American coot", "bustard", "ruddy turnstone", "red-backed sandpiper", "redshank",
        "dowitcher", "oystercatcher", "pelican", "king penguin", "albatross", "grey whale", "killer whale",
        "dugong", "sea lion", "Chihuahua", "Japanese spaniel", "Maltese dog", "Pekinese", "Shih-Tzu",
        "Blenheim spaniel", "papillon", "toy terrier", "Rhodesian ridgeback", "Afghan hound", "basset", "beagle",
        "bloodhound", "bluetick", "black-and-tan coonhound", "Walker hound", "English foxhound", "redbone",
        "borzoi", "Irish wolfhound", "Italian greyhound", "whippet", "Ibizan hound", "Norwegian elkhound",
        "otterhound", "Saluki", "Scottish deerhound", "Weimaraner", "Staffordshire bullterrier",
        "American Staffordshire terrier", "Bedlington terrier", "Border terrier", "Kerry blue terrier",
        "Irish terrier", "Norfolk terrier", "Norwich terrier", "Yorkshire terrier", "wire-haired fox terrier",
        "Lakeland terrier", "Sealyham terrier", "Airedale", "cairn", "Australian terrier", "Dandie Dinmont",
        "Boston bull", "miniature schnauzer", "giant schnauzer", "standard schnauzer", "Scotch terrier",
        "Tibetan terrier", "silky terrier", "soft-coated wheaten terrier", "West Highland white terrier",
        "Lhasa", "flat-coated retriever", "curly-coated retriever", "golden retriever", "Labrador retriever",
        "Chesapeake Bay retriever", "German short-haired pointer", "vizsla", "English setter", "Irish setter",
        "Gordon setter", "Brittany spaniel", "clumber", "English springer", "Welsh springer spaniel",
        "cocker spaniel", "Sussex spaniel", "Irish water spaniel", "kuvasz", "schipperke", "groenendael",
        "malinois", "briard", "kelpie", "komondor", "Old English sheepdog", "Shetland sheepdog", "collie",
        "Border collie", "Bouvier des Flandres", "Rottweiler", "German shepherd", "Doberman",
        "miniature pinscher", "Greater Swiss Mountain dog", "Bernese mountain dog", "Appenzeller", "EntleBucher",
        "boxer", "bull mastiff", "Tibetan mastiff", "French bulldog", "Great Dane", "Saint Bernard",
        "Eskimo dog", "malamute", "Siberian husky", "dalmatian", "affenpinscher", "basenji", "pug", "Leonberg",
        "Newfoundland", "Great Pyrenees", "Samoyed", "Pomeranian", "chow", "keeshond", "Brabancon griffon",
        "Pembroke", "Cardigan", "toy poodle", "miniature poodle", "standard poodle", "Mexican hairless",
        "timber wolf", "white wolf", "red wolf", "coyote", "dingo", "dhole", "African hunting dog", "hyena",
        "red fox", "kit fox", "Arctic fox", "grey fox", "tabby", "tiger cat", "Persian cat", "Siamese cat",
        "Egyptian cat", "cougar", "lynx", "leopard", "snow leopard", "jaguar", "lion", "tiger", "cheetah",
        "brown bear", "American black bear", "ice bear", "sloth bear", "mongoose", "meerkat", "tiger beetle",
        "ladybug", "ground beetle", "long-horned beetle", "leaf beetle", "dung beetle", "rhinoceros beetle",
        "weevil", "fly", "bee", "ant", "grasshopper", "cricket", "walking stick", "cockroach", "mantis",
        "cicada", "leafhopper", "lacewing", "dragonfly", "damselfly", "admiral", "ringlet", "monarch",
        "cabbage butterfly", "sulphur butterfly", "lycaenid", "starfish", "sea urchin", "sea cucumber",
        "wood rabbit", "hare", "Angora", "hamster", "porcupine", "fox squirrel", "marmot", "beaver",
        "guinea pig", "sorrel", "zebra", "hog", "wild boar", "warthog", "hippopotamus", "ox", "water buffalo",
        "bison", "ram", "bighorn", "ibex", "hartebeest", "impala", "gazelle", "Arabian camel", "llama",
        "weasel", "mink", "polecat", "black-footed ferret", "otter", "skunk", "badger", "armadillo",
        "three-toed sloth", "orangutan", "gorilla", "chimpanzee", "gibbon", "siamang", "guenon", "patas",
        "baboon", "macaque", "langur", "colobus", "proboscis monkey", "marmoset", "capuchin", "howler monkey",
        "titi", "spider monkey", "squirrel monkey", "Madagascar cat", "indri", "Indian elephant",
        "African elephant", "lesser panda", "giant panda", "barracouta", "eel", "coho", "rock beauty",
        "anemone fish", "sturgeon", "gar", "lionfish", "puffer", "abacus", "abaya", "academic gown",
        "accordion", "acoustic guitar", "aircraft carrier", "airliner", "airship", "altar", "ambulance",
        "amphibian", "analog clock", "apiary", "apron", "ashcan", "assault rifle", "backpack", "bakery",
        "balance beam", "balloon", "ballpoint", "Band Aid", "banjo", "bannister", "barbell", "barber chair",
        "barbershop", "barn", "barometer", "barrel", "barrow", "baseball", "basketball", "bassinet", "bassoon",
        "bathing cap", "bath towel", "bathtub", "beach wagon", "beacon", "beaker", "bearskin", "beer bottle",
        "beer glass", "bell cote", "bib", "bicycle-built-for-two", "bikini", "binder", "binoculars",
        "birdhouse", "boathouse", "bobsled", "bolo tie", "bonnet", "bookcase", "bookshop", "bottlecap", "bow",
        "bow tie", "brass", "brassiere", "breakwater", "breastplate", "broom", "bucket", "buckle",
        "bulletproof vest", "bullet train", "butcher shop", "cab", "caldron", "candle", "cannon", "canoe",
        "can opener", "cardigan", "car mirror", "carousel", "carpenter's kit", "carton", "car wheel",
        "cash machine", "cassette", "cassette player", "castle", "catamaran", "CD player", "cello",
        "cellular telephone", "chain", "chainlink fence", "chain mail", "chain saw", "chest", "chiffonier",
        "chime", "china cabinet", "Christmas stocking", "church", "cinema", "cleaver", "cliff dwelling",
        "cloak", "clog", "cocktail shaker", "coffee mug", "coffeepot", "coil", "combination lock",
        "computer keyboard", "confectionery", "container ship", "convertible", "corkscrew", "cornet",
        "cowboy boot", "cowboy hat", "cradle", "crane (machine)", "crash helmet", "crate", "crib",
        "Crock Pot", "croquet ball", "crutch", "cuirass", "dam", "desk", "desktop computer", "dial telephone",
        "diaper", "digital clock", "digital watch", "dining table", "dishrag", "dishwasher", "disk brake",
        "dock", "dogsled", "dome", "doormat", "drilling platform", "drum", "drumstick", "dumbbell",
        "Dutch oven", "electric fan", "electric guitar", "electric locomotive", "entertainment center",
        "envelope", "espresso maker", "face powder", "feather boa", "file", "fireboat", "fire engine",
        "fire screen", "flagpole", "flute", "folding chair", "football helmet", "forklift", "fountain",
        "fountain pen", "four-poster", "freight car", "French horn", "frying pan", "fur coat", "garbage truck",
        "gasmask", "gas pump", "goblet", "go-kart", "golf ball", "golfcart", "gondola", "gong", "gown",
        "grand piano", "greenhouse", "grille", "grocery store", "guillotine", "hair slide", "hair spray",
        "half track", "hammer", "hamper", "hand blower", "hand-held computer", "handkerchief", "hard disc",
        "harmonica", "harp", "harvester", "hatchet", "holster", "home theater", "honeycomb", "hook",
        "hoopskirt", "horizontal bar", "horse cart", "hourglass", "iPod", "iron", "jack-o'-lantern", "jean",
        "jeep", "jersey", "jigsaw puzzle", "jinrikisha", "joystick", "kimono", "knee pad", "knot", "lab coat",
        "ladle", "lampshade", "laptop", "lawn mower", "lens cap", "letter opener", "library", "lifeboat",
        "lighter", "limousine", "liner", "lipstick", "Loafer", "lotion", "loudspeaker", "loupe", "lumbermill",
        "magnetic compass", "mailbag", "mailbox", "maillot (tights)", "maillot (tank suit)", "manhole cover",
        "maraca", "marimba", "mask", "matchstick", "maypole", "maze", "measuring cup", "medicine chest",
        "megalith", "microphone", "microwave", "military uniform", "milk can", "minibus", "miniskirt",
        "minivan", "missile", "mitten", "mixing bowl", "mobile home", "Model T", "modem", "monastery",
        "monitor", "moped", "mortar", "mortarboard", "mosque", "mosquito net", "motor scooter", "mountain bike",
        "mountain tent", "mouse", "mousetrap", "moving van", "muzzle", "nail", "neck brace", "necklace",
        "nipple", "notebook", "obelisk", "oboe", "ocarina", "odometer", "oil filter", "organ", "oscilloscope",
        "overskirt", "oxcart", "oxygen mask", "packet", "paddle", "paddlewheel", "padlock", "paintbrush",
        "pajama", "palace", "panpipe", "paper towel", "parachute", "parallel bars", "park bench",
        "parking meter", "passenger car", "patio", "pay-phone", "pedestal", "pencil box", "pencil sharpener",
        "perfume", "Petri dish", "photocopier", "pick", "pickelhaube", "picket fence", "pickup", "pier",
        "piggy bank", "pill bottle", "pillow", "ping-pong ball", "pinwheel", "pirate", "pitcher", "plane",
        "planetarium", "plastic bag", "plate rack", "plow", "plunger", "Polaroid camera", "pole",
        "police van", "poncho", "pool table", "pop bottle", "pot", "potter's wheel", "power drill",
        "prayer rug", "printer", "prison", "projectile", "projector", "puck", "punching bag", "purse",
        "quill", "quilt", "racer", "racket", "radiator", "radio", "radio telescope", "rain barrel",
        "recreational vehicle", "reel", "reflex camera", "refrigerator", "remote control", "restaurant",
        "revolver", "rifle", "rocking chair", "rotisserie", "rubber eraser", "rugby ball", "rule",
        "running shoe", "safe", "safety pin", "saltshaker", "sandal", "sarong", "sax", "scabbard", "scale",
        "school bus", "schooner", "scoreboard", "screen", "screw", "screwdriver", "seat belt", "sewing machine",
        "shield", "shoe shop", "shoji", "shopping basket", "shopping cart", "shovel", "shower cap",
        "shower curtain", "ski", "ski mask", "sleeping bag", "slide rule", "sliding door", "slot", "snorkel",
        "snowmobile", "snowplow", "soap dispenser", "soccer ball", "sock", "solar dish", "sombrero",
        "soup bowl", "space bar", "space heater", "space shuttle", "spatula", "speedboat", "spider web",
        "spindle", "sports car", "spotlight", "stage", "steam locomotive", "steel arch bridge", "steel drum",
        "stethoscope", "stole", "stone wall", "stopwatch", "stove", "strainer", "streetcar", "stretcher",
        "studio couch", "stupa", "submarine", "suit", "sundial", "sunglass", "sunglasses", "sunscreen",
        "suspension bridge", "swab", "sweatshirt", "swimming trunks", "swing", "switch", "syringe",
        "table lamp", "tank", "tape player", "teapot", "teddy", "television", "tennis ball", "thatch",
        "theater curtain", "thimble", "thresher", "throne", "tile roof", "toaster", "tobacco shop",
        "toilet seat", "torch", "totem pole", "tow truck", "toyshop", "tractor", "trailer truck", "tray",
        "trench coat", "tricycle", "trimaran", "tripod", "triumphal arch", "trolleybus", "trombone", "tub",
        "turnstile", "typewriter keyboard", "umbrella", "unicycle", "upright", "vacuum", "vase", "vault",
        "velvet", "vending machine", "vestment", "viaduct", "violin", "volleyball", "waffle iron", "wall clock",
        "wallet", "wardrobe", "warplane", "washbasin", "washer", "water bottle", "water jug", "water tower",
        "whiskey jug", "whistle", "wig", "window screen", "window shade", "Windsor tie", "wine bottle", "wing",
        "wok", "wooden spoon", "wool", "worm fence", "wreck", "yawl", "yurt", "web site", "comic book",
        "crossword puzzle", "street sign", "traffic light", "book jacket", "menu", "plate", "guacamole",
        "consomme", "hot pot", "trifle", "ice cream", "ice lolly", "French loaf", "bagel", "pretzel",
        "cheeseburger", "hotdog", "mashed potato", "head cabbage", "broccoli", "cauliflower", "zucchini",
        "spaghetti squash", "acorn squash", "butternut squash", "cucumber", "artichoke", "bell pepper",
        "cardoon", "mushroom", "Granny Smith", "strawberry", "orange", "lemon", "fig", "pineapple", "banana",
        "jackfruit", "custard apple", "pomegranate", "hay", "carbonara", "chocolate sauce", "dough",
        "meat loaf", "pizza", "potpie", "burrito", "red wine", "espresso", "cup", "eggnog", "alp", "bubble",
        "cliff", "coral reef", "geyser", "lakeside", "promontory", "sandbar", "seashore", "valley", "volcano",
        "ballplayer", "groom", "scuba diver", "rapeseed", "daisy", "yellow lady's slipper", "corn", "acorn",
        "hip", "buckeye", "coral fungus", "agaric", "gyromitra", "stinkhorn", "earthstar", "hen-of-the-woods",
        "bolete", "ear", "toilet tissue"
    };

    // table size derived from the array itself so the bounds check cannot drift
    const int num_classes = (int)(sizeof(class_names) / sizeof(class_names[0]));

    int y_offset = 0;
    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        // fprintf(stderr, "%d = %.5f\n", obj.label, obj.prob);

        // never index the table with a label it does not contain
        const char* name = "unknown";
        if (obj.label >= 0 && obj.label < num_classes)
            name = class_names[obj.label];

        char text[256];
        snprintf(text, sizeof(text), "%4.1f%% %s", obj.prob * 100, name);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        int x = 0;
        int y = y_offset;

        // white background box, then black text on top of it
        cv::rectangle(rgb, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(rgb, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));

        // next line starts one text height further down
        y_offset += label_size.height;
    }

    return 0;
}
... ...
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
// 1. install
// pip3 install -U ultralytics pnnx ncnn
// 2. export yolov8 torchscript
// yolo export model=yolov8n.pt format=torchscript
// 3. convert torchscript with static shape
// pnnx yolov8n.torchscript
// 4. modify yolov8n_pnnx.py for dynamic shape inference
// A. modify reshape to support dynamic image sizes
// B. permute tensor before concat and adjust concat axis
// C. drop post-process part
// before:
// v_165 = v_142.view(1, 144, 6400)
// v_166 = v_153.view(1, 144, 1600)
// v_167 = v_164.view(1, 144, 400)
// v_168 = torch.cat((v_165, v_166, v_167), dim=2)
// ...
// after:
// v_165 = v_142.view(1, 144, -1).transpose(1, 2)
// v_166 = v_153.view(1, 144, -1).transpose(1, 2)
// v_167 = v_164.view(1, 144, -1).transpose(1, 2)
// v_168 = torch.cat((v_165, v_166, v_167), dim=1)
// return v_168
// 5. re-export yolov8 torchscript
// python3 -c 'import yolov8n_pnnx; yolov8n_pnnx.export_torchscript()'
// 6. convert new torchscript with dynamic shape
// pnnx yolov8n_pnnx.py.pt inputshape=[1,3,640,640] inputshape2=[1,3,320,320]
// 7. now you get ncnn model files
// mv yolov8n_pnnx.py.ncnn.param yolov8n.ncnn.param
// mv yolov8n_pnnx.py.ncnn.bin yolov8n.ncnn.bin
// the out blob would be a 2-dim tensor with w=144 h=8400
//
// | bbox-reg 16 x 4 | per-class scores(80) |
// +-----+-----+-----+-----+----------------------+
// | dx0 | dy0 | dx1 | dy1 |0.1 0.0 0.0 0.5 ......|
// all /| | | | | . |
// boxes | .. | .. | .. | .. |0.0 0.9 0.0 0.0 ......|
// (8400)| | | | | . |
// \| | | | | . |
// +-----+-----+-----+-----+----------------------+
//
#include "yolov8.h"
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
static inline float intersection_area(const Object& a, const Object& b)
{
cv::Rect_<float> inter = a.rect & b.rect;
return inter.area();
}
// Sort objects[left..right] (both ends inclusive) in place by descending prob.
// Recursive partition sort whose pivot is the score of the middle element.
static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right)
{
    int lo = left;
    int hi = right;
    const float pivot = objects[(left + right) / 2].prob;

    while (lo <= hi)
    {
        // skip entries that are already on the correct side of the pivot
        for (; objects[lo].prob > pivot; ++lo)
        {
        }
        for (; objects[hi].prob < pivot; --hi)
        {
        }

        if (lo > hi)
            break;

        std::swap(objects[lo], objects[hi]);
        ++lo;
        --hi;
    }

    // the two partitions are independent of each other
    if (left < hi)
        qsort_descent_inplace(objects, left, hi);

    if (lo < right)
        qsort_descent_inplace(objects, lo, right);
}
// Sort the whole vector in place by descending prob.
// An empty vector is left untouched (there is no valid index range to pass on).
static void qsort_descent_inplace(std::vector<Object>& objects)
{
    if (!objects.empty())
        qsort_descent_inplace(objects, 0, objects.size() - 1);
}
static void nms_sorted_bboxes(const std::vector<Object>& objects, std::vector<int>& picked, float nms_threshold, bool agnostic = false)
{
picked.clear();
const int n = objects.size();
std::vector<float> areas(n);
for (int i = 0; i < n; i++)
{
areas[i] = objects[i].rect.area();
}
for (int i = 0; i < n; i++)
{
const Object& a = objects[i];
int keep = 1;
for (int j = 0; j < (int)picked.size(); j++)
{
const Object& b = objects[picked[j]];
if (!agnostic && a.label != b.label)
continue;
// intersection over union
float inter_area = intersection_area(a, b);
float union_area = areas[i] + areas[picked[j]] - inter_area;
// float IoU = inter_area / union_area
if (inter_area / union_area > nms_threshold)
keep = 0;
}
if (keep)
picked.push_back(i);
}
}
// Logistic function: 1 / (1 + e^-x), evaluated in single precision.
static inline float sigmoid(float x)
{
    const float denom = 1.0f + expf(-x);
    return 1.0f / denom;
}
// Decode the predictions of one stride level into box proposals.
//
//   pred            one row per grid cell (row index = y * num_grid_x + x); each
//                   row holds 4 x 16 box-distribution values followed by the
//                   per-class scores
//   stride          pixel size of one grid cell at this level
//   in_pad          the padded network input; only its w/h are read, to derive
//                   the grid dimensions
//   prob_threshold  cells whose best class score (after sigmoid) is below this
//                   value are skipped
//   objects         proposals are appended here (rect in padded-input
//                   coordinates, label, prob)
//
// The Softmax layer used for the box distributions is created once per call
// and reused for every cell instead of being rebuilt for each proposal.
static void generate_proposals(const ncnn::Mat& pred, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects)
{
    const int w = in_pad.w;
    const int h = in_pad.h;

    const int num_grid_x = w / stride;
    const int num_grid_y = h / stride;

    const int reg_max_1 = 16;
    const int num_class = pred.w - reg_max_1 * 4; // number of classes. 80 for COCO

    // a row too short to hold any class score cannot produce a valid label
    if (num_class <= 0)
        return;

    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_packing_layout = false;

    ncnn::Layer* softmax = ncnn::create_layer("Softmax");
    if (!softmax)
        return;

    {
        ncnn::ParamDict pd;
        pd.set(0, 1); // axis
        pd.set(1, 1);
        softmax->load_param(pd);
    }
    softmax->create_pipeline(opt);

    for (int y = 0; y < num_grid_y; y++)
    {
        for (int x = 0; x < num_grid_x; x++)
        {
            const ncnn::Mat pred_grid = pred.row_range(y * num_grid_x + x, 1);

            // find label with max score
            int label = -1;
            float score = -FLT_MAX;
            {
                const ncnn::Mat pred_score = pred_grid.range(reg_max_1 * 4, num_class);

                for (int k = 0; k < num_class; k++)
                {
                    float s = pred_score[k];
                    if (s > score)
                    {
                        label = k;
                        score = s;
                    }
                }

                // raw score -> probability
                score = sigmoid(score);
            }

            if (score < prob_threshold)
                continue;

            // view the first 64 values as 4 rows (left, top, right, bottom) of
            // 16 bins each and normalise every row in place
            ncnn::Mat pred_bbox = pred_grid.range(0, reg_max_1 * 4).reshape(reg_max_1, 4);
            softmax->forward_inplace(pred_bbox, opt);

            // each side distance is the expected bin index, scaled to pixels
            float pred_ltrb[4];
            for (int k = 0; k < 4; k++)
            {
                float dis = 0.f;
                const float* dis_after_sm = pred_bbox.row(k);
                for (int l = 0; l < reg_max_1; l++)
                {
                    dis += l * dis_after_sm[l];
                }

                pred_ltrb[k] = dis * stride;
            }

            // distances are measured from the centre of the grid cell
            const float pb_cx = (x + 0.5f) * stride;
            const float pb_cy = (y + 0.5f) * stride;

            const float x0 = pb_cx - pred_ltrb[0];
            const float y0 = pb_cy - pred_ltrb[1];
            const float x1 = pb_cx + pred_ltrb[2];
            const float y1 = pb_cy + pred_ltrb[3];

            Object obj;
            obj.rect.x = x0;
            obj.rect.y = y0;
            obj.rect.width = x1 - x0;
            obj.rect.height = y1 - y0;
            obj.label = label;
            obj.prob = score;

            objects.push_back(obj);
        }
    }

    softmax->destroy_pipeline(opt);
    delete softmax;
}
// Decode the predictions of all stride levels.
//
// `pred` stacks the rows of every level one after another, in the order given
// by `strides`; a level with stride s occupies (in_pad.w / s) * (in_pad.h / s)
// rows. Each slice is handed to the single-stride overload, which appends its
// proposals to `objects`.
static void generate_proposals(const ncnn::Mat& pred, const std::vector<int>& strides, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects)
{
    int first_row = 0;

    for (std::vector<int>::const_iterator it = strides.begin(); it != strides.end(); ++it)
    {
        const int stride = *it;

        // number of grid cells (and therefore rows) belonging to this level
        const int cells = (in_pad.w / stride) * (in_pad.h / stride);

        const ncnn::Mat level_pred = pred.row_range(first_row, cells);
        generate_proposals(level_pred, stride, in_pad, prob_threshold, objects);

        first_row += cells;
    }
}
// Run object detection on an image.
//
//   rgb      input image; its pixel buffer is handed to ncnn as PIXEL_RGB
//   objects  cleared, then filled with the boxes that survive NMS; rects are in
//            original-image coordinates, clipped to the image, and the list is
//            ordered by box area, largest first
//
// The longer image side is resized to det_target_size and the result is padded
// so both sides are multiples of the largest stride (32).
//
// Returns 0 on success and -1 when the image is empty, the target size is not
// positive, or the network could not be run (input/extract reported an error
// or produced an empty blob). On failure `objects` is left empty.
int YOLOv8_det::detect(const cv::Mat& rgb, std::vector<Object>& objects)
{
    const int target_size = det_target_size; //640;
    const float prob_threshold = 0.25f;
    const float nms_threshold = 0.45f;

    objects.clear();

    // an empty image would make the scale computation below divide by zero
    if (rgb.empty() || target_size <= 0)
        return -1;

    const int img_w = rgb.cols;
    const int img_h = rgb.rows;

    // ultralytics/cfg/models/v8/yolov8.yaml
    std::vector<int> strides(3);
    strides[0] = 8;
    strides[1] = 16;
    strides[2] = 32;
    const int max_stride = 32;

    // resize so the longer side equals target_size, keeping the aspect ratio
    int w = img_w;
    int h = img_h;
    float scale = 1.f;
    if (w > h)
    {
        scale = (float)target_size / w;
        w = target_size;
        h = h * scale;
    }
    else
    {
        scale = (float)target_size / h;
        h = target_size;
        w = w * scale;
    }

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgb.data, ncnn::Mat::PIXEL_RGB, img_w, img_h, w, h);

    // letterbox pad each side up to the next multiple of max_stride
    const int wpad = (w + max_stride - 1) / max_stride * max_stride - w;
    const int hpad = (h + max_stride - 1) / max_stride * max_stride - h;
    ncnn::Mat in_pad;
    ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f);

    // scale pixel values by 1/255, no mean subtraction
    const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
    in_pad.substract_mean_normalize(0, norm_vals);

    ncnn::Extractor ex = yolov8.create_extractor();

    if (ex.input("in0", in_pad) != 0)
        return -1;

    ncnn::Mat out;
    if (ex.extract("out0", out) != 0 || out.empty())
        return -1;

    std::vector<Object> proposals;
    generate_proposals(out, strides, in_pad, prob_threshold, proposals);

    // sort all proposals by score from highest to lowest
    qsort_descent_inplace(proposals);

    // apply nms with nms_threshold
    std::vector<int> picked;
    nms_sorted_bboxes(proposals, picked, nms_threshold);

    const int count = picked.size();

    objects.resize(count);
    for (int i = 0; i < count; i++)
    {
        objects[i] = proposals[picked[i]];

        // undo the letterbox: remove the padding offset, then undo the resize
        float x0 = (objects[i].rect.x - (wpad / 2)) / scale;
        float y0 = (objects[i].rect.y - (hpad / 2)) / scale;
        float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;
        float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;

        // clip to the original image
        x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);
        y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);
        x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);
        y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);

        objects[i].rect.x = x0;
        objects[i].rect.y = y0;
        objects[i].rect.width = x1 - x0;
        objects[i].rect.height = y1 - y0;
    }

    // sort objects by area, largest first
    struct
    {
        bool operator()(const Object& a, const Object& b) const
        {
            return a.rect.area() > b.rect.area();
        }
    } objects_area_greater;
    std::sort(objects.begin(), objects.end(), objects_area_greater);

    return 0;
}
// Draw detection results onto an image.
//
//   rgb      image to draw on (modified in place)
//   objects  boxes to draw; label indexes the 80-entry class-name table below
//
// Every object gets a box outline in a colour chosen by its position in
// `objects` (cycling through the palette) and a "<name> <score>%" caption on a
// white background placed above the box, kept inside the top and right image
// borders. A label outside the table is captioned "unknown" instead of reading
// past the end of the table. Always returns 0.
int YOLOv8_det_coco::draw(cv::Mat& rgb, const std::vector<Object>& objects)
{
    static const char* class_names[] = {
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"
    };

    static const cv::Scalar colors[] = {
        cv::Scalar(67, 54, 244),
        cv::Scalar(30, 99, 233),
        cv::Scalar(39, 176, 156),
        cv::Scalar(58, 183, 103),
        cv::Scalar(81, 181, 63),
        cv::Scalar(150, 243, 33),
        cv::Scalar(169, 244, 3),
        cv::Scalar(188, 212, 0),
        cv::Scalar(150, 136, 0),
        cv::Scalar(175, 80, 76),
        cv::Scalar(195, 74, 139),
        cv::Scalar(220, 57, 205),
        cv::Scalar(235, 59, 255),
        cv::Scalar(193, 7, 255),
        cv::Scalar(152, 0, 255),
        cv::Scalar(87, 34, 255),
        cv::Scalar(85, 72, 121),
        cv::Scalar(158, 158, 158),
        cv::Scalar(125, 139, 96)
    };

    // sizes derived from the arrays themselves so the indexing cannot drift
    const int num_classes = (int)(sizeof(class_names) / sizeof(class_names[0]));
    const size_t num_colors = sizeof(colors) / sizeof(colors[0]);

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        const cv::Scalar& color = colors[i % num_colors];

        // fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
        //         obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(rgb, obj.rect, color);

        // never index the table with a label it does not contain
        const char* name = "unknown";
        if (obj.label >= 0 && obj.label < num_classes)
            name = class_names[obj.label];

        char text[256];
        snprintf(text, sizeof(text), "%s %.1f%%", name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // caption sits just above the box, pushed back inside the image when
        // it would leave the top or right edge
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > rgb.cols)
            x = rgb.cols - label_size.width;

        cv::rectangle(rgb, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(rgb, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    return 0;
}
// Draws every detection in `objects` onto `rgb`.
//
// For each detection this paints the bounding box outline (color cycled by the
// detection's position in `objects`) and a black "<class name> <score>%"
// caption on a filled white rectangle placed above the box, clamped so that it
// does not start above the top edge or run past the right edge of the image.
//
// rgb     - image that is modified in place
// objects - detections to draw; obj.label selects an entry of the name table
//           below (presumably the Open Images V7 classes, per the class name),
//           labels outside the table are captioned "unknown"
//
// Always returns 0.
int YOLOv8_det_oiv7::draw(cv::Mat& rgb, const std::vector<Object>& objects)
{
    static const char* class_names[] = {
        "Accordion", "Adhesive tape", "Aircraft", "Airplane", "Alarm clock", "Alpaca", "Ambulance", "Animal",
        "Ant", "Antelope", "Apple", "Armadillo", "Artichoke", "Auto part", "Axe", "Backpack", "Bagel",
        "Baked goods", "Balance beam", "Ball", "Balloon", "Banana", "Band-aid", "Banjo", "Barge", "Barrel",
        "Baseball bat", "Baseball glove", "Bat (Animal)", "Bathroom accessory", "Bathroom cabinet", "Bathtub",
        "Beaker", "Bear", "Bed", "Bee", "Beehive", "Beer", "Beetle", "Bell pepper", "Belt", "Bench", "Bicycle",
        "Bicycle helmet", "Bicycle wheel", "Bidet", "Billboard", "Billiard table", "Binoculars", "Bird",
        "Blender", "Blue jay", "Boat", "Bomb", "Book", "Bookcase", "Boot", "Bottle", "Bottle opener",
        "Bow and arrow", "Bowl", "Bowling equipment", "Box", "Boy", "Brassiere", "Bread", "Briefcase",
        "Broccoli", "Bronze sculpture", "Brown bear", "Building", "Bull", "Burrito", "Bus", "Bust", "Butterfly",
        "Cabbage", "Cabinetry", "Cake", "Cake stand", "Calculator", "Camel", "Camera", "Can opener", "Canary",
        "Candle", "Candy", "Cannon", "Canoe", "Cantaloupe", "Car", "Carnivore", "Carrot", "Cart", "Cassette deck",
        "Castle", "Cat", "Cat furniture", "Caterpillar", "Cattle", "Ceiling fan", "Cello", "Centipede",
        "Chainsaw", "Chair", "Cheese", "Cheetah", "Chest of drawers", "Chicken", "Chime", "Chisel", "Chopsticks",
        "Christmas tree", "Clock", "Closet", "Clothing", "Coat", "Cocktail", "Cocktail shaker", "Coconut",
        "Coffee", "Coffee cup", "Coffee table", "Coffeemaker", "Coin", "Common fig", "Common sunflower",
        "Computer keyboard", "Computer monitor", "Computer mouse", "Container", "Convenience store", "Cookie",
        "Cooking spray", "Corded phone", "Cosmetics", "Couch", "Countertop", "Cowboy hat", "Crab", "Cream",
        "Cricket ball", "Crocodile", "Croissant", "Crown", "Crutch", "Cucumber", "Cupboard", "Curtain",
        "Cutting board", "Dagger", "Dairy Product", "Deer", "Desk", "Dessert", "Diaper", "Dice", "Digital clock",
        "Dinosaur", "Dishwasher", "Dog", "Dog bed", "Doll", "Dolphin", "Door", "Door handle", "Doughnut",
        "Dragonfly", "Drawer", "Dress", "Drill (Tool)", "Drink", "Drinking straw", "Drum", "Duck", "Dumbbell",
        "Eagle", "Earrings", "Egg (Food)", "Elephant", "Envelope", "Eraser", "Face powder", "Facial tissue holder",
        "Falcon", "Fashion accessory", "Fast food", "Fax", "Fedora", "Filing cabinet", "Fire hydrant",
        "Fireplace", "Fish", "Flag", "Flashlight", "Flower", "Flowerpot", "Flute", "Flying disc", "Food",
        "Food processor", "Football", "Football helmet", "Footwear", "Fork", "Fountain", "Fox", "French fries",
        "French horn", "Frog", "Fruit", "Frying pan", "Furniture", "Garden Asparagus", "Gas stove", "Giraffe",
        "Girl", "Glasses", "Glove", "Goat", "Goggles", "Goldfish", "Golf ball", "Golf cart", "Gondola",
        "Goose", "Grape", "Grapefruit", "Grinder", "Guacamole", "Guitar", "Hair dryer", "Hair spray", "Hamburger",
        "Hammer", "Hamster", "Hand dryer", "Handbag", "Handgun", "Harbor seal", "Harmonica", "Harp",
        "Harpsichord", "Hat", "Headphones", "Heater", "Hedgehog", "Helicopter", "Helmet", "High heels",
        "Hiking equipment", "Hippopotamus", "Home appliance", "Honeycomb", "Horizontal bar", "Horse", "Hot dog",
        "House", "Houseplant", "Human arm", "Human beard", "Human body", "Human ear", "Human eye", "Human face",
        "Human foot", "Human hair", "Human hand", "Human head", "Human leg", "Human mouth", "Human nose",
        "Humidifier", "Ice cream", "Indoor rower", "Infant bed", "Insect", "Invertebrate", "Ipod", "Isopod",
        "Jacket", "Jacuzzi", "Jaguar (Animal)", "Jeans", "Jellyfish", "Jet ski", "Jug", "Juice", "Kangaroo",
        "Kettle", "Kitchen & dining room table", "Kitchen appliance", "Kitchen knife", "Kitchen utensil",
        "Kitchenware", "Kite", "Knife", "Koala", "Ladder", "Ladle", "Ladybug", "Lamp", "Land vehicle",
        "Lantern", "Laptop", "Lavender (Plant)", "Lemon", "Leopard", "Light bulb", "Light switch", "Lighthouse",
        "Lily", "Limousine", "Lion", "Lipstick", "Lizard", "Lobster", "Loveseat", "Luggage and bags", "Lynx",
        "Magpie", "Mammal", "Man", "Mango", "Maple", "Maracas", "Marine invertebrates", "Marine mammal",
        "Measuring cup", "Mechanical fan", "Medical equipment", "Microphone", "Microwave oven", "Milk",
        "Miniskirt", "Mirror", "Missile", "Mixer", "Mixing bowl", "Mobile phone", "Monkey", "Moths and butterflies",
        "Motorcycle", "Mouse", "Muffin", "Mug", "Mule", "Mushroom", "Musical instrument", "Musical keyboard",
        "Nail (Construction)", "Necklace", "Nightstand", "Oboe", "Office building", "Office supplies", "Orange",
        "Organ (Musical Instrument)", "Ostrich", "Otter", "Oven", "Owl", "Oyster", "Paddle", "Palm tree",
        "Pancake", "Panda", "Paper cutter", "Paper towel", "Parachute", "Parking meter", "Parrot", "Pasta",
        "Pastry", "Peach", "Pear", "Pen", "Pencil case", "Pencil sharpener", "Penguin", "Perfume", "Person",
        "Personal care", "Personal flotation device", "Piano", "Picnic basket", "Picture frame", "Pig",
        "Pillow", "Pineapple", "Pitcher (Container)", "Pizza", "Pizza cutter", "Plant", "Plastic bag", "Plate",
        "Platter", "Plumbing fixture", "Polar bear", "Pomegranate", "Popcorn", "Porch", "Porcupine", "Poster",
        "Potato", "Power plugs and sockets", "Pressure cooker", "Pretzel", "Printer", "Pumpkin", "Punching bag",
        "Rabbit", "Raccoon", "Racket", "Radish", "Ratchet (Device)", "Raven", "Rays and skates", "Red panda",
        "Refrigerator", "Remote control", "Reptile", "Rhinoceros", "Rifle", "Ring binder", "Rocket",
        "Roller skates", "Rose", "Rugby ball", "Ruler", "Salad", "Salt and pepper shakers", "Sandal",
        "Sandwich", "Saucer", "Saxophone", "Scale", "Scarf", "Scissors", "Scoreboard", "Scorpion",
        "Screwdriver", "Sculpture", "Sea lion", "Sea turtle", "Seafood", "Seahorse", "Seat belt", "Segway",
        "Serving tray", "Sewing machine", "Shark", "Sheep", "Shelf", "Shellfish", "Shirt", "Shorts",
        "Shotgun", "Shower", "Shrimp", "Sink", "Skateboard", "Ski", "Skirt", "Skull", "Skunk", "Skyscraper",
        "Slow cooker", "Snack", "Snail", "Snake", "Snowboard", "Snowman", "Snowmobile", "Snowplow",
        "Soap dispenser", "Sock", "Sofa bed", "Sombrero", "Sparrow", "Spatula", "Spice rack", "Spider",
        "Spoon", "Sports equipment", "Sports uniform", "Squash (Plant)", "Squid", "Squirrel", "Stairs",
        "Stapler", "Starfish", "Stationary bicycle", "Stethoscope", "Stool", "Stop sign", "Strawberry",
        "Street light", "Stretcher", "Studio couch", "Submarine", "Submarine sandwich", "Suit", "Suitcase",
        "Sun hat", "Sunglasses", "Surfboard", "Sushi", "Swan", "Swim cap", "Swimming pool", "Swimwear",
        "Sword", "Syringe", "Table", "Table tennis racket", "Tablet computer", "Tableware", "Taco", "Tank",
        "Tap", "Tart", "Taxi", "Tea", "Teapot", "Teddy bear", "Telephone", "Television", "Tennis ball",
        "Tennis racket", "Tent", "Tiara", "Tick", "Tie", "Tiger", "Tin can", "Tire", "Toaster", "Toilet",
        "Toilet paper", "Tomato", "Tool", "Toothbrush", "Torch", "Tortoise", "Towel", "Tower", "Toy",
        "Traffic light", "Traffic sign", "Train", "Training bench", "Treadmill", "Tree", "Tree house",
        "Tripod", "Trombone", "Trousers", "Truck", "Trumpet", "Turkey", "Turtle", "Umbrella", "Unicycle",
        "Van", "Vase", "Vegetable", "Vehicle", "Vehicle registration plate", "Violin", "Volleyball (Ball)",
        "Waffle", "Waffle iron", "Wall clock", "Wardrobe", "Washing machine", "Waste container", "Watch",
        "Watercraft", "Watermelon", "Weapon", "Whale", "Wheel", "Wheelchair", "Whisk", "Whiteboard", "Willow",
        "Window", "Window blind", "Wine", "Wine glass", "Wine rack", "Winter melon", "Wok", "Woman",
        "Wood-burning stove", "Woodpecker", "Worm", "Wrench", "Zebra", "Zucchini"
    };
    static const int num_classes = sizeof(class_names) / sizeof(class_names[0]);

    static const cv::Scalar colors[] = {
        cv::Scalar( 67, 54, 244),
        cv::Scalar( 30, 99, 233),
        cv::Scalar( 39, 176, 156),
        cv::Scalar( 58, 183, 103),
        cv::Scalar( 81, 181, 63),
        cv::Scalar(150, 243, 33),
        cv::Scalar(169, 244, 3),
        cv::Scalar(188, 212, 0),
        cv::Scalar(150, 136, 0),
        cv::Scalar(175, 80, 76),
        cv::Scalar(195, 74, 139),
        cv::Scalar(220, 57, 205),
        cv::Scalar(235, 59, 255),
        cv::Scalar(193, 7, 255),
        cv::Scalar(152, 0, 255),
        cv::Scalar( 87, 34, 255),
        cv::Scalar( 85, 72, 121),
        cv::Scalar(158, 158, 158),
        cv::Scalar(125, 139, 96)
    };
    // derived from the table so the palette can change without touching the loop
    static const size_t num_colors = sizeof(colors) / sizeof(colors[0]);

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];
        const cv::Scalar& color = colors[i % num_colors];

        // fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
        //         obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(rgb, obj.rect, color);

        // never index past the name table when a label is out of range
        const bool known_label = obj.label >= 0 && obj.label < num_classes;
        const char* class_name = known_label ? class_names[obj.label] : "unknown";

        char text[256];
        snprintf(text, sizeof(text), "%s %.1f%%", class_name, obj.prob * 100);

        int baseLine = 0;
        const cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // caption sits directly above the box, clamped to the top and right edges
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > rgb.cols)
            x = rgb.cols - label_size.width;

        cv::rectangle(rgb, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(rgb, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    return 0;
}
... ...
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
// 1. install
// pip3 install -U ultralytics pnnx ncnn
// 2. export yolov8-obb torchscript
// yolo export model=yolov8n-obb.pt format=torchscript
// 3. convert torchscript with static shape
// pnnx yolov8n-obb.torchscript
// 4. modify yolov8n_obb_pnnx.py for dynamic shape inference
// A. modify reshape to support dynamic image sizes
// B. permute tensor before concat and adjust concat axis
// C. drop post-process part
// before:
// v_137 = v_136.view(1, 1, 16384)
// v_143 = v_142.view(1, 1, 4096)
// v_149 = v_148.view(1, 1, 1024)
// v_150 = torch.cat((v_137, v_143, v_149), dim=2)
// ...
// v_186 = v_163.view(1, 79, 16384)
// v_187 = v_174.view(1, 79, 4096)
// v_188 = v_185.view(1, 79, 1024)
// v_189 = torch.cat((v_186, v_187, v_188), dim=2)
// ...
// after:
// v_137 = v_136.view(1, 1, -1).transpose(1, 2)
// v_143 = v_142.view(1, 1, -1).transpose(1, 2)
// v_149 = v_148.view(1, 1, -1).transpose(1, 2)
// v_150 = torch.cat((v_137, v_143, v_149), dim=1)
// ...
// v_186 = v_163.view(1, 79, -1).transpose(1, 2)
// v_187 = v_174.view(1, 79, -1).transpose(1, 2)
// v_188 = v_185.view(1, 79, -1).transpose(1, 2)
// v_189 = torch.cat((v_186, v_187, v_188), dim=1)
// return v_189, v_150
// 5. re-export yolov8-obb torchscript
// python3 -c 'import yolov8n_obb_pnnx; yolov8n_obb_pnnx.export_torchscript()'
// 6. convert new torchscript with dynamic shape
// pnnx yolov8n_obb_pnnx.py.pt inputshape=[1,3,1024,1024] inputshape2=[1,3,512,512]
// 7. now you get ncnn model files
// mv yolov8n_obb_pnnx.py.ncnn.param yolov8n_obb.ncnn.param
// mv yolov8n_obb_pnnx.py.ncnn.bin yolov8n_obb.ncnn.bin
// the first out blob (out0) would be a 2-dim tensor with w=79 h=21504
//
// | bbox-reg 16 x 4 |score(15)|
// +-----+-----+-----+-----+---------+
// | dx0 | dy0 | dx1 | dy1 | 0.1 ... |
// all /| | | | | ... |
// boxes | .. | .. | .. | .. | 0.0 ... |
// (21504)| | | | | . ... |
// \| | | | | . ... |
// +-----+-----+-----+-----+---------+
//
// the second out blob (out1) would be a 2-dim tensor with w=1 h=21504
//
// | degree(1)|
// +----------+
// | 0.1 |
// all /| |
// boxes | 0.0 |
// (21504)| . |
// \| . |
// +----------+
//
#include "yolov8.h"
#include "layer.h"
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <float.h>
#include <stdio.h>
#include <vector>
static inline float intersection_area(const Object& a, const Object& b)
{
std::vector<cv::Point2f> intersection;
cv::rotatedRectangleIntersection(a.rrect, b.rrect, intersection);
if (intersection.empty())
return 0.f;
return cv::contourArea(intersection);
}
// Quicksort of objects[left..right] (both bounds inclusive) into descending
// order of `prob`, using the middle element's score as the pivot.
static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right)
{
    int lo = left;
    int hi = right;
    const float pivot = objects[(left + right) / 2].prob;

    while (lo <= hi)
    {
        // skip elements that are already on the correct side of the pivot
        for (; objects[lo].prob > pivot; ++lo)
        {
        }
        for (; objects[hi].prob < pivot; --hi)
        {
        }

        if (lo > hi)
            break;

        std::swap(objects[lo], objects[hi]);
        ++lo;
        --hi;
    }

    // the two partitions are independent of each other
    if (left < hi)
        qsort_descent_inplace(objects, left, hi);
    if (lo < right)
        qsort_descent_inplace(objects, lo, right);
}
static void qsort_descent_inplace(std::vector<Object>& objects)
{
if (objects.empty())
return;
qsort_descent_inplace(objects, 0, objects.size() - 1);
}
// Greedy non-maximum suppression over rotated boxes.
//
// Walks `objects` in order and keeps an object unless its IoU with an already
// kept object exceeds `nms_threshold`. Earlier entries therefore win over later
// ones; the caller in this file sorts by descending score first.
//
// objects       - candidate objects (rrect, label are read)
// picked        - output: indices into `objects` of the kept candidates
//                 (cleared first)
// nms_threshold - IoU above which a candidate is suppressed
// agnostic      - when false, only objects with the same label suppress each
//                 other
static void nms_sorted_bboxes(const std::vector<Object>& objects, std::vector<int>& picked, float nms_threshold, bool agnostic = false)
{
    picked.clear();

    const int n = objects.size();

    std::vector<float> areas(n);
    for (int i = 0; i < n; i++)
    {
        areas[i] = objects[i].rrect.size.area();
    }

    for (int i = 0; i < n; i++)
    {
        const Object& a = objects[i];

        bool keep = true;
        for (int j = 0; j < (int)picked.size(); j++)
        {
            const int picked_index = picked[j];
            const Object& b = objects[picked_index];

            if (!agnostic && a.label != b.label)
                continue;

            // intersection over union
            const float inter_area = intersection_area(a, b);
            const float union_area = areas[i] + areas[picked_index] - inter_area;
            if (inter_area / union_area > nms_threshold)
            {
                // one overlapping winner is enough; skip the remaining
                // (expensive) rotated-rectangle intersections
                keep = false;
                break;
            }
        }

        if (keep)
            picked.push_back(i);
    }
}
// Logistic function: maps any real x into the range [0, 1].
static inline float sigmoid(float x)
{
    const float decay = expf(-x);
    return 1.0f / (1.0f + decay);
}
// Decodes the predictions of one stride level into rotated-box proposals.
//
// pred           - one row per grid cell (row-major over the grid); each row
//                  holds 16 x 4 box-regression bins followed by the class scores
// pred_angle     - one row per grid cell; element 0 is the raw angle prediction
// stride         - pixel stride of this level; the grid is in_pad.w / stride by
//                  in_pad.h / stride cells
// in_pad         - padded network input, only its w / h are read
// prob_threshold - minimum sigmoid class score for a cell to yield a proposal
// objects        - output: proposals are appended (rrect, label, prob set), in
//                  coordinates of the padded input
static void generate_proposals(const ncnn::Mat& pred, const ncnn::Mat& pred_angle, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects)
{
    const int w = in_pad.w;
    const int h = in_pad.h;

    const int num_grid_x = w / stride;
    const int num_grid_y = h / stride;

    const int reg_max_1 = 16;
    const int num_class = pred.w - reg_max_1 * 4; // number of classes. 15 for DOTAv1

    // The softmax layer is identical for every cell, so build it once per call
    // instead of once per accepted cell.
    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_packing_layout = false;

    ncnn::Layer* softmax = ncnn::create_layer("Softmax");
    {
        ncnn::ParamDict pd;
        pd.set(0, 1); // axis
        pd.set(1, 1);
        softmax->load_param(pd);
    }
    softmax->create_pipeline(opt);

    for (int y = 0; y < num_grid_y; y++)
    {
        for (int x = 0; x < num_grid_x; x++)
        {
            const int cell = y * num_grid_x + x;
            const ncnn::Mat pred_grid = pred.row_range(cell, 1);

            // find label with max score
            int label = -1;
            float score = -FLT_MAX;
            {
                const ncnn::Mat pred_score = pred_grid.range(reg_max_1 * 4, num_class);

                for (int k = 0; k < num_class; k++)
                {
                    const float s = pred_score[k];
                    if (s > score)
                    {
                        label = k;
                        score = s;
                    }
                }

                score = sigmoid(score);
            }

            if (score < prob_threshold)
                continue;

            // softmax over the 16 bins of each of the 4 box sides
            ncnn::Mat pred_bbox = pred_grid.range(0, reg_max_1 * 4).reshape(reg_max_1, 4).clone();
            softmax->forward_inplace(pred_bbox, opt);

            // expected bin index per side, scaled to pixels
            float pred_ltrb[4];
            for (int k = 0; k < 4; k++)
            {
                float dis = 0.f;
                const float* dis_after_sm = pred_bbox.row(k);
                for (int l = 0; l < reg_max_1; l++)
                {
                    dis += l * dis_after_sm[l];
                }

                pred_ltrb[k] = dis * stride;
            }

            // center of the grid cell in padded-input pixels
            const float pb_cx = (x + 0.5f) * stride;
            const float pb_cy = (y + 0.5f) * stride;

            const float angle = sigmoid(pred_angle.row(cell)[0]) - 0.25f;

            const float angle_rad = angle * 3.14159265358979323846f;
            const float angle_degree = angle * 180.f;

            const float cos_a = cosf(angle_rad);
            const float sin_a = sinf(angle_rad);

            // offset of the box center from the cell center, rotated by the angle
            const float xx = (pred_ltrb[2] - pred_ltrb[0]) * 0.5f;
            const float yy = (pred_ltrb[3] - pred_ltrb[1]) * 0.5f;
            const float xr = xx * cos_a - yy * sin_a;
            const float yr = xx * sin_a + yy * cos_a;
            const float cx = pb_cx + xr;
            const float cy = pb_cy + yr;
            const float ww = pred_ltrb[2] + pred_ltrb[0];
            const float hh = pred_ltrb[3] + pred_ltrb[1];

            Object obj;
            obj.rrect = cv::RotatedRect(cv::Point2f(cx, cy), cv::Size_<float>(ww, hh), angle_degree);
            obj.label = label;
            obj.prob = score;

            objects.push_back(obj);
        }
    }

    softmax->destroy_pipeline(opt);
    delete softmax;
}
// Decodes proposals for every stride level. The rows of `pred` and
// `pred_angle` are consumed level by level, in the order given by `strides`;
// each level owns (in_pad.w / stride) * (in_pad.h / stride) consecutive rows.
static void generate_proposals(const ncnn::Mat& pred, const ncnn::Mat& pred_angle, const std::vector<int>& strides, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects)
{
    int first_row = 0;

    for (size_t level = 0; level < strides.size(); level++)
    {
        const int stride = strides[level];
        const int cells = (in_pad.w / stride) * (in_pad.h / stride);

        const ncnn::Mat level_pred = pred.row_range(first_row, cells);
        const ncnn::Mat level_angle = pred_angle.row_range(first_row, cells);
        generate_proposals(level_pred, level_angle, stride, in_pad, prob_threshold, objects);

        first_row += cells;
    }
}
// Runs oriented-bounding-box detection on an RGB image.
//
// The image is resized so that its longer side equals det_target_size, padded
// with the value 114 to a multiple of the largest stride (32), normalized by
// 1/255 and fed to the network. Proposals above a 0.25 score are decoded,
// sorted by score, filtered by NMS (IoU 0.45) and mapped back to original
// image coordinates.
//
// rgb     - input image, read as packed RGB pixels
// objects - output: replaced with the detections (rrect, label, prob); left
//           empty when nothing is detected
//
// Returns 0 on success (including "no detections") and -1 when the image is
// empty or the network input/outputs cannot be set/extracted.
int YOLOv8_obb::detect(const cv::Mat& rgb, std::vector<Object>& objects)
{
    // always start clean so a frame without detections never reports the
    // results of a previous call
    objects.clear();

    // a zero-sized image would divide by zero when computing the scale
    if (rgb.empty())
        return -1;

    const int target_size = det_target_size; //1024;
    const float prob_threshold = 0.25f;
    const float nms_threshold = 0.45f;

    const int img_w = rgb.cols;
    const int img_h = rgb.rows;

    // ultralytics/cfg/models/v8/yolov8.yaml
    std::vector<int> strides(3);
    strides[0] = 8;
    strides[1] = 16;
    strides[2] = 32;
    const int max_stride = 32;

    // scale the longer side to target_size, keeping the aspect ratio
    int w = img_w;
    int h = img_h;
    float scale = 1.f;
    if (w > h)
    {
        scale = (float)target_size / w;
        w = target_size;
        h = h * scale;
    }
    else
    {
        scale = (float)target_size / h;
        h = target_size;
        w = w * scale;
    }

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgb.data, ncnn::Mat::PIXEL_RGB, img_w, img_h, w, h);

    // letterbox pad each side up to a multiple of max_stride
    const int wpad = (w + max_stride - 1) / max_stride * max_stride - w;
    const int hpad = (h + max_stride - 1) / max_stride * max_stride - h;
    ncnn::Mat in_pad;
    ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f);

    const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
    in_pad.substract_mean_normalize(0, norm_vals);

    ncnn::Extractor ex = yolov8.create_extractor();

    if (ex.input("in0", in_pad) != 0)
        return -1;

    // decoding an empty blob would read through a null pointer, so bail out
    ncnn::Mat out;
    if (ex.extract("out0", out) != 0)
        return -1;

    ncnn::Mat out_angle;
    if (ex.extract("out1", out_angle) != 0)
        return -1;

    std::vector<Object> proposals;
    generate_proposals(out, out_angle, strides, in_pad, prob_threshold, proposals);

    // sort all proposals by score from highest to lowest
    qsort_descent_inplace(proposals);

    // apply nms with nms_threshold
    std::vector<int> picked;
    nms_sorted_bboxes(proposals, picked, nms_threshold);

    const int count = picked.size();
    if (count == 0)
        return 0;

    objects.resize(count);
    for (int i = 0; i < count; i++)
    {
        Object obj = proposals[picked[i]];

        // adjust offset to original unpadded
        obj.rrect.center.x = (obj.rrect.center.x - (wpad / 2)) / scale;
        obj.rrect.center.y = (obj.rrect.center.y - (hpad / 2)) / scale;
        obj.rrect.size.width = (obj.rrect.size.width) / scale;
        obj.rrect.size.height = (obj.rrect.size.height) / scale;

        objects[i] = obj;
    }

    return 0;
}
// Draws rotated detections onto `rgb`.
//
// First pass: the four edges of every rotated rectangle, colored by class
// label. Second pass: a black "<class name> <score>%" caption on a filled white
// rectangle centered on each box and clamped to the image, drawn afterwards so
// that captions are not covered by outlines.
//
// rgb     - image that is modified in place
// objects - detections to draw; obj.label selects the class name and the
//           outline color, labels outside the name table are captioned
//           "unknown"
//
// Always returns 0.
int YOLOv8_obb::draw(cv::Mat& rgb, const std::vector<Object>& objects)
{
    static const char* class_names[] = {
        "plane", "ship", "storage tank", "baseball diamond", "tennis court",
        "basketball court", "ground track field", "harbor", "bridge", "large vehicle",
        "small vehicle", "helicopter", "roundabout", "soccer ball field", "swimming pool"
    };
    static const int num_classes = sizeof(class_names) / sizeof(class_names[0]);

    static const cv::Scalar colors[] = {
        cv::Scalar( 39, 176, 156),
        cv::Scalar( 58, 183, 103),
        cv::Scalar( 81, 181, 63),
        cv::Scalar(150, 243, 33),
        cv::Scalar(169, 244, 3),
        cv::Scalar(188, 212, 0),
        cv::Scalar(150, 136, 0),
        cv::Scalar(175, 80, 76),
        cv::Scalar(195, 74, 139),
        cv::Scalar(220, 57, 205),
        cv::Scalar(235, 59, 255),
        cv::Scalar(193, 7, 255),
        cv::Scalar(152, 0, 255),
        cv::Scalar( 87, 34, 255),
        cv::Scalar( 85, 72, 121),
        cv::Scalar(158, 158, 158),
        cv::Scalar(125, 139, 96)
    };
    static const int num_colors = sizeof(colors) / sizeof(colors[0]);

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        // wrap the label into the palette so an unexpected label cannot index
        // past the color table
        const int color_index = obj.label >= 0 ? obj.label % num_colors : 0;
        const cv::Scalar& color = colors[color_index];

        // fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f @ %.2f\n", obj.label, obj.prob,
        //         obj.rrect.center.x, obj.rrect.center.y, obj.rrect.size.width, obj.rrect.size.height, obj.rrect.angle);

        cv::Point2f corners[4];
        obj.rrect.points(corners);
        cv::line(rgb, corners[0], corners[1], color);
        cv::line(rgb, corners[1], corners[2], color);
        cv::line(rgb, corners[2], corners[3], color);
        cv::line(rgb, corners[3], corners[0], color);
    }

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        // never index past the name table when a label is out of range
        const bool known_label = obj.label >= 0 && obj.label < num_classes;
        const char* class_name = known_label ? class_names[obj.label] : "unknown";

        char text[256];
        snprintf(text, sizeof(text), "%s %.1f%%", class_name, obj.prob * 100);

        int baseLine = 0;
        const cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // center the caption on the box, then clamp it to the image
        int x = obj.rrect.center.x - label_size.width / 2;
        int y = obj.rrect.center.y - label_size.height / 2 - baseLine;
        if (y < 0)
            y = 0;
        if (y + label_size.height > rgb.rows)
            y = rgb.rows - label_size.height;
        if (x < 0)
            x = 0;
        if (x + label_size.width > rgb.cols)
            x = rgb.cols - label_size.width;

        cv::rectangle(rgb, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(rgb, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    return 0;
}
... ...
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
// 1. install
// pip3 install -U ultralytics pnnx ncnn
// 2. export yolov8-pose torchscript
// yolo export model=yolov8n-pose.pt format=torchscript
// 3. convert torchscript with static shape
// pnnx yolov8n-pose.torchscript
// 4. modify yolov8n_pose_pnnx.py for dynamic shape inference
// A. modify reshape to support dynamic image sizes
// B. permute tensor before concat and adjust concat axis
// C. drop post-process part
// before:
// v_137 = v_136.view(1, 51, 6400)
// v_143 = v_142.view(1, 51, 1600)
// v_149 = v_148.view(1, 51, 400)
// v_150 = torch.cat((v_137, v_143, v_149), dim=-1)
// ...
// v_184 = v_161.view(1, 65, 6400)
// v_185 = v_172.view(1, 65, 1600)
// v_186 = v_183.view(1, 65, 400)
// v_187 = torch.cat((v_184, v_185, v_186), dim=2)
// ...
// after:
// v_137 = v_136.view(1, 51, -1).transpose(1, 2)
// v_143 = v_142.view(1, 51, -1).transpose(1, 2)
// v_149 = v_148.view(1, 51, -1).transpose(1, 2)
// v_150 = torch.cat((v_137, v_143, v_149), dim=1)
// ...
// v_184 = v_161.view(1, 65, -1).transpose(1, 2)
// v_185 = v_172.view(1, 65, -1).transpose(1, 2)
// v_186 = v_183.view(1, 65, -1).transpose(1, 2)
// v_187 = torch.cat((v_184, v_185, v_186), dim=1)
// return v_187, v_150
// 5. re-export yolov8-pose torchscript
// python3 -c 'import yolov8n_pose_pnnx; yolov8n_pose_pnnx.export_torchscript()'
// 6. convert new torchscript with dynamic shape
// pnnx yolov8n_pose_pnnx.py.pt inputshape=[1,3,640,640] inputshape2=[1,3,320,320]
// 7. now you get ncnn model files
// mv yolov8n_pose_pnnx.py.ncnn.param yolov8n_pose.ncnn.param
// mv yolov8n_pose_pnnx.py.ncnn.bin yolov8n_pose.ncnn.bin
// the first out blob (out0) would be a 2-dim tensor with w=65 h=8400
//
// | bbox-reg 16 x 4 |score(1)|
// +-----+-----+-----+-----+--------+
// | dx0 | dy0 | dx1 | dy1 | 0.1 |
// all /| | | | | |
// boxes | .. | .. | .. | .. | 0.0 |
// (8400)| | | | | . |
// \| | | | | . |
// +-----+-----+-----+-----+--------+
//
// the second out blob (out1) would be a 2-dim tensor with w=51 h=8400
// | pose (51) |
// +-----------+
// |0.1........|
// all /| |
// boxes |0.0........|
// (8400)| . |
// \| . |
// +-----------+
//
#include "yolov8.h"
#include "layer.h"
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <float.h>
#include <stdio.h>
#include <vector>
static inline float intersection_area(const Object& a, const Object& b)
{
cv::Rect_<float> inter = a.rect & b.rect;
return inter.area();
}
// Quicksort of objects[left..right] (both bounds inclusive) into descending
// order of `prob`, using the middle element's score as the pivot.
static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right)
{
    int lo = left;
    int hi = right;
    const float pivot = objects[(left + right) / 2].prob;

    while (lo <= hi)
    {
        // skip elements that are already on the correct side of the pivot
        for (; objects[lo].prob > pivot; ++lo)
        {
        }
        for (; objects[hi].prob < pivot; --hi)
        {
        }

        if (lo > hi)
            break;

        std::swap(objects[lo], objects[hi]);
        ++lo;
        --hi;
    }

    // the two partitions are independent of each other
    if (left < hi)
        qsort_descent_inplace(objects, left, hi);
    if (lo < right)
        qsort_descent_inplace(objects, lo, right);
}
static void qsort_descent_inplace(std::vector<Object>& objects)
{
if (objects.empty())
return;
qsort_descent_inplace(objects, 0, objects.size() - 1);
}
// Greedy non-maximum suppression over axis-aligned boxes.
//
// Walks `objects` in order and keeps an object unless its IoU with an already
// kept object exceeds `nms_threshold`. Earlier entries therefore win over later
// ones; the caller in this file sorts by descending score first.
//
// objects       - candidate objects (rect, label are read)
// picked        - output: indices into `objects` of the kept candidates
//                 (cleared first)
// nms_threshold - IoU above which a candidate is suppressed
// agnostic      - when false, only objects with the same label suppress each
//                 other
static void nms_sorted_bboxes(const std::vector<Object>& objects, std::vector<int>& picked, float nms_threshold, bool agnostic = false)
{
    picked.clear();

    const int n = objects.size();

    std::vector<float> areas(n);
    for (int i = 0; i < n; i++)
    {
        areas[i] = objects[i].rect.area();
    }

    for (int i = 0; i < n; i++)
    {
        const Object& a = objects[i];

        bool keep = true;
        for (int j = 0; j < (int)picked.size(); j++)
        {
            const int picked_index = picked[j];
            const Object& b = objects[picked_index];

            if (!agnostic && a.label != b.label)
                continue;

            // intersection over union
            const float inter_area = intersection_area(a, b);
            const float union_area = areas[i] + areas[picked_index] - inter_area;
            if (inter_area / union_area > nms_threshold)
            {
                // one overlapping winner is enough; no need to test the rest
                keep = false;
                break;
            }
        }

        if (keep)
            picked.push_back(i);
    }
}
// Logistic function: maps any real x into the range [0, 1].
static inline float sigmoid(float x)
{
    const float denominator = 1.0f + expf(-x);
    return 1.0f / denominator;
}
// Decodes the predictions of one stride level into pose proposals.
//
// pred           - one row per grid cell (row-major over the grid); each row
//                  holds 16 x 4 box-regression bins followed by one score
// pred_points    - one row per grid cell; each row holds (x, y, score) triples,
//                  one per keypoint
// stride         - pixel stride of this level; the grid is in_pad.w / stride by
//                  in_pad.h / stride cells
// in_pad         - padded network input, only its w / h are read
// prob_threshold - minimum sigmoid score for a cell to yield a proposal
// objects        - output: proposals are appended (rect, label = 0, prob,
//                  keypoints set), in coordinates of the padded input
static void generate_proposals(const ncnn::Mat& pred, const ncnn::Mat& pred_points, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects)
{
    const int w = in_pad.w;
    const int h = in_pad.h;

    const int num_grid_x = w / stride;
    const int num_grid_y = h / stride;

    const int reg_max_1 = 16;
    const int num_points = pred_points.w / 3;

    // The softmax layer is identical for every cell, so build it once per call
    // instead of once per accepted cell.
    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_packing_layout = false;

    ncnn::Layer* softmax = ncnn::create_layer("Softmax");
    {
        ncnn::ParamDict pd;
        pd.set(0, 1); // axis
        pd.set(1, 1);
        softmax->load_param(pd);
    }
    softmax->create_pipeline(opt);

    for (int y = 0; y < num_grid_y; y++)
    {
        for (int x = 0; x < num_grid_x; x++)
        {
            const int cell = y * num_grid_x + x;
            const ncnn::Mat pred_grid = pred.row_range(cell, 1);

            // single class: the score follows the box-regression bins
            const int label = 0;
            const float score = sigmoid(pred_grid[reg_max_1 * 4]);

            if (score < prob_threshold)
                continue;

            // softmax over the 16 bins of each of the 4 box sides
            ncnn::Mat pred_bbox = pred_grid.range(0, reg_max_1 * 4).reshape(reg_max_1, 4).clone();
            softmax->forward_inplace(pred_bbox, opt);

            // expected bin index per side, scaled to pixels
            float pred_ltrb[4];
            for (int k = 0; k < 4; k++)
            {
                float dis = 0.f;
                const float* dis_after_sm = pred_bbox.row(k);
                for (int l = 0; l < reg_max_1; l++)
                {
                    dis += l * dis_after_sm[l];
                }

                pred_ltrb[k] = dis * stride;
            }

            // center of the grid cell in padded-input pixels
            const float pb_cx = (x + 0.5f) * stride;
            const float pb_cy = (y + 0.5f) * stride;

            const float x0 = pb_cx - pred_ltrb[0];
            const float y0 = pb_cy - pred_ltrb[1];
            const float x1 = pb_cx + pred_ltrb[2];
            const float y1 = pb_cy + pred_ltrb[3];

            Object obj;
            obj.rect.x = x0;
            obj.rect.y = y0;
            obj.rect.width = x1 - x0;
            obj.rect.height = y1 - y0;
            obj.label = label;
            obj.prob = score;

            // the keypoint view is only needed for accepted cells; fill the
            // object's own vector directly to avoid an extra copy
            const ncnn::Mat pred_points_grid = pred_points.row_range(cell, 1).reshape(3, num_points);
            obj.keypoints.reserve(num_points);
            for (int k = 0; k < num_points; k++)
            {
                const float* point = pred_points_grid.row(k);

                KeyPoint keypoint;
                keypoint.p.x = (x + point[0] * 2) * stride;
                keypoint.p.y = (y + point[1] * 2) * stride;
                keypoint.prob = sigmoid(point[2]);
                obj.keypoints.push_back(keypoint);
            }

            objects.push_back(obj);
        }
    }

    softmax->destroy_pipeline(opt);
    delete softmax;
}
// Decodes proposals for every stride level. The rows of `pred` and
// `pred_points` are consumed level by level, in the order given by `strides`;
// each level owns (in_pad.w / stride) * (in_pad.h / stride) consecutive rows.
static void generate_proposals(const ncnn::Mat& pred, const ncnn::Mat& pred_points, const std::vector<int>& strides, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects)
{
    int first_row = 0;

    for (size_t level = 0; level < strides.size(); level++)
    {
        const int stride = strides[level];
        const int cells = (in_pad.w / stride) * (in_pad.h / stride);

        const ncnn::Mat level_pred = pred.row_range(first_row, cells);
        const ncnn::Mat level_points = pred_points.row_range(first_row, cells);
        generate_proposals(level_pred, level_points, stride, in_pad, prob_threshold, objects);

        first_row += cells;
    }
}
// Runs pose detection on an RGB image.
//
// The image is resized so that its longer side equals det_target_size, padded
// with the value 114 to a multiple of the largest stride (32), normalized by
// 1/255 and fed to the network. Proposals above a 0.25 score are decoded,
// sorted by score, filtered by NMS (IoU 0.45), mapped back to original image
// coordinates (boxes clipped to the image) and finally ordered by box area,
// largest first.
//
// rgb     - input image, read as packed RGB pixels
// objects - output: replaced with the detections (rect, label, prob,
//           keypoints); left empty when nothing is detected
//
// Returns 0 on success (including "no detections") and -1 when the image is
// empty or the network input/outputs cannot be set/extracted.
int YOLOv8_pose::detect(const cv::Mat& rgb, std::vector<Object>& objects)
{
    // always start clean so a frame without detections never reports the
    // results of a previous call
    objects.clear();

    // a zero-sized image would divide by zero when computing the scale
    if (rgb.empty())
        return -1;

    const int target_size = det_target_size; //640;
    const float prob_threshold = 0.25f;
    const float nms_threshold = 0.45f;

    const int img_w = rgb.cols;
    const int img_h = rgb.rows;

    // ultralytics/cfg/models/v8/yolov8.yaml
    std::vector<int> strides(3);
    strides[0] = 8;
    strides[1] = 16;
    strides[2] = 32;
    const int max_stride = 32;

    // scale the longer side to target_size, keeping the aspect ratio
    int w = img_w;
    int h = img_h;
    float scale = 1.f;
    if (w > h)
    {
        scale = (float)target_size / w;
        w = target_size;
        h = h * scale;
    }
    else
    {
        scale = (float)target_size / h;
        h = target_size;
        w = w * scale;
    }

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgb.data, ncnn::Mat::PIXEL_RGB, img_w, img_h, w, h);

    // letterbox pad each side up to a multiple of max_stride
    const int wpad = (w + max_stride - 1) / max_stride * max_stride - w;
    const int hpad = (h + max_stride - 1) / max_stride * max_stride - h;
    ncnn::Mat in_pad;
    ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f);

    const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
    in_pad.substract_mean_normalize(0, norm_vals);

    ncnn::Extractor ex = yolov8.create_extractor();

    if (ex.input("in0", in_pad) != 0)
        return -1;

    // decoding an empty blob would read through a null pointer, so bail out
    ncnn::Mat out;
    if (ex.extract("out0", out) != 0)
        return -1;

    ncnn::Mat out_points;
    if (ex.extract("out1", out_points) != 0)
        return -1;

    std::vector<Object> proposals;
    generate_proposals(out, out_points, strides, in_pad, prob_threshold, proposals);

    // sort all proposals by score from highest to lowest
    qsort_descent_inplace(proposals);

    // apply nms with nms_threshold
    std::vector<int> picked;
    nms_sorted_bboxes(proposals, picked, nms_threshold);

    const int count = picked.size();
    if (count == 0)
        return 0;

    objects.resize(count);
    for (int i = 0; i < count; i++)
    {
        Object& obj = objects[i];
        obj = proposals[picked[i]];

        // adjust offset to original unpadded
        float x0 = (obj.rect.x - (wpad / 2)) / scale;
        float y0 = (obj.rect.y - (hpad / 2)) / scale;
        float x1 = (obj.rect.x + obj.rect.width - (wpad / 2)) / scale;
        float y1 = (obj.rect.y + obj.rect.height - (hpad / 2)) / scale;

        // keypoints are mapped back but, unlike the box, not clipped
        for (size_t j = 0; j < obj.keypoints.size(); j++)
        {
            obj.keypoints[j].p.x = (obj.keypoints[j].p.x - (wpad / 2)) / scale;
            obj.keypoints[j].p.y = (obj.keypoints[j].p.y - (hpad / 2)) / scale;
        }

        // clip
        x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);
        y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);
        x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);
        y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);

        obj.rect.x = x0;
        obj.rect.y = y0;
        obj.rect.width = x1 - x0;
        obj.rect.height = y1 - y0;
    }

    // sort objects by area
    struct
    {
        bool operator()(const Object& a, const Object& b) const
        {
            return a.rect.area() > b.rect.area();
        }
    } objects_area_greater;
    std::sort(objects.begin(), objects.end(), objects_area_greater);

    return 0;
}
// Render pose detections onto rgb.
//
// For every entry of objects this draws the skeleton bones, the individual
// joints, the bounding box and a "<class> <prob>%" label placed above the box.
// Bones and joints whose keypoint prob is below 0.2 are skipped.
//
// rgb      image that is drawn on in place
// objects  detections to render
//
// Always returns 0.
int YOLOv8_pose::draw(cv::Mat& rgb, const std::vector<Object>& objects)
{
    static const char* class_names[] = {"person"};
    static const int num_classes = sizeof(class_names) / sizeof(class_names[0]);

    // per-object palette, cycled by object index
    static const cv::Scalar colors[] = {
        cv::Scalar( 67,  54, 244),
        cv::Scalar( 30,  99, 233),
        cv::Scalar( 39, 176, 156),
        cv::Scalar( 58, 183, 103),
        cv::Scalar( 81, 181,  63),
        cv::Scalar(150, 243,  33),
        cv::Scalar(169, 244,   3),
        cv::Scalar(188, 212,   0),
        cv::Scalar(150, 136,   0),
        cv::Scalar(175,  80,  76),
        cv::Scalar(195,  74, 139),
        cv::Scalar(220,  57, 205),
        cv::Scalar(235,  59, 255),
        cv::Scalar(193,   7, 255),
        cv::Scalar(152,   0, 255),
        cv::Scalar( 87,  34, 255),
        cv::Scalar( 85,  72, 121),
        cv::Scalar(158, 158, 158),
        cv::Scalar(125, 139,  96)
    };
    static const int num_colors = sizeof(colors) / sizeof(colors[0]);

    // keypoint index pairs that are connected by a bone
    static const int num_bones = 16;
    static const int joint_pairs[16][2] = {
        {0, 1}, {1, 3}, {0, 2}, {2, 4}, {5, 6}, {5, 7}, {7, 9}, {6, 8}, {8, 10}, {5, 11}, {6, 12}, {11, 12}, {11, 13}, {12, 14}, {13, 15}, {14, 16}
    };

    // one color per bone, same order as joint_pairs
    static const cv::Scalar bone_colors[] = {
        cv::Scalar(  0,   0, 255),
        cv::Scalar(  0,   0, 255),
        cv::Scalar(  0,   0, 255),
        cv::Scalar(  0,   0, 255),
        cv::Scalar(  0, 255, 128),
        cv::Scalar(  0, 255, 128),
        cv::Scalar(  0, 255, 128),
        cv::Scalar(  0, 255, 128),
        cv::Scalar(  0, 255, 128),
        cv::Scalar(255, 255,  51),
        cv::Scalar(255, 255,  51),
        cv::Scalar(255, 255,  51),
        cv::Scalar(255,  51, 153),
        cv::Scalar(255,  51, 153),
        cv::Scalar(255,  51, 153),
        cv::Scalar(255,  51, 153),
    };

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];
        const cv::Scalar& color = colors[i % num_colors];

        // fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
        //         obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        // draw bone
        const int num_keypoints = (int)obj.keypoints.size();
        for (int j = 0; j < num_bones; j++)
        {
            const int ia = joint_pairs[j][0];
            const int ib = joint_pairs[j][1];

            // an object carrying fewer keypoints than the skeleton table
            // references must not be indexed out of range
            if (ia >= num_keypoints || ib >= num_keypoints)
                continue;

            const KeyPoint& p1 = obj.keypoints[ia];
            const KeyPoint& p2 = obj.keypoints[ib];

            if (p1.prob < 0.2f || p2.prob < 0.2f)
                continue;

            cv::line(rgb, p1.p, p2.p, bone_colors[j], 2);
        }

        // draw joint
        for (size_t j = 0; j < obj.keypoints.size(); j++)
        {
            const KeyPoint& keypoint = obj.keypoints[j];

            // fprintf(stderr, "%.2f %.2f = %.5f\n", keypoint.p.x, keypoint.p.y, keypoint.prob);

            if (keypoint.prob < 0.2f)
                continue;

            cv::circle(rgb, keypoint.p, 3, color, -1);
        }

        cv::rectangle(rgb, obj.rect, color);

        // fall back to a placeholder instead of reading past class_names
        const char* class_name = (obj.label >= 0 && obj.label < num_classes) ? class_names[obj.label] : "unknown";

        char text[256];
        snprintf(text, sizeof(text), "%s %.1f%%", class_name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // put the label above the box, moved back inside the image if needed
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > rgb.cols)
            x = rgb.cols - label_size.width;

        cv::rectangle(rgb, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(rgb, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    return 0;
}
... ...
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
// 1. install
// pip3 install -U ultralytics pnnx ncnn
// 2. export yolov8-seg torchscript
// yolo export model=yolov8n-seg.pt format=torchscript
// 3. convert torchscript with static shape
// pnnx yolov8n-seg.torchscript
// 4. modify yolov8n_seg_pnnx.py for dynamic shape inference
// A. modify reshape to support dynamic image sizes
// B. permute tensor before concat and adjust concat axis
// C. drop post-process part
// before:
// v_144 = v_143.view(1, 32, 6400)
// v_150 = v_149.view(1, 32, 1600)
// v_156 = v_155.view(1, 32, 400)
// v_157 = torch.cat((v_144, v_150, v_156), dim=2)
// ...
// v_191 = v_168.view(1, 144, 6400)
// v_192 = v_179.view(1, 144, 1600)
// v_193 = v_190.view(1, 144, 400)
// v_194 = torch.cat((v_191, v_192, v_193), dim=2)
// ...
// v_215 = (v_214, v_138, )
// return v_215
// after:
// v_144 = v_143.view(1, 32, -1).transpose(1, 2)
// v_150 = v_149.view(1, 32, -1).transpose(1, 2)
// v_156 = v_155.view(1, 32, -1).transpose(1, 2)
// v_157 = torch.cat((v_144, v_150, v_156), dim=1)
// ...
// v_191 = v_168.view(1, 144, -1).transpose(1, 2)
// v_192 = v_179.view(1, 144, -1).transpose(1, 2)
// v_193 = v_190.view(1, 144, -1).transpose(1, 2)
// v_194 = torch.cat((v_191, v_192, v_193), dim=1)
// return v_194, v_157, v_138
// 5. re-export yolov8-seg torchscript
// python3 -c 'import yolov8n_seg_pnnx; yolov8n_seg_pnnx.export_torchscript()'
// 6. convert new torchscript with dynamic shape
// pnnx yolov8n_seg_pnnx.py.pt inputshape=[1,3,640,640] inputshape2=[1,3,320,320]
// 7. now you get ncnn model files
// mv yolov8n_seg_pnnx.py.ncnn.param yolov8n_seg.ncnn.param
// mv yolov8n_seg_pnnx.py.ncnn.bin yolov8n_seg.ncnn.bin
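// the out0 blob would be a 2-dim tensor with w=144 h=8400 (bbox-reg 64 + 80 class scores)
// and the out1 blob a 2-dim tensor with w=32 h=8400 (mask coefficients), 176 values per box in total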
//
// | bbox-reg 16 x 4 | per-class scores(80) |
// +-----+-----+-----+-----+----------------------+
// | dx0 | dy0 | dx1 | dy1 |0.1 0.0 0.0 0.5 ......|
// all /| | | | | . |
// boxes | .. | .. | .. | .. |0.0 0.9 0.0 0.0 ......|
// (8400)| | | | | . |
// \| | | | | . |
// +-----+-----+-----+-----+----------------------+
//
//
// | mask (32) |
// +-----------+
// |0.1........|
// all /| |
// boxes |0.0........|
// (8400)| . |
// \| . |
// +-----------+
//
#include "yolov8.h"
#include "layer.h"
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <float.h>
#include <stdio.h>
#include <vector>
static inline float intersection_area(const Object& a, const Object& b)
{
cv::Rect_<float> inter = a.rect & b.rect;
return inter.area();
}
// Sort objects[left..right] (both ends inclusive) in place so that prob is
// descending. Recursive quicksort that partitions around the prob of the
// middle element of the range.
static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right)
{
    const float pivot = objects[(left + right) / 2].prob;

    int lo = left;
    int hi = right;

    while (lo <= hi)
    {
        // skip entries that already sit on the correct side of the pivot
        for (; objects[lo].prob > pivot; lo++)
        {
        }
        for (; objects[hi].prob < pivot; hi--)
        {
        }

        if (lo > hi)
            break;

        std::swap(objects[lo], objects[hi]);
        lo++;
        hi--;
    }

    // the two partitions are independent of each other
    if (left < hi)
        qsort_descent_inplace(objects, left, hi);

    if (lo < right)
        qsort_descent_inplace(objects, lo, right);
}
// Sort the whole vector in place by descending prob; an empty vector is left untouched.
static void qsort_descent_inplace(std::vector<Object>& objects)
{
    if (!objects.empty())
    {
        qsort_descent_inplace(objects, 0, objects.size() - 1);
    }
}
static void nms_sorted_bboxes(const std::vector<Object>& objects, std::vector<int>& picked, float nms_threshold, bool agnostic = false)
{
picked.clear();
const int n = objects.size();
std::vector<float> areas(n);
for (int i = 0; i < n; i++)
{
areas[i] = objects[i].rect.area();
}
for (int i = 0; i < n; i++)
{
const Object& a = objects[i];
int keep = 1;
for (int j = 0; j < (int)picked.size(); j++)
{
const Object& b = objects[picked[j]];
if (!agnostic && a.label != b.label)
continue;
// intersection over union
float inter_area = intersection_area(a, b);
float union_area = areas[i] + areas[picked[j]] - inter_area;
// float IoU = inter_area / union_area
if (inter_area / union_area > nms_threshold)
keep = 0;
}
if (keep)
picked.push_back(i);
}
}
// Logistic function 1 / (1 + e^-x), evaluated in single precision.
static inline float sigmoid(float x)
{
    const float decay = expf(-x);
    return 1.0f / (1.0f + decay);
}
// Decode the predictions of one stride level into box proposals.
//
// pred            one row per grid cell; each row holds reg_max_1 * 4 box
//                 regression values followed by the per-class scores
// stride          stride of this level in in_pad pixels
// in_pad          network input, only its w / h are read to derive the grid
// prob_threshold  proposals whose best class probability is below it are dropped
// objects         output, accepted proposals are appended
//
// The gindex of every proposal is the row index of its grid cell inside pred.
static void generate_proposals(const ncnn::Mat& pred, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects)
{
    const int w = in_pad.w;
    const int h = in_pad.h;

    const int num_grid_x = w / stride;
    const int num_grid_y = h / stride;

    const int reg_max_1 = 16;
    const int num_class = pred.w - reg_max_1 * 4; // number of classes. 80 for COCO

    // The softmax layer does not depend on the grid cell, so it is created on
    // first use and shared by all cells instead of being rebuilt per proposal.
    ncnn::Layer* softmax = 0;
    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_packing_layout = false;

    for (int y = 0; y < num_grid_y; y++)
    {
        for (int x = 0; x < num_grid_x; x++)
        {
            const ncnn::Mat pred_grid = pred.row_range(y * num_grid_x + x, 1);

            // find label with max score
            int label = -1;
            float score = -FLT_MAX;
            {
                const ncnn::Mat pred_score = pred_grid.range(reg_max_1 * 4, num_class);

                for (int k = 0; k < num_class; k++)
                {
                    float s = pred_score[k];
                    if (s > score)
                    {
                        label = k;
                        score = s;
                    }
                }

                // only the winning raw score needs the sigmoid
                score = sigmoid(score);
            }

            // written as a negated >= so that a NaN score is rejected as before
            if (!(score >= prob_threshold))
                continue;

            if (!softmax)
            {
                softmax = ncnn::create_layer("Softmax");

                ncnn::ParamDict pd;
                pd.set(0, 1); // axis
                pd.set(1, 1);
                softmax->load_param(pd);

                softmax->create_pipeline(opt);
            }

            // 4 rows (one per box side) of reg_max_1 bins each
            ncnn::Mat pred_bbox = pred_grid.range(0, reg_max_1 * 4).reshape(reg_max_1, 4).clone();
            softmax->forward_inplace(pred_bbox, opt);

            // expected bin index per side, scaled to pixels
            float pred_ltrb[4];
            for (int k = 0; k < 4; k++)
            {
                float dis = 0.f;
                const float* dis_after_sm = pred_bbox.row(k);
                for (int l = 0; l < reg_max_1; l++)
                {
                    dis += l * dis_after_sm[l];
                }

                pred_ltrb[k] = dis * stride;
            }

            // distances are measured from the center of the grid cell
            float pb_cx = (x + 0.5f) * stride;
            float pb_cy = (y + 0.5f) * stride;

            float x0 = pb_cx - pred_ltrb[0];
            float y0 = pb_cy - pred_ltrb[1];
            float x1 = pb_cx + pred_ltrb[2];
            float y1 = pb_cy + pred_ltrb[3];

            Object obj;
            obj.rect.x = x0;
            obj.rect.y = y0;
            obj.rect.width = x1 - x0;
            obj.rect.height = y1 - y0;
            obj.label = label;
            obj.prob = score;
            obj.gindex = y * num_grid_x + x;

            objects.push_back(obj);
        }
    }

    if (softmax)
    {
        softmax->destroy_pipeline(opt);
        delete softmax;
    }
}
// Decode proposals for every stride level.
//
// pred stacks the grid cells of all levels row after row, in the order of
// strides. Each level is decoded by the single-stride overload and the gindex
// of its proposals is shifted so that it addresses a row of the full pred.
static void generate_proposals(const ncnn::Mat& pred, const std::vector<int>& strides, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects)
{
    int row_base = 0;

    for (size_t level = 0; level < strides.size(); level++)
    {
        const int stride = strides[level];
        const int grid_count = (in_pad.w / stride) * (in_pad.h / stride);

        std::vector<Object> level_objects;
        generate_proposals(pred.row_range(row_base, grid_count), stride, in_pad, prob_threshold, level_objects);

        for (size_t k = 0; k < level_objects.size(); k++)
        {
            // make the level-local row index global
            level_objects[k].gindex += row_base;
            objects.push_back(level_objects[k]);
        }

        row_base += grid_count;
    }
}
// Run instance segmentation on rgb.
//
// The image is resized so that its longer side equals det_target_size, padded
// to a multiple of the largest stride and fed to the network. Box proposals
// are decoded from "out0", filtered by NMS, mapped back to the coordinates of
// rgb and clipped to the image. For every kept box a binary mask (values 0 / 1,
// CV_8UC1, same size as the box) is computed from "out1" and "out2".
//
// rgb      input image, read as packed RGB pixels
// objects  output, replaced by the detections sorted by descending box area;
//          empty when nothing is detected
//
// Always returns 0.
int YOLOv8_seg::detect(const cv::Mat& rgb, std::vector<Object>& objects)
{
    // never hand back results of an earlier call when nothing is detected
    objects.clear();

    const int target_size = det_target_size; //640;
    const float prob_threshold = 0.25f;
    const float nms_threshold = 0.45f;
    const float mask_threshold = 0.5f;

    int img_w = rgb.cols;
    int img_h = rgb.rows;

    // ultralytics/cfg/models/v8/yolov8.yaml
    std::vector<int> strides(3);
    strides[0] = 8;
    strides[1] = 16;
    strides[2] = 32;
    const int max_stride = 32;

    // letterbox pad to multiple of max_stride
    int w = img_w;
    int h = img_h;
    float scale = 1.f;
    if (w > h)
    {
        scale = (float)target_size / w;
        w = target_size;
        h = h * scale;
    }
    else
    {
        scale = (float)target_size / h;
        h = target_size;
        w = w * scale;
    }

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgb.data, ncnn::Mat::PIXEL_RGB, img_w, img_h, w, h);

    // letterbox pad to target_size rectangle
    int wpad = (w + max_stride - 1) / max_stride * max_stride - w;
    int hpad = (h + max_stride - 1) / max_stride * max_stride - h;
    ncnn::Mat in_pad;
    ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f);

    const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
    in_pad.substract_mean_normalize(0, norm_vals);

    ncnn::Extractor ex = yolov8.create_extractor();

    ex.input("in0", in_pad);

    ncnn::Mat out;
    ex.extract("out0", out);

    std::vector<Object> proposals;
    generate_proposals(out, strides, in_pad, prob_threshold, proposals);

    // sort all proposals by score from highest to lowest
    qsort_descent_inplace(proposals);

    // apply nms with nms_threshold
    std::vector<int> picked;
    nms_sorted_bboxes(proposals, picked, nms_threshold);

    int count = picked.size();
    if (count == 0)
        return 0;

    // the mask outputs are only extracted when at least one box survived
    ncnn::Mat mask_feat;
    ex.extract("out1", mask_feat);

    ncnn::Mat mask_protos;
    ex.extract("out2", mask_protos);

    // one channel per kept object, each holding that object's mask_feat row
    ncnn::Mat objects_mask_feat(mask_feat.w, 1, count);

    objects.resize(count);
    for (int i = 0; i < count; i++)
    {
        objects[i] = proposals[picked[i]];

        // adjust offset to original unpadded
        float x0 = (objects[i].rect.x - (wpad / 2)) / scale;
        float y0 = (objects[i].rect.y - (hpad / 2)) / scale;
        float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;
        float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;

        // clip
        x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);
        y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);
        x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);
        y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);

        objects[i].rect.x = x0;
        objects[i].rect.y = y0;
        objects[i].rect.width = x1 - x0;
        objects[i].rect.height = y1 - y0;

        // pick mask feat
        memcpy(objects_mask_feat.channel(i), mask_feat.row(objects[i].gindex), mask_feat.w * sizeof(float));
    }

    // process mask
    ncnn::Mat objects_mask;
    {
        ncnn::Layer* gemm = ncnn::create_layer("Gemm");

        ncnn::ParamDict pd;
        pd.set(6, 1);                              // constantC
        pd.set(7, count);                          // constantM
        pd.set(8, mask_protos.w * mask_protos.h);  // constantN
        pd.set(9, mask_feat.w);                    // constantK
        pd.set(10, -1);                            // constant_broadcast_type_C
        pd.set(11, 1);                             // output_N1M
        gemm->load_param(pd);

        ncnn::Option opt;
        opt.num_threads = 1;
        opt.use_packing_layout = false;

        gemm->create_pipeline(opt);

        std::vector<ncnn::Mat> gemm_inputs(2);
        gemm_inputs[0] = objects_mask_feat;
        gemm_inputs[1] = mask_protos.reshape(mask_protos.w * mask_protos.h, 1, mask_protos.c);
        std::vector<ncnn::Mat> gemm_outputs(1);
        gemm->forward(gemm_inputs, gemm_outputs, opt);
        objects_mask = gemm_outputs[0].reshape(mask_protos.w, mask_protos.h, count);

        gemm->destroy_pipeline(opt);

        delete gemm;
    }
    {
        ncnn::Layer* sigmoid = ncnn::create_layer("Sigmoid");

        ncnn::Option opt;
        opt.num_threads = 1;
        opt.use_packing_layout = false;

        sigmoid->create_pipeline(opt);

        sigmoid->forward_inplace(objects_mask, opt);

        sigmoid->destroy_pipeline(opt);

        delete sigmoid;
    }

    // resize mask map
    {
        ncnn::Mat objects_mask_resized;
        ncnn::resize_bilinear(objects_mask, objects_mask_resized, in_pad.w / scale, in_pad.h / scale);
        objects_mask = objects_mask_resized;
    }

    // create per-object mask
    for (int i = 0; i < count; i++)
    {
        Object& obj = objects[i];

        const ncnn::Mat mm = objects_mask.channel(i);

        const int box_w = (int)obj.rect.width;
        const int box_h = (int)obj.rect.height;

        obj.mask = cv::Mat(box_h, box_w, CV_8UC1);

        // adjust offset to original unpadded and clip inside object box
        const int mask_x0 = (int)(wpad / 2 / scale + obj.rect.x);
        for (int y = 0; y < box_h; y++)
        {
            // The resized mask size and the box are both products of integer
            // truncation, so the last row / column of a box touching the image
            // border may fall just outside mm. Clamp instead of reading past it.
            const int mask_y = std::min((int)(hpad / 2 / scale + obj.rect.y + y), mm.h - 1);

            const float* pmm = mm.row(mask_y);
            uchar* pmask = obj.mask.ptr<uchar>(y);
            for (int x = 0; x < box_w; x++)
            {
                const int mask_x = std::min(mask_x0 + x, mm.w - 1);
                pmask[x] = pmm[mask_x] > mask_threshold ? 1 : 0;
            }
        }
    }

    // sort objects by area
    struct
    {
        bool operator()(const Object& a, const Object& b) const
        {
            return a.rect.area() > b.rect.area();
        }
    } objects_area_greater;
    std::sort(objects.begin(), objects.end(), objects_area_greater);

    return 0;
}
// Render segmentation results onto rgb.
//
// For every entry of objects the pixels selected by its mask are blended
// half-and-half with a per-object color, then the bounding box and a
// "<class> <prob>%" label are drawn.
//
// rgb      3-byte-per-pixel image that is drawn on in place
// objects  detections to render; obj.mask is read as one byte per pixel of
//          the box, non-zero meaning "belongs to the object"
//
// Always returns 0.
int YOLOv8_seg::draw(cv::Mat& rgb, const std::vector<Object>& objects)
{
    static const char* class_names[] = {
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"
    };
    static const int num_classes = sizeof(class_names) / sizeof(class_names[0]);

    // per-object palette, cycled by object index
    static const cv::Scalar colors[] = {
        cv::Scalar( 67,  54, 244),
        cv::Scalar( 30,  99, 233),
        cv::Scalar( 39, 176, 156),
        cv::Scalar( 58, 183, 103),
        cv::Scalar( 81, 181,  63),
        cv::Scalar(150, 243,  33),
        cv::Scalar(169, 244,   3),
        cv::Scalar(188, 212,   0),
        cv::Scalar(150, 136,   0),
        cv::Scalar(175,  80,  76),
        cv::Scalar(195,  74, 139),
        cv::Scalar(220,  57, 205),
        cv::Scalar(235,  59, 255),
        cv::Scalar(193,   7, 255),
        cv::Scalar(152,   0, 255),
        cv::Scalar( 87,  34, 255),
        cv::Scalar( 85,  72, 121),
        cv::Scalar(158, 158, 158),
        cv::Scalar(125, 139,  96)
    };
    static const int num_colors = sizeof(colors) / sizeof(colors[0]);

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];
        const cv::Scalar& color = colors[i % num_colors];

        // fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
        //         obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        // Blend only where box, mask and image all have pixels, so that a
        // missing / smaller mask or a box reaching past the image cannot cause
        // out-of-range access.
        const int box_x = (int)obj.rect.x;
        const int box_y = (int)obj.rect.y;
        int blend_h = std::min((int)obj.rect.height, obj.mask.rows);
        int blend_w = std::min((int)obj.rect.width, obj.mask.cols);
        blend_h = std::min(blend_h, rgb.rows - box_y);
        blend_w = std::min(blend_w, rgb.cols - box_x);

        for (int y = 0; y < blend_h; y++)
        {
            const uchar* maskptr = obj.mask.ptr<const uchar>(y);
            uchar* bgrptr = rgb.ptr<uchar>(box_y + y) + box_x * 3;
            for (int x = 0; x < blend_w; x++)
            {
                if (maskptr[x])
                {
                    bgrptr[0] = bgrptr[0] * 0.5 + color[0] * 0.5;
                    bgrptr[1] = bgrptr[1] * 0.5 + color[1] * 0.5;
                    bgrptr[2] = bgrptr[2] * 0.5 + color[2] * 0.5;
                }
                bgrptr += 3;
            }
        }

        cv::rectangle(rgb, obj.rect, color);

        // fall back to a placeholder instead of reading past class_names
        const char* class_name = (obj.label >= 0 && obj.label < num_classes) ? class_names[obj.label] : "unknown";

        char text[256];
        snprintf(text, sizeof(text), "%s %.1f%%", class_name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // put the label above the box, moved back inside the image if needed
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > rgb.cols)
            x = rgb.cols - label_size.width;

        cv::rectangle(rgb, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(rgb, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    return 0;
}
... ...
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include <android/asset_manager_jni.h>
#include <android/native_window_jni.h>
#include <android/native_window.h>
#include <android/log.h>
#include <jni.h>
#include <string>
#include <vector>
#include <platform.h>
#include <benchmark.h>
#include "yolov8.h"
#include "ndkcamera.h"
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#if __ARM_NEON
#include <arm_neon.h>
#endif // __ARM_NEON
// Draw the word "unsupported" in black on a white background around the
// middle of rgb. Always returns 0.
static int draw_unsupported(cv::Mat& rgb)
{
    const char text[] = "unsupported";

    int baseline = 0;
    const cv::Size text_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 1.0, 1, &baseline);

    // top-left corner that centers the text box
    const int left = (rgb.cols - text_size.width) / 2;
    const int top = (rgb.rows - text_size.height) / 2;

    const cv::Rect background(cv::Point(left, top), cv::Size(text_size.width, text_size.height + baseline));
    cv::rectangle(rgb, background, cv::Scalar(255, 255, 255), -1);

    const cv::Point text_origin(left, top + text_size.height);
    cv::putText(rgb, text, text_origin, cv::FONT_HERSHEY_SIMPLEX, 1.0, cv::Scalar(0, 0, 0));

    return 0;
}
// Draw "FPS=<value>" in the top-right corner of rgb.
//
// The value is the mean of the last 10 per-call rates, where a rate is
// 1000 / (time since the previous call). Nothing is drawn until 10 rates have
// been collected. The history lives in function-local statics, so it is shared
// by all callers. Always returns 0.
static int draw_fps(cv::Mat& rgb)
{
    // resolve moving average
    float avg_fps = 0.f;
    {
        static double t0 = 0.f;
        static float fps_history[10] = {0.f};

        // assumes ncnn::get_current_time() reports milliseconds -- TODO confirm
        double t1 = ncnn::get_current_time();
        if (t0 == 0.f)
        {
            // first call, there is no previous timestamp to diff against
            t0 = t1;
            return 0;
        }

        float fps = 1000.f / (t1 - t0);
        t0 = t1;

        // shift the history by one slot, newest sample goes to the front
        for (int i = 9; i >= 1; i--)
        {
            fps_history[i] = fps_history[i - 1];
        }
        fps_history[0] = fps;

        // the oldest slot is still at its initial 0 until the history is full
        if (fps_history[9] == 0.f)
        {
            return 0;
        }

        for (int i = 0; i < 10; i++)
        {
            avg_fps += fps_history[i];
        }
        avg_fps /= 10.f;
    }

    // bounded formatting, "%.2f" of a huge value must not overrun the buffer
    char text[32];
    snprintf(text, sizeof(text), "FPS=%.2f", avg_fps);

    int baseLine = 0;
    cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

    int y = 0;
    int x = rgb.cols - label_size.width;

    cv::rectangle(rgb, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                  cv::Scalar(255, 255, 255), -1);

    cv::putText(rgb, text, cv::Point(x, y + label_size.height),
                cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));

    return 0;
}
// Currently loaded model, 0 while no model is loaded. Created and replaced by
// loadModel, used by MyNdkCamera::on_image_render, deleted in JNI_OnUnload.
static YOLOv8* g_yolov8 = 0;
// Held around every access to g_yolov8 in this file.
static ncnn::Mutex lock;
// Camera window whose render hook runs the loaded YOLOv8 model on each frame.
class MyNdkCamera : public NdkCameraWindow
{
public:
// Draws the detection results (or an "unsupported" notice) and the FPS onto rgb.
virtual void on_image_render(cv::Mat& rgb) const;
};
// Render hook: runs the loaded model on rgb and draws its results in place,
// or draws an "unsupported" notice when no model is loaded, then overlays the
// FPS counter.
void MyNdkCamera::on_image_render(cv::Mat& rgb) const
{
    // yolov8
    {
        // the model pointer may be swapped by loadModel, hold the lock while using it
        ncnn::MutexLockGuard g(lock);

        if (!g_yolov8)
        {
            draw_unsupported(rgb);
        }
        else
        {
            std::vector<Object> detections;
            g_yolov8->detect(rgb, detections);
            g_yolov8->draw(rgb, detections);
        }
    }

    // drawn after the lock has been released
    draw_fps(rgb);
}
// Camera window instance; allocated in JNI_OnLoad and deleted in JNI_OnUnload.
static MyNdkCamera* g_camera = 0;
extern "C" {
// Library load hook: logs the call, allocates the global camera object and
// creates the ncnn GPU instance. Reports JNI_VERSION_1_4 to the VM.
JNIEXPORT jint JNI_OnLoad(JavaVM* vm, void* reserved)
{
__android_log_print(ANDROID_LOG_DEBUG, "ncnn", "JNI_OnLoad");
g_camera = new MyNdkCamera;
ncnn::create_gpu_instance();
return JNI_VERSION_1_4;
}
// Library unload hook: logs the call, deletes the loaded model under the lock,
// destroys the ncnn GPU instance and finally deletes the camera object.
JNIEXPORT void JNI_OnUnload(JavaVM* vm, void* reserved)
{
__android_log_print(ANDROID_LOG_DEBUG, "ncnn", "JNI_OnUnload");
{
// the model is released before the GPU instance is destroyed
ncnn::MutexLockGuard g(lock);
delete g_yolov8;
g_yolov8 = 0;
}
ncnn::destroy_gpu_instance();
delete g_camera;
g_camera = 0;
}
// public native boolean loadModel(AssetManager mgr, int taskid, int modelid, int cpugpu);
//
// Select and load the model "yolov8<size><task>.ncnn.{param,bin}" from the
// app assets.
//
// taskid   0..5, selects the task suffix and the YOLOv8 subclass
// modelid  0..8, modelid % 3 selects the size letter (n / s / m) and
//          modelid / 3 the detection target size (320 / 480 / 640)
// cpugpu   0 = cpu, 1 = gpu, 2 = gpu through libvulkan_freedreno.so
//
// Returns JNI_FALSE when an argument is out of range, JNI_TRUE otherwise.
// An existing model object is kept when only the target size changes.
JNIEXPORT jboolean JNICALL Java_com_tencent_yolov8ncnn_YOLOv8Ncnn_loadModel(JNIEnv* env, jobject thiz, jobject assetManager, jint taskid, jint modelid, jint cpugpu)
{
    const bool task_ok = taskid >= 0 && taskid <= 5;
    const bool model_ok = modelid >= 0 && modelid <= 8;
    const bool device_ok = cpugpu >= 0 && cpugpu <= 2;
    if (!(task_ok && model_ok && device_ok))
    {
        return JNI_FALSE;
    }

    AAssetManager* mgr = AAssetManager_fromJava(env, assetManager);

    __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "loadModel %p", mgr);

    const char* task_suffixes[6] = {"", "_oiv7", "_seg", "_pose", "_cls", "_obb"};

    // the size letter repeats for every target-size group of three model ids
    const char* size_letters[3] = {"n", "s", "m"};
    const int size_index = modelid % 3;

    const std::string stem = std::string("yolov8") + size_letters[size_index] + task_suffixes[(int)taskid];
    const std::string parampath = stem + ".ncnn.param";
    const std::string modelpath = stem + ".ncnn.bin";

    const bool use_gpu = (int)cpugpu == 1;
    const bool use_turnip = (int)cpugpu == 2;

    // reload
    {
        ncnn::MutexLockGuard g(lock);

        // selection used by the previous call
        static int old_taskid = 0;
        static int old_modelid = 0;
        static int old_cpugpu = 0;

        const bool selection_changed = taskid != old_taskid || size_index != old_modelid || cpugpu != old_cpugpu;
        if (selection_changed)
        {
            // taskid or model or cpugpu changed
            delete g_yolov8;
            g_yolov8 = 0;
        }

        old_taskid = taskid;
        old_modelid = size_index;
        old_cpugpu = cpugpu;

        // NOTE(review): the GPU instance is recreated on every call, also when
        // the existing model object is kept -- confirm this is safe with ncnn
        ncnn::destroy_gpu_instance();

        if (use_turnip)
        {
            ncnn::create_gpu_instance("libvulkan_freedreno.so");
        }
        else if (use_gpu)
        {
            ncnn::create_gpu_instance();
        }

        if (!g_yolov8)
        {
            switch ((int)taskid)
            {
            case 0:
                g_yolov8 = new YOLOv8_det_coco;
                break;
            case 1:
                g_yolov8 = new YOLOv8_det_oiv7;
                break;
            case 2:
                g_yolov8 = new YOLOv8_seg;
                break;
            case 3:
                g_yolov8 = new YOLOv8_pose;
                break;
            case 4:
                g_yolov8 = new YOLOv8_cls;
                break;
            case 5:
                g_yolov8 = new YOLOv8_obb;
                break;
            default:
                break;
            }

            g_yolov8->load(mgr, parampath.c_str(), modelpath.c_str(), use_gpu || use_turnip);
        }

        const int target_size = (int)modelid >= 6 ? 640 : ((int)modelid >= 3 ? 480 : 320);
        g_yolov8->set_det_target_size(target_size);
    }

    return JNI_TRUE;
}
// public native boolean openCamera(int facing);
//
// Open the camera selected by facing (0 or 1). Returns JNI_FALSE for any
// other value, JNI_TRUE otherwise.
JNIEXPORT jboolean JNICALL Java_com_tencent_yolov8ncnn_YOLOv8Ncnn_openCamera(JNIEnv* env, jobject thiz, jint facing)
{
    const bool facing_ok = facing >= 0 && facing <= 1;
    if (!facing_ok)
    {
        return JNI_FALSE;
    }

    __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "openCamera %d", facing);

    g_camera->open((int)facing);

    return JNI_TRUE;
}
// public native boolean closeCamera();
//
// Logs the call and closes the global camera. Always returns JNI_TRUE.
JNIEXPORT jboolean JNICALL Java_com_tencent_yolov8ncnn_YOLOv8Ncnn_closeCamera(JNIEnv* env, jobject thiz)
{
__android_log_print(ANDROID_LOG_DEBUG, "ncnn", "closeCamera");
g_camera->close();
return JNI_TRUE;
}
// public native boolean setOutputWindow(Surface surface);
//
// Resolves the native window of the given Java Surface and hands it to the
// global camera as its output window. Always returns JNI_TRUE.
// NOTE(review): confirm who releases the window obtained from
// ANativeWindow_fromSurface -- not visible from here.
JNIEXPORT jboolean JNICALL Java_com_tencent_yolov8ncnn_YOLOv8Ncnn_setOutputWindow(JNIEnv* env, jobject thiz, jobject surface)
{
ANativeWindow* win = ANativeWindow_fromSurface(env, surface);
__android_log_print(ANDROID_LOG_DEBUG, "ncnn", "setOutputWindow %p", win);
g_camera->set_window(win);
return JNI_TRUE;
}
}
... ...