正在显示
8 个修改的文件
包含
2877 行增加
和
0 行删除
| 1 | +// Tencent is pleased to support the open source community by making ncnn available. | ||
| 2 | +// | ||
| 3 | +// Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. | ||
| 4 | +// | ||
| 5 | +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | ||
| 6 | +// in compliance with the License. You may obtain a copy of the License at | ||
| 7 | +// | ||
| 8 | +// https://opensource.org/licenses/BSD-3-Clause | ||
| 9 | +// | ||
| 10 | +// Unless required by applicable law or agreed to in writing, software distributed | ||
| 11 | +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
| 12 | +// CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
| 13 | +// specific language governing permissions and limitations under the License. | ||
| 14 | + | ||
| 15 | +#include "yolov8.h" | ||
| 16 | + | ||
| 17 | +YOLOv8::~YOLOv8() | ||
| 18 | +{ | ||
| 19 | + det_target_size = 320; | ||
| 20 | +} | ||
| 21 | + | ||
| 22 | +int YOLOv8::load(const char* parampath, const char* modelpath, bool use_gpu) | ||
| 23 | +{ | ||
| 24 | + yolov8.clear(); | ||
| 25 | + | ||
| 26 | + yolov8.opt = ncnn::Option(); | ||
| 27 | + | ||
| 28 | +#if NCNN_VULKAN | ||
| 29 | + yolov8.opt.use_vulkan_compute = use_gpu; | ||
| 30 | +#endif | ||
| 31 | + | ||
| 32 | + yolov8.load_param(parampath); | ||
| 33 | + yolov8.load_model(modelpath); | ||
| 34 | + | ||
| 35 | + return 0; | ||
| 36 | +} | ||
| 37 | + | ||
| 38 | +int YOLOv8::load(AAssetManager* mgr, const char* parampath, const char* modelpath, bool use_gpu) | ||
| 39 | +{ | ||
| 40 | + yolov8.clear(); | ||
| 41 | + | ||
| 42 | + yolov8.opt = ncnn::Option(); | ||
| 43 | + | ||
| 44 | +#if NCNN_VULKAN | ||
| 45 | + yolov8.opt.use_vulkan_compute = use_gpu; | ||
| 46 | +#endif | ||
| 47 | + | ||
| 48 | + yolov8.load_param(mgr, parampath); | ||
| 49 | + yolov8.load_model(mgr, modelpath); | ||
| 50 | + | ||
| 51 | + return 0; | ||
| 52 | +} | ||
| 53 | + | ||
| 54 | +void YOLOv8::set_det_target_size(int target_size) | ||
| 55 | +{ | ||
| 56 | + det_target_size = target_size; | ||
| 57 | +} |
| 1 | +// Tencent is pleased to support the open source community by making ncnn available. | ||
| 2 | +// | ||
| 3 | +// Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. | ||
| 4 | +// | ||
| 5 | +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | ||
| 6 | +// in compliance with the License. You may obtain a copy of the License at | ||
| 7 | +// | ||
| 8 | +// https://opensource.org/licenses/BSD-3-Clause | ||
| 9 | +// | ||
| 10 | +// Unless required by applicable law or agreed to in writing, software distributed | ||
| 11 | +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
| 12 | +// CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
| 13 | +// specific language governing permissions and limitations under the License. | ||
| 14 | + | ||
| 15 | +#ifndef YOLOV8_H | ||
| 16 | +#define YOLOV8_H | ||
| 17 | + | ||
| 18 | +#include <opencv2/core/core.hpp> | ||
| 19 | + | ||
| 20 | +#include <net.h> | ||
| 21 | + | ||
| 22 | +struct KeyPoint | ||
| 23 | +{ | ||
| 24 | + cv::Point2f p; | ||
| 25 | + float prob; | ||
| 26 | +}; | ||
| 27 | + | ||
| 28 | +struct Object | ||
| 29 | +{ | ||
| 30 | + cv::Rect_<float> rect; | ||
| 31 | + cv::RotatedRect rrect; | ||
| 32 | + int label; | ||
| 33 | + float prob; | ||
| 34 | + int gindex; | ||
| 35 | + cv::Mat mask; | ||
| 36 | + std::vector<KeyPoint> keypoints; | ||
| 37 | +}; | ||
| 38 | + | ||
| 39 | +class YOLOv8 | ||
| 40 | +{ | ||
| 41 | +public: | ||
| 42 | + virtual ~YOLOv8(); | ||
| 43 | + | ||
| 44 | + int load(const char* parampath, const char* modelpath, bool use_gpu = false); | ||
| 45 | + int load(AAssetManager* mgr, const char* parampath, const char* modelpath, bool use_gpu = false); | ||
| 46 | + | ||
| 47 | + void set_det_target_size(int target_size); | ||
| 48 | + | ||
| 49 | + virtual int detect(const cv::Mat& rgb, std::vector<Object>& objects) = 0; | ||
| 50 | + virtual int draw(cv::Mat& rgb, const std::vector<Object>& objects) = 0; | ||
| 51 | + | ||
| 52 | +protected: | ||
| 53 | + ncnn::Net yolov8; | ||
| 54 | + int det_target_size; | ||
| 55 | +}; | ||
| 56 | + | ||
| 57 | +class YOLOv8_det : public YOLOv8 | ||
| 58 | +{ | ||
| 59 | +public: | ||
| 60 | + virtual int detect(const cv::Mat& rgb, std::vector<Object>& objects); | ||
| 61 | +}; | ||
| 62 | + | ||
| 63 | +class YOLOv8_det_coco : public YOLOv8_det | ||
| 64 | +{ | ||
| 65 | +public: | ||
| 66 | + virtual int draw(cv::Mat& rgb, const std::vector<Object>& objects); | ||
| 67 | +}; | ||
| 68 | + | ||
| 69 | +class YOLOv8_det_oiv7 : public YOLOv8_det | ||
| 70 | +{ | ||
| 71 | +public: | ||
| 72 | + virtual int draw(cv::Mat& rgb, const std::vector<Object>& objects); | ||
| 73 | +}; | ||
| 74 | + | ||
| 75 | +class YOLOv8_seg : public YOLOv8 | ||
| 76 | +{ | ||
| 77 | +public: | ||
| 78 | + virtual int detect(const cv::Mat& rgb, std::vector<Object>& objects); | ||
| 79 | + virtual int draw(cv::Mat& rgb, const std::vector<Object>& objects); | ||
| 80 | +}; | ||
| 81 | + | ||
| 82 | +class YOLOv8_pose : public YOLOv8 | ||
| 83 | +{ | ||
| 84 | +public: | ||
| 85 | + virtual int detect(const cv::Mat& rgb, std::vector<Object>& objects); | ||
| 86 | + virtual int draw(cv::Mat& rgb, const std::vector<Object>& objects); | ||
| 87 | +}; | ||
| 88 | + | ||
| 89 | +class YOLOv8_cls : public YOLOv8 | ||
| 90 | +{ | ||
| 91 | +public: | ||
| 92 | + virtual int detect(const cv::Mat& rgb, std::vector<Object>& objects); | ||
| 93 | + virtual int draw(cv::Mat& rgb, const std::vector<Object>& objects); | ||
| 94 | +}; | ||
| 95 | + | ||
| 96 | +class YOLOv8_obb : public YOLOv8 | ||
| 97 | +{ | ||
| 98 | +public: | ||
| 99 | + virtual int detect(const cv::Mat& rgb, std::vector<Object>& objects); | ||
| 100 | + virtual int draw(cv::Mat& rgb, const std::vector<Object>& objects); | ||
| 101 | +}; | ||
| 102 | + | ||
| 103 | +#endif // YOLOV8_H |
| 1 | +// Tencent is pleased to support the open source community by making ncnn available. | ||
| 2 | +// | ||
| 3 | +// Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. | ||
| 4 | +// | ||
| 5 | +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | ||
| 6 | +// in compliance with the License. You may obtain a copy of the License at | ||
| 7 | +// | ||
| 8 | +// https://opensource.org/licenses/BSD-3-Clause | ||
| 9 | +// | ||
| 10 | +// Unless required by applicable law or agreed to in writing, software distributed | ||
| 11 | +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
| 12 | +// CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
| 13 | +// specific language governing permissions and limitations under the License. | ||
| 14 | + | ||
| 15 | +// 1. install | ||
| 16 | +// pip3 install -U ultralytics pnnx ncnn | ||
| 17 | +// 2. export yolov8-cls torchscript | ||
| 18 | +// yolo export model=yolov8n-cls.pt format=torchscript | ||
| 19 | +// 3. convert torchscript with static shape | ||
| 20 | +// pnnx yolov8n-cls.torchscript | ||
| 21 | +// 4. now you get ncnn model files | ||
| 22 | +// yolov8n_cls.ncnn.param | ||
| 23 | +// yolov8n_cls.ncnn.bin | ||
| 24 | + | ||
| 25 | +#include "yolov8.h" | ||
| 26 | + | ||
| 27 | +#include <opencv2/core/core.hpp> | ||
| 28 | +#include <opencv2/imgproc/imgproc.hpp> | ||
| 29 | + | ||
| 30 | +#include <float.h> | ||
| 31 | +#include <stdio.h> | ||
| 32 | +#include <vector> | ||
| 33 | + | ||
| 34 | +static void get_topk(const ncnn::Mat& cls_scores, int topk, std::vector<Object>& objects) | ||
| 35 | +{ | ||
| 36 | + // partial sort topk with index | ||
| 37 | + int size = cls_scores.w; | ||
| 38 | + std::vector<std::pair<float, int> > vec; | ||
| 39 | + vec.resize(size); | ||
| 40 | + for (int i = 0; i < size; i++) | ||
| 41 | + { | ||
| 42 | + vec[i] = std::make_pair(cls_scores[i], i); | ||
| 43 | + } | ||
| 44 | + | ||
| 45 | + std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(), | ||
| 46 | + std::greater<std::pair<float, int> >()); | ||
| 47 | + | ||
| 48 | + objects.resize(topk); | ||
| 49 | + for (int i = 0; i < topk; i++) | ||
| 50 | + { | ||
| 51 | + objects[i].label = vec[i].second; | ||
| 52 | + objects[i].prob = vec[i].first; | ||
| 53 | + } | ||
| 54 | +} | ||
| 55 | + | ||
| 56 | +int YOLOv8_cls::detect(const cv::Mat& rgb, std::vector<Object>& objects) | ||
| 57 | +{ | ||
| 58 | + const int target_size = 224; | ||
| 59 | + const int topk = 5; | ||
| 60 | + | ||
| 61 | + int img_w = rgb.cols; | ||
| 62 | + int img_h = rgb.rows; | ||
| 63 | + | ||
| 64 | + // letterbox pad | ||
| 65 | + int w = img_w; | ||
| 66 | + int h = img_h; | ||
| 67 | + float scale = 1.f; | ||
| 68 | + if (w > h) | ||
| 69 | + { | ||
| 70 | + scale = (float)target_size / w; | ||
| 71 | + w = target_size; | ||
| 72 | + h = h * scale; | ||
| 73 | + } | ||
| 74 | + else | ||
| 75 | + { | ||
| 76 | + scale = (float)target_size / h; | ||
| 77 | + h = target_size; | ||
| 78 | + w = w * scale; | ||
| 79 | + } | ||
| 80 | + | ||
| 81 | + ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgb.data, ncnn::Mat::PIXEL_RGB, img_w, img_h, w, h); | ||
| 82 | + | ||
| 83 | + // letterbox pad to target_size rectangle | ||
| 84 | + int wpad = target_size - w; | ||
| 85 | + int hpad = target_size - h; | ||
| 86 | + ncnn::Mat in_pad; | ||
| 87 | + ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f); | ||
| 88 | + | ||
| 89 | + const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f}; | ||
| 90 | + in_pad.substract_mean_normalize(0, norm_vals); | ||
| 91 | + | ||
| 92 | + ncnn::Extractor ex = yolov8.create_extractor(); | ||
| 93 | + | ||
| 94 | + ex.input("in0", in_pad); | ||
| 95 | + | ||
| 96 | + ncnn::Mat out; | ||
| 97 | + ex.extract("out0", out); | ||
| 98 | + | ||
| 99 | + // return top-5 | ||
| 100 | + get_topk(out, topk, objects); | ||
| 101 | + | ||
| 102 | + return 0; | ||
| 103 | +} | ||
| 104 | + | ||
| 105 | +int YOLOv8_cls::draw(cv::Mat& rgb, const std::vector<Object>& objects) | ||
| 106 | +{ | ||
| 107 | + static const char* class_names[] = { | ||
| 108 | + "tench", "goldfish", "great white shark", "tiger shark", "hammerhead", "electric ray", "stingray", "cock", | ||
| 109 | + "hen", "ostrich", "brambling", "goldfinch", "house finch", "junco", "indigo bunting", "robin", "bulbul", | ||
| 110 | + "jay", "magpie", "chickadee", "water ouzel", "kite", "bald eagle", "vulture", "great grey owl", | ||
| 111 | + "European fire salamander", "common newt", "eft", "spotted salamander", "axolotl", "bullfrog", "tree frog", | ||
| 112 | + "tailed frog", "loggerhead", "leatherback turtle", "mud turtle", "terrapin", "box turtle", "banded gecko", | ||
| 113 | + "common iguana", "American chameleon", "whiptail", "agama", "frilled lizard", "alligator lizard", | ||
| 114 | + "Gila monster", "green lizard", "African chameleon", "Komodo dragon", "African crocodile", | ||
| 115 | + "American alligator", "triceratops", "thunder snake", "ringneck snake", "hognose snake", "green snake", | ||
| 116 | + "king snake", "garter snake", "water snake", "vine snake", "night snake", "boa constrictor", "rock python", | ||
| 117 | + "Indian cobra", "green mamba", "sea snake", "horned viper", "diamondback", "sidewinder", "trilobite", | ||
| 118 | + "harvestman", "scorpion", "black and gold garden spider", "barn spider", "garden spider", "black widow", | ||
| 119 | + "tarantula", "wolf spider", "tick", "centipede", "black grouse", "ptarmigan", "ruffed grouse", | ||
| 120 | + "prairie chicken", "peacock", "quail", "partridge", "African grey", "macaw", "sulphur-crested cockatoo", | ||
| 121 | + "lorikeet", "coucal", "bee eater", "hornbill", "hummingbird", "jacamar", "toucan", "drake", | ||
| 122 | + "red-breasted merganser", "goose", "black swan", "tusker", "echidna", "platypus", "wallaby", "koala", | ||
| 123 | + "wombat", "jellyfish", "sea anemone", "brain coral", "flatworm", "nematode", "conch", "snail", "slug", | ||
| 124 | + "sea slug", "chiton", "chambered nautilus", "Dungeness crab", "rock crab", "fiddler crab", "king crab", | ||
| 125 | + "American lobster", "spiny lobster", "crayfish", "hermit crab", "isopod", "white stork", "black stork", | ||
| 126 | + "spoonbill", "flamingo", "little blue heron", "American egret", "bittern", "crane (bird)", "limpkin", | ||
| 127 | + "European gallinule", "American coot", "bustard", "ruddy turnstone", "red-backed sandpiper", "redshank", | ||
| 128 | + "dowitcher", "oystercatcher", "pelican", "king penguin", "albatross", "grey whale", "killer whale", | ||
| 129 | + "dugong", "sea lion", "Chihuahua", "Japanese spaniel", "Maltese dog", "Pekinese", "Shih-Tzu", | ||
| 130 | + "Blenheim spaniel", "papillon", "toy terrier", "Rhodesian ridgeback", "Afghan hound", "basset", "beagle", | ||
| 131 | + "bloodhound", "bluetick", "black-and-tan coonhound", "Walker hound", "English foxhound", "redbone", | ||
| 132 | + "borzoi", "Irish wolfhound", "Italian greyhound", "whippet", "Ibizan hound", "Norwegian elkhound", | ||
| 133 | + "otterhound", "Saluki", "Scottish deerhound", "Weimaraner", "Staffordshire bullterrier", | ||
| 134 | + "American Staffordshire terrier", "Bedlington terrier", "Border terrier", "Kerry blue terrier", | ||
| 135 | + "Irish terrier", "Norfolk terrier", "Norwich terrier", "Yorkshire terrier", "wire-haired fox terrier", | ||
| 136 | + "Lakeland terrier", "Sealyham terrier", "Airedale", "cairn", "Australian terrier", "Dandie Dinmont", | ||
| 137 | + "Boston bull", "miniature schnauzer", "giant schnauzer", "standard schnauzer", "Scotch terrier", | ||
| 138 | + "Tibetan terrier", "silky terrier", "soft-coated wheaten terrier", "West Highland white terrier", | ||
| 139 | + "Lhasa", "flat-coated retriever", "curly-coated retriever", "golden retriever", "Labrador retriever", | ||
| 140 | + "Chesapeake Bay retriever", "German short-haired pointer", "vizsla", "English setter", "Irish setter", | ||
| 141 | + "Gordon setter", "Brittany spaniel", "clumber", "English springer", "Welsh springer spaniel", | ||
| 142 | + "cocker spaniel", "Sussex spaniel", "Irish water spaniel", "kuvasz", "schipperke", "groenendael", | ||
| 143 | + "malinois", "briard", "kelpie", "komondor", "Old English sheepdog", "Shetland sheepdog", "collie", | ||
| 144 | + "Border collie", "Bouvier des Flandres", "Rottweiler", "German shepherd", "Doberman", | ||
| 145 | + "miniature pinscher", "Greater Swiss Mountain dog", "Bernese mountain dog", "Appenzeller", "EntleBucher", | ||
| 146 | + "boxer", "bull mastiff", "Tibetan mastiff", "French bulldog", "Great Dane", "Saint Bernard", | ||
| 147 | + "Eskimo dog", "malamute", "Siberian husky", "dalmatian", "affenpinscher", "basenji", "pug", "Leonberg", | ||
| 148 | + "Newfoundland", "Great Pyrenees", "Samoyed", "Pomeranian", "chow", "keeshond", "Brabancon griffon", | ||
| 149 | + "Pembroke", "Cardigan", "toy poodle", "miniature poodle", "standard poodle", "Mexican hairless", | ||
| 150 | + "timber wolf", "white wolf", "red wolf", "coyote", "dingo", "dhole", "African hunting dog", "hyena", | ||
| 151 | + "red fox", "kit fox", "Arctic fox", "grey fox", "tabby", "tiger cat", "Persian cat", "Siamese cat", | ||
| 152 | + "Egyptian cat", "cougar", "lynx", "leopard", "snow leopard", "jaguar", "lion", "tiger", "cheetah", | ||
| 153 | + "brown bear", "American black bear", "ice bear", "sloth bear", "mongoose", "meerkat", "tiger beetle", | ||
| 154 | + "ladybug", "ground beetle", "long-horned beetle", "leaf beetle", "dung beetle", "rhinoceros beetle", | ||
| 155 | + "weevil", "fly", "bee", "ant", "grasshopper", "cricket", "walking stick", "cockroach", "mantis", | ||
| 156 | + "cicada", "leafhopper", "lacewing", "dragonfly", "damselfly", "admiral", "ringlet", "monarch", | ||
| 157 | + "cabbage butterfly", "sulphur butterfly", "lycaenid", "starfish", "sea urchin", "sea cucumber", | ||
| 158 | + "wood rabbit", "hare", "Angora", "hamster", "porcupine", "fox squirrel", "marmot", "beaver", | ||
| 159 | + "guinea pig", "sorrel", "zebra", "hog", "wild boar", "warthog", "hippopotamus", "ox", "water buffalo", | ||
| 160 | + "bison", "ram", "bighorn", "ibex", "hartebeest", "impala", "gazelle", "Arabian camel", "llama", | ||
| 161 | + "weasel", "mink", "polecat", "black-footed ferret", "otter", "skunk", "badger", "armadillo", | ||
| 162 | + "three-toed sloth", "orangutan", "gorilla", "chimpanzee", "gibbon", "siamang", "guenon", "patas", | ||
| 163 | + "baboon", "macaque", "langur", "colobus", "proboscis monkey", "marmoset", "capuchin", "howler monkey", | ||
| 164 | + "titi", "spider monkey", "squirrel monkey", "Madagascar cat", "indri", "Indian elephant", | ||
| 165 | + "African elephant", "lesser panda", "giant panda", "barracouta", "eel", "coho", "rock beauty", | ||
| 166 | + "anemone fish", "sturgeon", "gar", "lionfish", "puffer", "abacus", "abaya", "academic gown", | ||
| 167 | + "accordion", "acoustic guitar", "aircraft carrier", "airliner", "airship", "altar", "ambulance", | ||
| 168 | + "amphibian", "analog clock", "apiary", "apron", "ashcan", "assault rifle", "backpack", "bakery", | ||
| 169 | + "balance beam", "balloon", "ballpoint", "Band Aid", "banjo", "bannister", "barbell", "barber chair", | ||
| 170 | + "barbershop", "barn", "barometer", "barrel", "barrow", "baseball", "basketball", "bassinet", "bassoon", | ||
| 171 | + "bathing cap", "bath towel", "bathtub", "beach wagon", "beacon", "beaker", "bearskin", "beer bottle", | ||
| 172 | + "beer glass", "bell cote", "bib", "bicycle-built-for-two", "bikini", "binder", "binoculars", | ||
| 173 | + "birdhouse", "boathouse", "bobsled", "bolo tie", "bonnet", "bookcase", "bookshop", "bottlecap", "bow", | ||
| 174 | + "bow tie", "brass", "brassiere", "breakwater", "breastplate", "broom", "bucket", "buckle", | ||
| 175 | + "bulletproof vest", "bullet train", "butcher shop", "cab", "caldron", "candle", "cannon", "canoe", | ||
| 176 | + "can opener", "cardigan", "car mirror", "carousel", "carpenter's kit", "carton", "car wheel", | ||
| 177 | + "cash machine", "cassette", "cassette player", "castle", "catamaran", "CD player", "cello", | ||
| 178 | + "cellular telephone", "chain", "chainlink fence", "chain mail", "chain saw", "chest", "chiffonier", | ||
| 179 | + "chime", "china cabinet", "Christmas stocking", "church", "cinema", "cleaver", "cliff dwelling", | ||
| 180 | + "cloak", "clog", "cocktail shaker", "coffee mug", "coffeepot", "coil", "combination lock", | ||
| 181 | + "computer keyboard", "confectionery", "container ship", "convertible", "corkscrew", "cornet", | ||
| 182 | + "cowboy boot", "cowboy hat", "cradle", "crane (machine)", "crash helmet", "crate", "crib", | ||
| 183 | + "Crock Pot", "croquet ball", "crutch", "cuirass", "dam", "desk", "desktop computer", "dial telephone", | ||
| 184 | + "diaper", "digital clock", "digital watch", "dining table", "dishrag", "dishwasher", "disk brake", | ||
| 185 | + "dock", "dogsled", "dome", "doormat", "drilling platform", "drum", "drumstick", "dumbbell", | ||
| 186 | + "Dutch oven", "electric fan", "electric guitar", "electric locomotive", "entertainment center", | ||
| 187 | + "envelope", "espresso maker", "face powder", "feather boa", "file", "fireboat", "fire engine", | ||
| 188 | + "fire screen", "flagpole", "flute", "folding chair", "football helmet", "forklift", "fountain", | ||
| 189 | + "fountain pen", "four-poster", "freight car", "French horn", "frying pan", "fur coat", "garbage truck", | ||
| 190 | + "gasmask", "gas pump", "goblet", "go-kart", "golf ball", "golfcart", "gondola", "gong", "gown", | ||
| 191 | + "grand piano", "greenhouse", "grille", "grocery store", "guillotine", "hair slide", "hair spray", | ||
| 192 | + "half track", "hammer", "hamper", "hand blower", "hand-held computer", "handkerchief", "hard disc", | ||
| 193 | + "harmonica", "harp", "harvester", "hatchet", "holster", "home theater", "honeycomb", "hook", | ||
| 194 | + "hoopskirt", "horizontal bar", "horse cart", "hourglass", "iPod", "iron", "jack-o'-lantern", "jean", | ||
| 195 | + "jeep", "jersey", "jigsaw puzzle", "jinrikisha", "joystick", "kimono", "knee pad", "knot", "lab coat", | ||
| 196 | + "ladle", "lampshade", "laptop", "lawn mower", "lens cap", "letter opener", "library", "lifeboat", | ||
| 197 | + "lighter", "limousine", "liner", "lipstick", "Loafer", "lotion", "loudspeaker", "loupe", "lumbermill", | ||
| 198 | + "magnetic compass", "mailbag", "mailbox", "maillot (tights)", "maillot (tank suit)", "manhole cover", | ||
| 199 | + "maraca", "marimba", "mask", "matchstick", "maypole", "maze", "measuring cup", "medicine chest", | ||
| 200 | + "megalith", "microphone", "microwave", "military uniform", "milk can", "minibus", "miniskirt", | ||
| 201 | + "minivan", "missile", "mitten", "mixing bowl", "mobile home", "Model T", "modem", "monastery", | ||
| 202 | + "monitor", "moped", "mortar", "mortarboard", "mosque", "mosquito net", "motor scooter", "mountain bike", | ||
| 203 | + "mountain tent", "mouse", "mousetrap", "moving van", "muzzle", "nail", "neck brace", "necklace", | ||
| 204 | + "nipple", "notebook", "obelisk", "oboe", "ocarina", "odometer", "oil filter", "organ", "oscilloscope", | ||
| 205 | + "overskirt", "oxcart", "oxygen mask", "packet", "paddle", "paddlewheel", "padlock", "paintbrush", | ||
| 206 | + "pajama", "palace", "panpipe", "paper towel", "parachute", "parallel bars", "park bench", | ||
| 207 | + "parking meter", "passenger car", "patio", "pay-phone", "pedestal", "pencil box", "pencil sharpener", | ||
| 208 | + "perfume", "Petri dish", "photocopier", "pick", "pickelhaube", "picket fence", "pickup", "pier", | ||
| 209 | + "piggy bank", "pill bottle", "pillow", "ping-pong ball", "pinwheel", "pirate", "pitcher", "plane", | ||
| 210 | + "planetarium", "plastic bag", "plate rack", "plow", "plunger", "Polaroid camera", "pole", | ||
| 211 | + "police van", "poncho", "pool table", "pop bottle", "pot", "potter's wheel", "power drill", | ||
| 212 | + "prayer rug", "printer", "prison", "projectile", "projector", "puck", "punching bag", "purse", | ||
| 213 | + "quill", "quilt", "racer", "racket", "radiator", "radio", "radio telescope", "rain barrel", | ||
| 214 | + "recreational vehicle", "reel", "reflex camera", "refrigerator", "remote control", "restaurant", | ||
| 215 | + "revolver", "rifle", "rocking chair", "rotisserie", "rubber eraser", "rugby ball", "rule", | ||
| 216 | + "running shoe", "safe", "safety pin", "saltshaker", "sandal", "sarong", "sax", "scabbard", "scale", | ||
| 217 | + "school bus", "schooner", "scoreboard", "screen", "screw", "screwdriver", "seat belt", "sewing machine", | ||
| 218 | + "shield", "shoe shop", "shoji", "shopping basket", "shopping cart", "shovel", "shower cap", | ||
| 219 | + "shower curtain", "ski", "ski mask", "sleeping bag", "slide rule", "sliding door", "slot", "snorkel", | ||
| 220 | + "snowmobile", "snowplow", "soap dispenser", "soccer ball", "sock", "solar dish", "sombrero", | ||
| 221 | + "soup bowl", "space bar", "space heater", "space shuttle", "spatula", "speedboat", "spider web", | ||
| 222 | + "spindle", "sports car", "spotlight", "stage", "steam locomotive", "steel arch bridge", "steel drum", | ||
| 223 | + "stethoscope", "stole", "stone wall", "stopwatch", "stove", "strainer", "streetcar", "stretcher", | ||
| 224 | + "studio couch", "stupa", "submarine", "suit", "sundial", "sunglass", "sunglasses", "sunscreen", | ||
| 225 | + "suspension bridge", "swab", "sweatshirt", "swimming trunks", "swing", "switch", "syringe", | ||
| 226 | + "table lamp", "tank", "tape player", "teapot", "teddy", "television", "tennis ball", "thatch", | ||
| 227 | + "theater curtain", "thimble", "thresher", "throne", "tile roof", "toaster", "tobacco shop", | ||
| 228 | + "toilet seat", "torch", "totem pole", "tow truck", "toyshop", "tractor", "trailer truck", "tray", | ||
| 229 | + "trench coat", "tricycle", "trimaran", "tripod", "triumphal arch", "trolleybus", "trombone", "tub", | ||
| 230 | + "turnstile", "typewriter keyboard", "umbrella", "unicycle", "upright", "vacuum", "vase", "vault", | ||
| 231 | + "velvet", "vending machine", "vestment", "viaduct", "violin", "volleyball", "waffle iron", "wall clock", | ||
| 232 | + "wallet", "wardrobe", "warplane", "washbasin", "washer", "water bottle", "water jug", "water tower", | ||
| 233 | + "whiskey jug", "whistle", "wig", "window screen", "window shade", "Windsor tie", "wine bottle", "wing", | ||
| 234 | + "wok", "wooden spoon", "wool", "worm fence", "wreck", "yawl", "yurt", "web site", "comic book", | ||
| 235 | + "crossword puzzle", "street sign", "traffic light", "book jacket", "menu", "plate", "guacamole", | ||
| 236 | + "consomme", "hot pot", "trifle", "ice cream", "ice lolly", "French loaf", "bagel", "pretzel", | ||
| 237 | + "cheeseburger", "hotdog", "mashed potato", "head cabbage", "broccoli", "cauliflower", "zucchini", | ||
| 238 | + "spaghetti squash", "acorn squash", "butternut squash", "cucumber", "artichoke", "bell pepper", | ||
| 239 | + "cardoon", "mushroom", "Granny Smith", "strawberry", "orange", "lemon", "fig", "pineapple", "banana", | ||
| 240 | + "jackfruit", "custard apple", "pomegranate", "hay", "carbonara", "chocolate sauce", "dough", | ||
| 241 | + "meat loaf", "pizza", "potpie", "burrito", "red wine", "espresso", "cup", "eggnog", "alp", "bubble", | ||
| 242 | + "cliff", "coral reef", "geyser", "lakeside", "promontory", "sandbar", "seashore", "valley", "volcano", | ||
| 243 | + "ballplayer", "groom", "scuba diver", "rapeseed", "daisy", "yellow lady's slipper", "corn", "acorn", | ||
| 244 | + "hip", "buckeye", "coral fungus", "agaric", "gyromitra", "stinkhorn", "earthstar", "hen-of-the-woods", | ||
| 245 | + "bolete", "ear", "toilet tissue" | ||
| 246 | + }; | ||
| 247 | + | ||
| 248 | + int y_offset = 0; | ||
| 249 | + for (size_t i = 0; i < objects.size(); i++) | ||
| 250 | + { | ||
| 251 | + const Object& obj = objects[i]; | ||
| 252 | + | ||
| 253 | + // fprintf(stderr, "%d = %.5f\n", obj.label, obj.prob); | ||
| 254 | + | ||
| 255 | + char text[256]; | ||
| 256 | + sprintf(text, "%4.1f%% %s", obj.prob * 100, class_names[obj.label]); | ||
| 257 | + | ||
| 258 | + int baseLine = 0; | ||
| 259 | + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); | ||
| 260 | + | ||
| 261 | + int x = 0; | ||
| 262 | + int y = y_offset; | ||
| 263 | + | ||
| 264 | + cv::rectangle(rgb, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), | ||
| 265 | + cv::Scalar(255, 255, 255), -1); | ||
| 266 | + | ||
| 267 | + cv::putText(rgb, text, cv::Point(x, y + label_size.height), | ||
| 268 | + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); | ||
| 269 | + | ||
| 270 | + y_offset += label_size.height; | ||
| 271 | + } | ||
| 272 | + | ||
| 273 | + return 0; | ||
| 274 | +} |
| 1 | +// Tencent is pleased to support the open source community by making ncnn available. | ||
| 2 | +// | ||
| 3 | +// Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. | ||
| 4 | +// | ||
| 5 | +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | ||
| 6 | +// in compliance with the License. You may obtain a copy of the License at | ||
| 7 | +// | ||
| 8 | +// https://opensource.org/licenses/BSD-3-Clause | ||
| 9 | +// | ||
| 10 | +// Unless required by applicable law or agreed to in writing, software distributed | ||
| 11 | +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
| 12 | +// CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
| 13 | +// specific language governing permissions and limitations under the License. | ||
| 14 | + | ||
| 15 | +// 1. install | ||
| 16 | +// pip3 install -U ultralytics pnnx ncnn | ||
| 17 | +// 2. export yolov8 torchscript | ||
| 18 | +// yolo export model=yolov8n.pt format=torchscript | ||
| 19 | +// 3. convert torchscript with static shape | ||
| 20 | +// pnnx yolov8n.torchscript | ||
| 21 | +// 4. modify yolov8n_pnnx.py for dynamic shape inference | ||
| 22 | +// A. modify reshape to support dynamic image sizes | ||
| 23 | +// B. permute tensor before concat and adjust concat axis | ||
| 24 | +// C. drop post-process part | ||
| 25 | +// before: | ||
| 26 | +// v_165 = v_142.view(1, 144, 6400) | ||
| 27 | +// v_166 = v_153.view(1, 144, 1600) | ||
| 28 | +// v_167 = v_164.view(1, 144, 400) | ||
| 29 | +// v_168 = torch.cat((v_165, v_166, v_167), dim=2) | ||
| 30 | +// ... | ||
| 31 | +// after: | ||
| 32 | +// v_165 = v_142.view(1, 144, -1).transpose(1, 2) | ||
| 33 | +// v_166 = v_153.view(1, 144, -1).transpose(1, 2) | ||
| 34 | +// v_167 = v_164.view(1, 144, -1).transpose(1, 2) | ||
| 35 | +// v_168 = torch.cat((v_165, v_166, v_167), dim=1) | ||
| 36 | +// return v_168 | ||
| 37 | +// 5. re-export yolov8 torchscript | ||
| 38 | +// python3 -c 'import yolov8n_pnnx; yolov8n_pnnx.export_torchscript()' | ||
| 39 | +// 6. convert new torchscript with dynamic shape | ||
| 40 | +// pnnx yolov8n_pnnx.py.pt inputshape=[1,3,640,640] inputshape2=[1,3,320,320] | ||
| 41 | +// 7. now you get ncnn model files | ||
| 42 | +// mv yolov8n_pnnx.py.ncnn.param yolov8n.ncnn.param | ||
| 43 | +// mv yolov8n_pnnx.py.ncnn.bin yolov8n.ncnn.bin | ||
| 44 | + | ||
| 45 | +// the out blob would be a 2-dim tensor with w=144 h=8400 | ||
| 46 | +// | ||
| 47 | +// | bbox-reg 16 x 4 | per-class scores(80) | | ||
| 48 | +// +-----+-----+-----+-----+----------------------+ | ||
| 49 | +// | dx0 | dy0 | dx1 | dy1 |0.1 0.0 0.0 0.5 ......| | ||
| 50 | +// all /| | | | | . | | ||
| 51 | +// boxes | .. | .. | .. | .. |0.0 0.9 0.0 0.0 ......| | ||
| 52 | +// (8400)| | | | | . | | ||
| 53 | +// \| | | | | . | | ||
| 54 | +// +-----+-----+-----+-----+----------------------+ | ||
| 55 | +// | ||
| 56 | + | ||
| 57 | +#include "yolov8.h" | ||
| 58 | + | ||
| 59 | +#include <opencv2/core/core.hpp> | ||
| 60 | +#include <opencv2/imgproc/imgproc.hpp> | ||
| 61 | + | ||
| 62 | +static inline float intersection_area(const Object& a, const Object& b) | ||
| 63 | +{ | ||
| 64 | + cv::Rect_<float> inter = a.rect & b.rect; | ||
| 65 | + return inter.area(); | ||
| 66 | +} | ||
| 67 | + | ||
| 68 | +static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right) | ||
| 69 | +{ | ||
| 70 | + int i = left; | ||
| 71 | + int j = right; | ||
| 72 | + float p = objects[(left + right) / 2].prob; | ||
| 73 | + | ||
| 74 | + while (i <= j) | ||
| 75 | + { | ||
| 76 | + while (objects[i].prob > p) | ||
| 77 | + i++; | ||
| 78 | + | ||
| 79 | + while (objects[j].prob < p) | ||
| 80 | + j--; | ||
| 81 | + | ||
| 82 | + if (i <= j) | ||
| 83 | + { | ||
| 84 | + // swap | ||
| 85 | + std::swap(objects[i], objects[j]); | ||
| 86 | + | ||
| 87 | + i++; | ||
| 88 | + j--; | ||
| 89 | + } | ||
| 90 | + } | ||
| 91 | + | ||
| 92 | + // #pragma omp parallel sections | ||
| 93 | + { | ||
| 94 | + // #pragma omp section | ||
| 95 | + { | ||
| 96 | + if (left < j) qsort_descent_inplace(objects, left, j); | ||
| 97 | + } | ||
| 98 | + // #pragma omp section | ||
| 99 | + { | ||
| 100 | + if (i < right) qsort_descent_inplace(objects, i, right); | ||
| 101 | + } | ||
| 102 | + } | ||
| 103 | +} | ||
| 104 | + | ||
| 105 | +static void qsort_descent_inplace(std::vector<Object>& objects) | ||
| 106 | +{ | ||
| 107 | + if (objects.empty()) | ||
| 108 | + return; | ||
| 109 | + | ||
| 110 | + qsort_descent_inplace(objects, 0, objects.size() - 1); | ||
| 111 | +} | ||
| 112 | + | ||
| 113 | +static void nms_sorted_bboxes(const std::vector<Object>& objects, std::vector<int>& picked, float nms_threshold, bool agnostic = false) | ||
| 114 | +{ | ||
| 115 | + picked.clear(); | ||
| 116 | + | ||
| 117 | + const int n = objects.size(); | ||
| 118 | + | ||
| 119 | + std::vector<float> areas(n); | ||
| 120 | + for (int i = 0; i < n; i++) | ||
| 121 | + { | ||
| 122 | + areas[i] = objects[i].rect.area(); | ||
| 123 | + } | ||
| 124 | + | ||
| 125 | + for (int i = 0; i < n; i++) | ||
| 126 | + { | ||
| 127 | + const Object& a = objects[i]; | ||
| 128 | + | ||
| 129 | + int keep = 1; | ||
| 130 | + for (int j = 0; j < (int)picked.size(); j++) | ||
| 131 | + { | ||
| 132 | + const Object& b = objects[picked[j]]; | ||
| 133 | + | ||
| 134 | + if (!agnostic && a.label != b.label) | ||
| 135 | + continue; | ||
| 136 | + | ||
| 137 | + // intersection over union | ||
| 138 | + float inter_area = intersection_area(a, b); | ||
| 139 | + float union_area = areas[i] + areas[picked[j]] - inter_area; | ||
| 140 | + // float IoU = inter_area / union_area | ||
| 141 | + if (inter_area / union_area > nms_threshold) | ||
| 142 | + keep = 0; | ||
| 143 | + } | ||
| 144 | + | ||
| 145 | + if (keep) | ||
| 146 | + picked.push_back(i); | ||
| 147 | + } | ||
| 148 | +} | ||
| 149 | + | ||
// Logistic function 1 / (1 + e^-x).
static inline float sigmoid(float x)
{
    const float e = expf(-x);
    return 1.0f / (e + 1.0f);
}
| 154 | + | ||
| 155 | +static void generate_proposals(const ncnn::Mat& pred, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects) | ||
| 156 | +{ | ||
| 157 | + const int w = in_pad.w; | ||
| 158 | + const int h = in_pad.h; | ||
| 159 | + | ||
| 160 | + const int num_grid_x = w / stride; | ||
| 161 | + const int num_grid_y = h / stride; | ||
| 162 | + | ||
| 163 | + const int reg_max_1 = 16; | ||
| 164 | + const int num_class = pred.w - reg_max_1 * 4; // number of classes. 80 for COCO | ||
| 165 | + | ||
| 166 | + for (int y = 0; y < num_grid_y; y++) | ||
| 167 | + { | ||
| 168 | + for (int x = 0; x < num_grid_x; x++) | ||
| 169 | + { | ||
| 170 | + const ncnn::Mat pred_grid = pred.row_range(y * num_grid_x + x, 1); | ||
| 171 | + | ||
| 172 | + // find label with max score | ||
| 173 | + int label = -1; | ||
| 174 | + float score = -FLT_MAX; | ||
| 175 | + { | ||
| 176 | + const ncnn::Mat pred_score = pred_grid.range(reg_max_1 * 4, num_class); | ||
| 177 | + | ||
| 178 | + for (int k = 0; k < num_class; k++) | ||
| 179 | + { | ||
| 180 | + float s = pred_score[k]; | ||
| 181 | + if (s > score) | ||
| 182 | + { | ||
| 183 | + label = k; | ||
| 184 | + score = s; | ||
| 185 | + } | ||
| 186 | + } | ||
| 187 | + | ||
| 188 | + score = sigmoid(score); | ||
| 189 | + } | ||
| 190 | + | ||
| 191 | + if (score >= prob_threshold) | ||
| 192 | + { | ||
| 193 | + ncnn::Mat pred_bbox = pred_grid.range(0, reg_max_1 * 4).reshape(reg_max_1, 4); | ||
| 194 | + | ||
| 195 | + { | ||
| 196 | + ncnn::Layer* softmax = ncnn::create_layer("Softmax"); | ||
| 197 | + | ||
| 198 | + ncnn::ParamDict pd; | ||
| 199 | + pd.set(0, 1); // axis | ||
| 200 | + pd.set(1, 1); | ||
| 201 | + softmax->load_param(pd); | ||
| 202 | + | ||
| 203 | + ncnn::Option opt; | ||
| 204 | + opt.num_threads = 1; | ||
| 205 | + opt.use_packing_layout = false; | ||
| 206 | + | ||
| 207 | + softmax->create_pipeline(opt); | ||
| 208 | + | ||
| 209 | + softmax->forward_inplace(pred_bbox, opt); | ||
| 210 | + | ||
| 211 | + softmax->destroy_pipeline(opt); | ||
| 212 | + | ||
| 213 | + delete softmax; | ||
| 214 | + } | ||
| 215 | + | ||
| 216 | + float pred_ltrb[4]; | ||
| 217 | + for (int k = 0; k < 4; k++) | ||
| 218 | + { | ||
| 219 | + float dis = 0.f; | ||
| 220 | + const float* dis_after_sm = pred_bbox.row(k); | ||
| 221 | + for (int l = 0; l < reg_max_1; l++) | ||
| 222 | + { | ||
| 223 | + dis += l * dis_after_sm[l]; | ||
| 224 | + } | ||
| 225 | + | ||
| 226 | + pred_ltrb[k] = dis * stride; | ||
| 227 | + } | ||
| 228 | + | ||
| 229 | + float pb_cx = (x + 0.5f) * stride; | ||
| 230 | + float pb_cy = (y + 0.5f) * stride; | ||
| 231 | + | ||
| 232 | + float x0 = pb_cx - pred_ltrb[0]; | ||
| 233 | + float y0 = pb_cy - pred_ltrb[1]; | ||
| 234 | + float x1 = pb_cx + pred_ltrb[2]; | ||
| 235 | + float y1 = pb_cy + pred_ltrb[3]; | ||
| 236 | + | ||
| 237 | + Object obj; | ||
| 238 | + obj.rect.x = x0; | ||
| 239 | + obj.rect.y = y0; | ||
| 240 | + obj.rect.width = x1 - x0; | ||
| 241 | + obj.rect.height = y1 - y0; | ||
| 242 | + obj.label = label; | ||
| 243 | + obj.prob = score; | ||
| 244 | + | ||
| 245 | + objects.push_back(obj); | ||
| 246 | + } | ||
| 247 | + } | ||
| 248 | + } | ||
| 249 | +} | ||
| 250 | + | ||
| 251 | +static void generate_proposals(const ncnn::Mat& pred, const std::vector<int>& strides, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects) | ||
| 252 | +{ | ||
| 253 | + const int w = in_pad.w; | ||
| 254 | + const int h = in_pad.h; | ||
| 255 | + | ||
| 256 | + int pred_row_offset = 0; | ||
| 257 | + for (size_t i = 0; i < strides.size(); i++) | ||
| 258 | + { | ||
| 259 | + const int stride = strides[i]; | ||
| 260 | + | ||
| 261 | + const int num_grid_x = w / stride; | ||
| 262 | + const int num_grid_y = h / stride; | ||
| 263 | + const int num_grid = num_grid_x * num_grid_y; | ||
| 264 | + | ||
| 265 | + generate_proposals(pred.row_range(pred_row_offset, num_grid), stride, in_pad, prob_threshold, objects); | ||
| 266 | + pred_row_offset += num_grid; | ||
| 267 | + } | ||
| 268 | +} | ||
| 269 | + | ||
| 270 | +int YOLOv8_det::detect(const cv::Mat& rgb, std::vector<Object>& objects) | ||
| 271 | +{ | ||
| 272 | + const int target_size = det_target_size;//640; | ||
| 273 | + const float prob_threshold = 0.25f; | ||
| 274 | + const float nms_threshold = 0.45f; | ||
| 275 | + | ||
| 276 | + int img_w = rgb.cols; | ||
| 277 | + int img_h = rgb.rows; | ||
| 278 | + | ||
| 279 | + // ultralytics/cfg/models/v8/yolov8.yaml | ||
| 280 | + std::vector<int> strides(3); | ||
| 281 | + strides[0] = 8; | ||
| 282 | + strides[1] = 16; | ||
| 283 | + strides[2] = 32; | ||
| 284 | + const int max_stride = 32; | ||
| 285 | + | ||
| 286 | + // letterbox pad to multiple of max_stride | ||
| 287 | + int w = img_w; | ||
| 288 | + int h = img_h; | ||
| 289 | + float scale = 1.f; | ||
| 290 | + if (w > h) | ||
| 291 | + { | ||
| 292 | + scale = (float)target_size / w; | ||
| 293 | + w = target_size; | ||
| 294 | + h = h * scale; | ||
| 295 | + } | ||
| 296 | + else | ||
| 297 | + { | ||
| 298 | + scale = (float)target_size / h; | ||
| 299 | + h = target_size; | ||
| 300 | + w = w * scale; | ||
| 301 | + } | ||
| 302 | + | ||
| 303 | + ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgb.data, ncnn::Mat::PIXEL_RGB, img_w, img_h, w, h); | ||
| 304 | + | ||
| 305 | + // letterbox pad to target_size rectangle | ||
| 306 | + int wpad = (w + max_stride - 1) / max_stride * max_stride - w; | ||
| 307 | + int hpad = (h + max_stride - 1) / max_stride * max_stride - h; | ||
| 308 | + ncnn::Mat in_pad; | ||
| 309 | + ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f); | ||
| 310 | + | ||
| 311 | + const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f}; | ||
| 312 | + in_pad.substract_mean_normalize(0, norm_vals); | ||
| 313 | + | ||
| 314 | + ncnn::Extractor ex = yolov8.create_extractor(); | ||
| 315 | + | ||
| 316 | + ex.input("in0", in_pad); | ||
| 317 | + | ||
| 318 | + ncnn::Mat out; | ||
| 319 | + ex.extract("out0", out); | ||
| 320 | + | ||
| 321 | + std::vector<Object> proposals; | ||
| 322 | + generate_proposals(out, strides, in_pad, prob_threshold, proposals); | ||
| 323 | + | ||
| 324 | + // sort all proposals by score from highest to lowest | ||
| 325 | + qsort_descent_inplace(proposals); | ||
| 326 | + | ||
| 327 | + // apply nms with nms_threshold | ||
| 328 | + std::vector<int> picked; | ||
| 329 | + nms_sorted_bboxes(proposals, picked, nms_threshold); | ||
| 330 | + | ||
| 331 | + int count = picked.size(); | ||
| 332 | + | ||
| 333 | + objects.resize(count); | ||
| 334 | + for (int i = 0; i < count; i++) | ||
| 335 | + { | ||
| 336 | + objects[i] = proposals[picked[i]]; | ||
| 337 | + | ||
| 338 | + // adjust offset to original unpadded | ||
| 339 | + float x0 = (objects[i].rect.x - (wpad / 2)) / scale; | ||
| 340 | + float y0 = (objects[i].rect.y - (hpad / 2)) / scale; | ||
| 341 | + float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale; | ||
| 342 | + float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale; | ||
| 343 | + | ||
| 344 | + // clip | ||
| 345 | + x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f); | ||
| 346 | + y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f); | ||
| 347 | + x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f); | ||
| 348 | + y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f); | ||
| 349 | + | ||
| 350 | + objects[i].rect.x = x0; | ||
| 351 | + objects[i].rect.y = y0; | ||
| 352 | + objects[i].rect.width = x1 - x0; | ||
| 353 | + objects[i].rect.height = y1 - y0; | ||
| 354 | + } | ||
| 355 | + | ||
| 356 | + // sort objects by area | ||
| 357 | + struct | ||
| 358 | + { | ||
| 359 | + bool operator()(const Object& a, const Object& b) const | ||
| 360 | + { | ||
| 361 | + return a.rect.area() > b.rect.area(); | ||
| 362 | + } | ||
| 363 | + } objects_area_greater; | ||
| 364 | + std::sort(objects.begin(), objects.end(), objects_area_greater); | ||
| 365 | + | ||
| 366 | + return 0; | ||
| 367 | +} | ||
| 368 | + | ||
| 369 | +int YOLOv8_det_coco::draw(cv::Mat& rgb, const std::vector<Object>& objects) | ||
| 370 | +{ | ||
| 371 | + static const char* class_names[] = { | ||
| 372 | + "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", | ||
| 373 | + "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", | ||
| 374 | + "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", | ||
| 375 | + "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", | ||
| 376 | + "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", | ||
| 377 | + "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", | ||
| 378 | + "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", | ||
| 379 | + "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", | ||
| 380 | + "hair drier", "toothbrush" | ||
| 381 | + }; | ||
| 382 | + | ||
| 383 | + static cv::Scalar colors[] = { | ||
| 384 | + cv::Scalar( 67, 54, 244), | ||
| 385 | + cv::Scalar( 30, 99, 233), | ||
| 386 | + cv::Scalar( 39, 176, 156), | ||
| 387 | + cv::Scalar( 58, 183, 103), | ||
| 388 | + cv::Scalar( 81, 181, 63), | ||
| 389 | + cv::Scalar(150, 243, 33), | ||
| 390 | + cv::Scalar(169, 244, 3), | ||
| 391 | + cv::Scalar(188, 212, 0), | ||
| 392 | + cv::Scalar(150, 136, 0), | ||
| 393 | + cv::Scalar(175, 80, 76), | ||
| 394 | + cv::Scalar(195, 74, 139), | ||
| 395 | + cv::Scalar(220, 57, 205), | ||
| 396 | + cv::Scalar(235, 59, 255), | ||
| 397 | + cv::Scalar(193, 7, 255), | ||
| 398 | + cv::Scalar(152, 0, 255), | ||
| 399 | + cv::Scalar( 87, 34, 255), | ||
| 400 | + cv::Scalar( 85, 72, 121), | ||
| 401 | + cv::Scalar(158, 158, 158), | ||
| 402 | + cv::Scalar(125, 139, 96) | ||
| 403 | + }; | ||
| 404 | + | ||
| 405 | + for (size_t i = 0; i < objects.size(); i++) | ||
| 406 | + { | ||
| 407 | + const Object& obj = objects[i]; | ||
| 408 | + | ||
| 409 | + const cv::Scalar& color = colors[i % 19]; | ||
| 410 | + | ||
| 411 | + // fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob, | ||
| 412 | + // obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); | ||
| 413 | + | ||
| 414 | + cv::rectangle(rgb, obj.rect, color); | ||
| 415 | + | ||
| 416 | + char text[256]; | ||
| 417 | + sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100); | ||
| 418 | + | ||
| 419 | + int baseLine = 0; | ||
| 420 | + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); | ||
| 421 | + | ||
| 422 | + int x = obj.rect.x; | ||
| 423 | + int y = obj.rect.y - label_size.height - baseLine; | ||
| 424 | + if (y < 0) | ||
| 425 | + y = 0; | ||
| 426 | + if (x + label_size.width > rgb.cols) | ||
| 427 | + x = rgb.cols - label_size.width; | ||
| 428 | + | ||
| 429 | + cv::rectangle(rgb, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), | ||
| 430 | + cv::Scalar(255, 255, 255), -1); | ||
| 431 | + | ||
| 432 | + cv::putText(rgb, text, cv::Point(x, y + label_size.height), | ||
| 433 | + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); | ||
| 434 | + } | ||
| 435 | + | ||
| 436 | + return 0; | ||
| 437 | +} | ||
| 438 | + | ||
| 439 | +int YOLOv8_det_oiv7::draw(cv::Mat& rgb, const std::vector<Object>& objects) | ||
| 440 | +{ | ||
| 441 | + static const char* class_names[] = { | ||
| 442 | + "Accordion", "Adhesive tape", "Aircraft", "Airplane", "Alarm clock", "Alpaca", "Ambulance", "Animal", | ||
| 443 | + "Ant", "Antelope", "Apple", "Armadillo", "Artichoke", "Auto part", "Axe", "Backpack", "Bagel", | ||
| 444 | + "Baked goods", "Balance beam", "Ball", "Balloon", "Banana", "Band-aid", "Banjo", "Barge", "Barrel", | ||
| 445 | + "Baseball bat", "Baseball glove", "Bat (Animal)", "Bathroom accessory", "Bathroom cabinet", "Bathtub", | ||
| 446 | + "Beaker", "Bear", "Bed", "Bee", "Beehive", "Beer", "Beetle", "Bell pepper", "Belt", "Bench", "Bicycle", | ||
| 447 | + "Bicycle helmet", "Bicycle wheel", "Bidet", "Billboard", "Billiard table", "Binoculars", "Bird", | ||
| 448 | + "Blender", "Blue jay", "Boat", "Bomb", "Book", "Bookcase", "Boot", "Bottle", "Bottle opener", | ||
| 449 | + "Bow and arrow", "Bowl", "Bowling equipment", "Box", "Boy", "Brassiere", "Bread", "Briefcase", | ||
| 450 | + "Broccoli", "Bronze sculpture", "Brown bear", "Building", "Bull", "Burrito", "Bus", "Bust", "Butterfly", | ||
| 451 | + "Cabbage", "Cabinetry", "Cake", "Cake stand", "Calculator", "Camel", "Camera", "Can opener", "Canary", | ||
| 452 | + "Candle", "Candy", "Cannon", "Canoe", "Cantaloupe", "Car", "Carnivore", "Carrot", "Cart", "Cassette deck", | ||
| 453 | + "Castle", "Cat", "Cat furniture", "Caterpillar", "Cattle", "Ceiling fan", "Cello", "Centipede", | ||
| 454 | + "Chainsaw", "Chair", "Cheese", "Cheetah", "Chest of drawers", "Chicken", "Chime", "Chisel", "Chopsticks", | ||
| 455 | + "Christmas tree", "Clock", "Closet", "Clothing", "Coat", "Cocktail", "Cocktail shaker", "Coconut", | ||
| 456 | + "Coffee", "Coffee cup", "Coffee table", "Coffeemaker", "Coin", "Common fig", "Common sunflower", | ||
| 457 | + "Computer keyboard", "Computer monitor", "Computer mouse", "Container", "Convenience store", "Cookie", | ||
| 458 | + "Cooking spray", "Corded phone", "Cosmetics", "Couch", "Countertop", "Cowboy hat", "Crab", "Cream", | ||
| 459 | + "Cricket ball", "Crocodile", "Croissant", "Crown", "Crutch", "Cucumber", "Cupboard", "Curtain", | ||
| 460 | + "Cutting board", "Dagger", "Dairy Product", "Deer", "Desk", "Dessert", "Diaper", "Dice", "Digital clock", | ||
| 461 | + "Dinosaur", "Dishwasher", "Dog", "Dog bed", "Doll", "Dolphin", "Door", "Door handle", "Doughnut", | ||
| 462 | + "Dragonfly", "Drawer", "Dress", "Drill (Tool)", "Drink", "Drinking straw", "Drum", "Duck", "Dumbbell", | ||
| 463 | + "Eagle", "Earrings", "Egg (Food)", "Elephant", "Envelope", "Eraser", "Face powder", "Facial tissue holder", | ||
| 464 | + "Falcon", "Fashion accessory", "Fast food", "Fax", "Fedora", "Filing cabinet", "Fire hydrant", | ||
| 465 | + "Fireplace", "Fish", "Flag", "Flashlight", "Flower", "Flowerpot", "Flute", "Flying disc", "Food", | ||
| 466 | + "Food processor", "Football", "Football helmet", "Footwear", "Fork", "Fountain", "Fox", "French fries", | ||
| 467 | + "French horn", "Frog", "Fruit", "Frying pan", "Furniture", "Garden Asparagus", "Gas stove", "Giraffe", | ||
| 468 | + "Girl", "Glasses", "Glove", "Goat", "Goggles", "Goldfish", "Golf ball", "Golf cart", "Gondola", | ||
| 469 | + "Goose", "Grape", "Grapefruit", "Grinder", "Guacamole", "Guitar", "Hair dryer", "Hair spray", "Hamburger", | ||
| 470 | + "Hammer", "Hamster", "Hand dryer", "Handbag", "Handgun", "Harbor seal", "Harmonica", "Harp", | ||
| 471 | + "Harpsichord", "Hat", "Headphones", "Heater", "Hedgehog", "Helicopter", "Helmet", "High heels", | ||
| 472 | + "Hiking equipment", "Hippopotamus", "Home appliance", "Honeycomb", "Horizontal bar", "Horse", "Hot dog", | ||
| 473 | + "House", "Houseplant", "Human arm", "Human beard", "Human body", "Human ear", "Human eye", "Human face", | ||
| 474 | + "Human foot", "Human hair", "Human hand", "Human head", "Human leg", "Human mouth", "Human nose", | ||
| 475 | + "Humidifier", "Ice cream", "Indoor rower", "Infant bed", "Insect", "Invertebrate", "Ipod", "Isopod", | ||
| 476 | + "Jacket", "Jacuzzi", "Jaguar (Animal)", "Jeans", "Jellyfish", "Jet ski", "Jug", "Juice", "Kangaroo", | ||
| 477 | + "Kettle", "Kitchen & dining room table", "Kitchen appliance", "Kitchen knife", "Kitchen utensil", | ||
| 478 | + "Kitchenware", "Kite", "Knife", "Koala", "Ladder", "Ladle", "Ladybug", "Lamp", "Land vehicle", | ||
| 479 | + "Lantern", "Laptop", "Lavender (Plant)", "Lemon", "Leopard", "Light bulb", "Light switch", "Lighthouse", | ||
| 480 | + "Lily", "Limousine", "Lion", "Lipstick", "Lizard", "Lobster", "Loveseat", "Luggage and bags", "Lynx", | ||
| 481 | + "Magpie", "Mammal", "Man", "Mango", "Maple", "Maracas", "Marine invertebrates", "Marine mammal", | ||
| 482 | + "Measuring cup", "Mechanical fan", "Medical equipment", "Microphone", "Microwave oven", "Milk", | ||
| 483 | + "Miniskirt", "Mirror", "Missile", "Mixer", "Mixing bowl", "Mobile phone", "Monkey", "Moths and butterflies", | ||
| 484 | + "Motorcycle", "Mouse", "Muffin", "Mug", "Mule", "Mushroom", "Musical instrument", "Musical keyboard", | ||
| 485 | + "Nail (Construction)", "Necklace", "Nightstand", "Oboe", "Office building", "Office supplies", "Orange", | ||
| 486 | + "Organ (Musical Instrument)", "Ostrich", "Otter", "Oven", "Owl", "Oyster", "Paddle", "Palm tree", | ||
| 487 | + "Pancake", "Panda", "Paper cutter", "Paper towel", "Parachute", "Parking meter", "Parrot", "Pasta", | ||
| 488 | + "Pastry", "Peach", "Pear", "Pen", "Pencil case", "Pencil sharpener", "Penguin", "Perfume", "Person", | ||
| 489 | + "Personal care", "Personal flotation device", "Piano", "Picnic basket", "Picture frame", "Pig", | ||
| 490 | + "Pillow", "Pineapple", "Pitcher (Container)", "Pizza", "Pizza cutter", "Plant", "Plastic bag", "Plate", | ||
| 491 | + "Platter", "Plumbing fixture", "Polar bear", "Pomegranate", "Popcorn", "Porch", "Porcupine", "Poster", | ||
| 492 | + "Potato", "Power plugs and sockets", "Pressure cooker", "Pretzel", "Printer", "Pumpkin", "Punching bag", | ||
| 493 | + "Rabbit", "Raccoon", "Racket", "Radish", "Ratchet (Device)", "Raven", "Rays and skates", "Red panda", | ||
| 494 | + "Refrigerator", "Remote control", "Reptile", "Rhinoceros", "Rifle", "Ring binder", "Rocket", | ||
| 495 | + "Roller skates", "Rose", "Rugby ball", "Ruler", "Salad", "Salt and pepper shakers", "Sandal", | ||
| 496 | + "Sandwich", "Saucer", "Saxophone", "Scale", "Scarf", "Scissors", "Scoreboard", "Scorpion", | ||
| 497 | + "Screwdriver", "Sculpture", "Sea lion", "Sea turtle", "Seafood", "Seahorse", "Seat belt", "Segway", | ||
| 498 | + "Serving tray", "Sewing machine", "Shark", "Sheep", "Shelf", "Shellfish", "Shirt", "Shorts", | ||
| 499 | + "Shotgun", "Shower", "Shrimp", "Sink", "Skateboard", "Ski", "Skirt", "Skull", "Skunk", "Skyscraper", | ||
| 500 | + "Slow cooker", "Snack", "Snail", "Snake", "Snowboard", "Snowman", "Snowmobile", "Snowplow", | ||
| 501 | + "Soap dispenser", "Sock", "Sofa bed", "Sombrero", "Sparrow", "Spatula", "Spice rack", "Spider", | ||
| 502 | + "Spoon", "Sports equipment", "Sports uniform", "Squash (Plant)", "Squid", "Squirrel", "Stairs", | ||
| 503 | + "Stapler", "Starfish", "Stationary bicycle", "Stethoscope", "Stool", "Stop sign", "Strawberry", | ||
| 504 | + "Street light", "Stretcher", "Studio couch", "Submarine", "Submarine sandwich", "Suit", "Suitcase", | ||
| 505 | + "Sun hat", "Sunglasses", "Surfboard", "Sushi", "Swan", "Swim cap", "Swimming pool", "Swimwear", | ||
| 506 | + "Sword", "Syringe", "Table", "Table tennis racket", "Tablet computer", "Tableware", "Taco", "Tank", | ||
| 507 | + "Tap", "Tart", "Taxi", "Tea", "Teapot", "Teddy bear", "Telephone", "Television", "Tennis ball", | ||
| 508 | + "Tennis racket", "Tent", "Tiara", "Tick", "Tie", "Tiger", "Tin can", "Tire", "Toaster", "Toilet", | ||
| 509 | + "Toilet paper", "Tomato", "Tool", "Toothbrush", "Torch", "Tortoise", "Towel", "Tower", "Toy", | ||
| 510 | + "Traffic light", "Traffic sign", "Train", "Training bench", "Treadmill", "Tree", "Tree house", | ||
| 511 | + "Tripod", "Trombone", "Trousers", "Truck", "Trumpet", "Turkey", "Turtle", "Umbrella", "Unicycle", | ||
| 512 | + "Van", "Vase", "Vegetable", "Vehicle", "Vehicle registration plate", "Violin", "Volleyball (Ball)", | ||
| 513 | + "Waffle", "Waffle iron", "Wall clock", "Wardrobe", "Washing machine", "Waste container", "Watch", | ||
| 514 | + "Watercraft", "Watermelon", "Weapon", "Whale", "Wheel", "Wheelchair", "Whisk", "Whiteboard", "Willow", | ||
| 515 | + "Window", "Window blind", "Wine", "Wine glass", "Wine rack", "Winter melon", "Wok", "Woman", | ||
| 516 | + "Wood-burning stove", "Woodpecker", "Worm", "Wrench", "Zebra", "Zucchini" | ||
| 517 | + }; | ||
| 518 | + | ||
| 519 | + static cv::Scalar colors[] = { | ||
| 520 | + cv::Scalar( 67, 54, 244), | ||
| 521 | + cv::Scalar( 30, 99, 233), | ||
| 522 | + cv::Scalar( 39, 176, 156), | ||
| 523 | + cv::Scalar( 58, 183, 103), | ||
| 524 | + cv::Scalar( 81, 181, 63), | ||
| 525 | + cv::Scalar(150, 243, 33), | ||
| 526 | + cv::Scalar(169, 244, 3), | ||
| 527 | + cv::Scalar(188, 212, 0), | ||
| 528 | + cv::Scalar(150, 136, 0), | ||
| 529 | + cv::Scalar(175, 80, 76), | ||
| 530 | + cv::Scalar(195, 74, 139), | ||
| 531 | + cv::Scalar(220, 57, 205), | ||
| 532 | + cv::Scalar(235, 59, 255), | ||
| 533 | + cv::Scalar(193, 7, 255), | ||
| 534 | + cv::Scalar(152, 0, 255), | ||
| 535 | + cv::Scalar( 87, 34, 255), | ||
| 536 | + cv::Scalar( 85, 72, 121), | ||
| 537 | + cv::Scalar(158, 158, 158), | ||
| 538 | + cv::Scalar(125, 139, 96) | ||
| 539 | + }; | ||
| 540 | + | ||
| 541 | + for (size_t i = 0; i < objects.size(); i++) | ||
| 542 | + { | ||
| 543 | + const Object& obj = objects[i]; | ||
| 544 | + | ||
| 545 | + const cv::Scalar& color = colors[i % 19]; | ||
| 546 | + | ||
| 547 | + // fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob, | ||
| 548 | + // obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); | ||
| 549 | + | ||
| 550 | + cv::rectangle(rgb, obj.rect, color); | ||
| 551 | + | ||
| 552 | + char text[256]; | ||
| 553 | + sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100); | ||
| 554 | + | ||
| 555 | + int baseLine = 0; | ||
| 556 | + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); | ||
| 557 | + | ||
| 558 | + int x = obj.rect.x; | ||
| 559 | + int y = obj.rect.y - label_size.height - baseLine; | ||
| 560 | + if (y < 0) | ||
| 561 | + y = 0; | ||
| 562 | + if (x + label_size.width > rgb.cols) | ||
| 563 | + x = rgb.cols - label_size.width; | ||
| 564 | + | ||
| 565 | + cv::rectangle(rgb, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), | ||
| 566 | + cv::Scalar(255, 255, 255), -1); | ||
| 567 | + | ||
| 568 | + cv::putText(rgb, text, cv::Point(x, y + label_size.height), | ||
| 569 | + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); | ||
| 570 | + } | ||
| 571 | + | ||
| 572 | + return 0; | ||
| 573 | +} |
| 1 | +// Tencent is pleased to support the open source community by making ncnn available. | ||
| 2 | +// | ||
| 3 | +// Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. | ||
| 4 | +// | ||
| 5 | +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | ||
| 6 | +// in compliance with the License. You may obtain a copy of the License at | ||
| 7 | +// | ||
| 8 | +// https://opensource.org/licenses/BSD-3-Clause | ||
| 9 | +// | ||
| 10 | +// Unless required by applicable law or agreed to in writing, software distributed | ||
| 11 | +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
| 12 | +// CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
| 13 | +// specific language governing permissions and limitations under the License. | ||
| 14 | + | ||
| 15 | +// 1. install | ||
| 16 | +// pip3 install -U ultralytics pnnx ncnn | ||
| 17 | +// 2. export yolov8-obb torchscript | ||
| 18 | +// yolo export model=yolov8n-obb.pt format=torchscript | ||
| 19 | +// 3. convert torchscript with static shape | ||
| 20 | +// pnnx yolov8n-obb.torchscript | ||
| 21 | +// 4. modify yolov8n_obb_pnnx.py for dynamic shape inference | ||
| 22 | +// A. modify reshape to support dynamic image sizes | ||
| 23 | +// B. permute tensor before concat and adjust concat axis | ||
| 24 | +// C. drop post-process part | ||
| 25 | +// before: | ||
| 26 | +// v_137 = v_136.view(1, 1, 16384) | ||
| 27 | +// v_143 = v_142.view(1, 1, 4096) | ||
| 28 | +// v_149 = v_148.view(1, 1, 1024) | ||
| 29 | +// v_150 = torch.cat((v_137, v_143, v_149), dim=2) | ||
| 30 | +// ... | ||
| 31 | +// v_186 = v_163.view(1, 79, 16384) | ||
| 32 | +// v_187 = v_174.view(1, 79, 4096) | ||
| 33 | +// v_188 = v_185.view(1, 79, 1024) | ||
| 34 | +// v_189 = torch.cat((v_186, v_187, v_188), dim=2) | ||
| 35 | +// ... | ||
| 36 | +// after: | ||
| 37 | +// v_137 = v_136.view(1, 1, -1).transpose(1, 2) | ||
| 38 | +// v_143 = v_142.view(1, 1, -1).transpose(1, 2) | ||
| 39 | +// v_149 = v_148.view(1, 1, -1).transpose(1, 2) | ||
| 40 | +// v_150 = torch.cat((v_137, v_143, v_149), dim=1) | ||
| 41 | +// ... | ||
| 42 | +// v_186 = v_163.view(1, 79, -1).transpose(1, 2) | ||
| 43 | +// v_187 = v_174.view(1, 79, -1).transpose(1, 2) | ||
| 44 | +// v_188 = v_185.view(1, 79, -1).transpose(1, 2) | ||
| 45 | +// v_189 = torch.cat((v_186, v_187, v_188), dim=1) | ||
| 46 | +// return v_189, v_150 | ||
| 47 | +// 5. re-export yolov8-obb torchscript | ||
| 48 | +// python3 -c 'import yolov8n_obb_pnnx; yolov8n_obb_pnnx.export_torchscript()' | ||
| 49 | +// 6. convert new torchscript with dynamic shape | ||
| 50 | +// pnnx yolov8n_obb_pnnx.py.pt inputshape=[1,3,1024,1024] inputshape2=[1,3,512,512] | ||
| 51 | +// 7. now you get ncnn model files | ||
| 52 | +// mv yolov8n_obb_pnnx.py.ncnn.param yolov8n_obb.ncnn.param | ||
| 53 | +// mv yolov8n_obb_pnnx.py.ncnn.bin yolov8n_obb.ncnn.bin | ||
| 54 | + | ||
| 55 | +// the out blob would be a 2-dim tensor with w=79 h=21504 | ||
| 56 | +// | ||
| 57 | +// | bbox-reg 16 x 4 |score(15)| | ||
| 58 | +// +-----+-----+-----+-----+---------+ | ||
| 59 | +// | dx0 | dy0 | dx1 | dy1 | 0.1 ... | | ||
| 60 | +// all /| | | | | ... | | ||
| 61 | +// boxes | .. | .. | .. | .. | 0.0 ... | | ||
| 62 | +// (21504)| | | | | . ... | | ||
| 63 | +// \| | | | | . ... | | ||
| 64 | +// +-----+-----+-----+-----+---------+ | ||
| 65 | +// | ||
| 66 | + | ||
| 67 | +// the out blob would be a 2-dim tensor with w=1 h=21504 | ||
| 68 | +// | ||
| 69 | +// | degree(1)| | ||
| 70 | +// +----------+ | ||
| 71 | +// | 0.1 | | ||
| 72 | +// all /| | | ||
| 73 | +// boxes | 0.0 | | ||
| 74 | +// (21504)| . | | ||
| 75 | +// \| . | | ||
| 76 | +// +----------+ | ||
| 77 | +// | ||
| 78 | + | ||
| 79 | +#include "yolov8.h" | ||
| 80 | + | ||
| 81 | +#include "layer.h" | ||
| 82 | + | ||
| 83 | +#include <opencv2/core/core.hpp> | ||
| 84 | +#include <opencv2/imgproc/imgproc.hpp> | ||
| 85 | + | ||
| 86 | +#include <float.h> | ||
| 87 | +#include <stdio.h> | ||
| 88 | +#include <vector> | ||
| 89 | + | ||
| 90 | +static inline float intersection_area(const Object& a, const Object& b) | ||
| 91 | +{ | ||
| 92 | + std::vector<cv::Point2f> intersection; | ||
| 93 | + cv::rotatedRectangleIntersection(a.rrect, b.rrect, intersection); | ||
| 94 | + if (intersection.empty()) | ||
| 95 | + return 0.f; | ||
| 96 | + | ||
| 97 | + return cv::contourArea(intersection); | ||
| 98 | +} | ||
| 99 | + | ||
| 100 | +static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right) | ||
| 101 | +{ | ||
| 102 | + int i = left; | ||
| 103 | + int j = right; | ||
| 104 | + float p = objects[(left + right) / 2].prob; | ||
| 105 | + | ||
| 106 | + while (i <= j) | ||
| 107 | + { | ||
| 108 | + while (objects[i].prob > p) | ||
| 109 | + i++; | ||
| 110 | + | ||
| 111 | + while (objects[j].prob < p) | ||
| 112 | + j--; | ||
| 113 | + | ||
| 114 | + if (i <= j) | ||
| 115 | + { | ||
| 116 | + // swap | ||
| 117 | + std::swap(objects[i], objects[j]); | ||
| 118 | + | ||
| 119 | + i++; | ||
| 120 | + j--; | ||
| 121 | + } | ||
| 122 | + } | ||
| 123 | + | ||
| 124 | + // #pragma omp parallel sections | ||
| 125 | + { | ||
| 126 | + // #pragma omp section | ||
| 127 | + { | ||
| 128 | + if (left < j) qsort_descent_inplace(objects, left, j); | ||
| 129 | + } | ||
| 130 | + // #pragma omp section | ||
| 131 | + { | ||
| 132 | + if (i < right) qsort_descent_inplace(objects, i, right); | ||
| 133 | + } | ||
| 134 | + } | ||
| 135 | +} | ||
| 136 | + | ||
| 137 | +static void qsort_descent_inplace(std::vector<Object>& objects) | ||
| 138 | +{ | ||
| 139 | + if (objects.empty()) | ||
| 140 | + return; | ||
| 141 | + | ||
| 142 | + qsort_descent_inplace(objects, 0, objects.size() - 1); | ||
| 143 | +} | ||
| 144 | + | ||
| 145 | +static void nms_sorted_bboxes(const std::vector<Object>& objects, std::vector<int>& picked, float nms_threshold, bool agnostic = false) | ||
| 146 | +{ | ||
| 147 | + picked.clear(); | ||
| 148 | + | ||
| 149 | + const int n = objects.size(); | ||
| 150 | + | ||
| 151 | + std::vector<float> areas(n); | ||
| 152 | + for (int i = 0; i < n; i++) | ||
| 153 | + { | ||
| 154 | + areas[i] = objects[i].rrect.size.area(); | ||
| 155 | + } | ||
| 156 | + | ||
| 157 | + for (int i = 0; i < n; i++) | ||
| 158 | + { | ||
| 159 | + const Object& a = objects[i]; | ||
| 160 | + | ||
| 161 | + int keep = 1; | ||
| 162 | + for (int j = 0; j < (int)picked.size(); j++) | ||
| 163 | + { | ||
| 164 | + const Object& b = objects[picked[j]]; | ||
| 165 | + | ||
| 166 | + if (!agnostic && a.label != b.label) | ||
| 167 | + continue; | ||
| 168 | + | ||
| 169 | + // intersection over union | ||
| 170 | + float inter_area = intersection_area(a, b); | ||
| 171 | + float union_area = areas[i] + areas[picked[j]] - inter_area; | ||
| 172 | + // float IoU = inter_area / union_area; | ||
| 173 | + if (inter_area / union_area > nms_threshold) | ||
| 174 | + keep = 0; | ||
| 175 | + } | ||
| 176 | + | ||
| 177 | + if (keep) | ||
| 178 | + picked.push_back(i); | ||
| 179 | + } | ||
| 180 | +} | ||
| 181 | + | ||
// Logistic function 1 / (1 + e^-x).
static inline float sigmoid(float x)
{
    const float e = expf(-x);
    return 1.0f / (e + 1.0f);
}
| 186 | + | ||
| 187 | +static void generate_proposals(const ncnn::Mat& pred, const ncnn::Mat& pred_angle, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects) | ||
| 188 | +{ | ||
| 189 | + const int w = in_pad.w; | ||
| 190 | + const int h = in_pad.h; | ||
| 191 | + | ||
| 192 | + const int num_grid_x = w / stride; | ||
| 193 | + const int num_grid_y = h / stride; | ||
| 194 | + | ||
| 195 | + const int reg_max_1 = 16; | ||
| 196 | + const int num_class = pred.w - reg_max_1 * 4; // number of classes. 15 for DOTAv1 | ||
| 197 | + | ||
| 198 | + for (int y = 0; y < num_grid_y; y++) | ||
| 199 | + { | ||
| 200 | + for (int x = 0; x < num_grid_x; x++) | ||
| 201 | + { | ||
| 202 | + const ncnn::Mat pred_grid = pred.row_range(y * num_grid_x + x, 1); | ||
| 203 | + | ||
| 204 | + // find label with max score | ||
| 205 | + int label = -1; | ||
| 206 | + float score = -FLT_MAX; | ||
| 207 | + { | ||
| 208 | + const ncnn::Mat pred_score = pred_grid.range(reg_max_1 * 4, num_class); | ||
| 209 | + | ||
| 210 | + for (int k = 0; k < num_class; k++) | ||
| 211 | + { | ||
| 212 | + float s = pred_score[k]; | ||
| 213 | + if (s > score) | ||
| 214 | + { | ||
| 215 | + label = k; | ||
| 216 | + score = s; | ||
| 217 | + } | ||
| 218 | + } | ||
| 219 | + | ||
| 220 | + score = sigmoid(score); | ||
| 221 | + } | ||
| 222 | + | ||
| 223 | + if (score >= prob_threshold) | ||
| 224 | + { | ||
| 225 | + ncnn::Mat pred_bbox = pred_grid.range(0, reg_max_1 * 4).reshape(reg_max_1, 4).clone(); | ||
| 226 | + | ||
| 227 | + { | ||
| 228 | + ncnn::Layer* softmax = ncnn::create_layer("Softmax"); | ||
| 229 | + | ||
| 230 | + ncnn::ParamDict pd; | ||
| 231 | + pd.set(0, 1); // axis | ||
| 232 | + pd.set(1, 1); | ||
| 233 | + softmax->load_param(pd); | ||
| 234 | + | ||
| 235 | + ncnn::Option opt; | ||
| 236 | + opt.num_threads = 1; | ||
| 237 | + opt.use_packing_layout = false; | ||
| 238 | + | ||
| 239 | + softmax->create_pipeline(opt); | ||
| 240 | + | ||
| 241 | + softmax->forward_inplace(pred_bbox, opt); | ||
| 242 | + | ||
| 243 | + softmax->destroy_pipeline(opt); | ||
| 244 | + | ||
| 245 | + delete softmax; | ||
| 246 | + } | ||
| 247 | + | ||
| 248 | + float pred_ltrb[4]; | ||
| 249 | + for (int k = 0; k < 4; k++) | ||
| 250 | + { | ||
| 251 | + float dis = 0.f; | ||
| 252 | + const float* dis_after_sm = pred_bbox.row(k); | ||
| 253 | + for (int l = 0; l < reg_max_1; l++) | ||
| 254 | + { | ||
| 255 | + dis += l * dis_after_sm[l]; | ||
| 256 | + } | ||
| 257 | + | ||
| 258 | + pred_ltrb[k] = dis * stride; | ||
| 259 | + } | ||
| 260 | + | ||
| 261 | + float pb_cx = (x + 0.5f) * stride; | ||
| 262 | + float pb_cy = (y + 0.5f) * stride; | ||
| 263 | + | ||
| 264 | + const float angle = sigmoid(pred_angle.row(y * num_grid_x + x)[0]) - 0.25f; | ||
| 265 | + | ||
| 266 | + const float angle_rad = angle * 3.14159265358979323846f; | ||
| 267 | + const float angle_degree = angle * 180.f; | ||
| 268 | + | ||
| 269 | + float cos = cosf(angle_rad); | ||
| 270 | + float sin = sinf(angle_rad); | ||
| 271 | + | ||
| 272 | + float xx = (pred_ltrb[2] - pred_ltrb[0]) * 0.5f; | ||
| 273 | + float yy = (pred_ltrb[3] - pred_ltrb[1]) * 0.5f; | ||
| 274 | + float xr = xx * cos - yy * sin; | ||
| 275 | + float yr = xx * sin + yy * cos; | ||
| 276 | + const float cx = pb_cx + xr; | ||
| 277 | + const float cy = pb_cy + yr; | ||
| 278 | + const float ww = pred_ltrb[2] + pred_ltrb[0]; | ||
| 279 | + const float hh = pred_ltrb[3] + pred_ltrb[1]; | ||
| 280 | + | ||
| 281 | + Object obj; | ||
| 282 | + obj.rrect = cv::RotatedRect(cv::Point2f(cx, cy), cv::Size_<float>(ww, hh), angle_degree); | ||
| 283 | + obj.label = label; | ||
| 284 | + obj.prob = score; | ||
| 285 | + | ||
| 286 | + objects.push_back(obj); | ||
| 287 | + } | ||
| 288 | + } | ||
| 289 | + } | ||
| 290 | +} | ||
| 291 | + | ||
| 292 | +static void generate_proposals(const ncnn::Mat& pred, const ncnn::Mat& pred_angle, const std::vector<int>& strides, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects) | ||
| 293 | +{ | ||
| 294 | + const int w = in_pad.w; | ||
| 295 | + const int h = in_pad.h; | ||
| 296 | + | ||
| 297 | + int pred_row_offset = 0; | ||
| 298 | + for (size_t i = 0; i < strides.size(); i++) | ||
| 299 | + { | ||
| 300 | + const int stride = strides[i]; | ||
| 301 | + | ||
| 302 | + const int num_grid_x = w / stride; | ||
| 303 | + const int num_grid_y = h / stride; | ||
| 304 | + const int num_grid = num_grid_x * num_grid_y; | ||
| 305 | + | ||
| 306 | + generate_proposals(pred.row_range(pred_row_offset, num_grid), pred_angle.row_range(pred_row_offset, num_grid), stride, in_pad, prob_threshold, objects); | ||
| 307 | + | ||
| 308 | + pred_row_offset += num_grid; | ||
| 309 | + } | ||
| 310 | +} | ||
| 311 | + | ||
| 312 | +int YOLOv8_obb::detect(const cv::Mat& rgb, std::vector<Object>& objects) | ||
| 313 | +{ | ||
| 314 | + const int target_size = det_target_size;//1024; | ||
| 315 | + const float prob_threshold = 0.25f; | ||
| 316 | + const float nms_threshold = 0.45f; | ||
| 317 | + | ||
| 318 | + int img_w = rgb.cols; | ||
| 319 | + int img_h = rgb.rows; | ||
| 320 | + | ||
| 321 | + // ultralytics/cfg/models/v8/yolov8.yaml | ||
| 322 | + std::vector<int> strides(3); | ||
| 323 | + strides[0] = 8; | ||
| 324 | + strides[1] = 16; | ||
| 325 | + strides[2] = 32; | ||
| 326 | + const int max_stride = 32; | ||
| 327 | + | ||
| 328 | + // letterbox pad to multiple of max_stride | ||
| 329 | + int w = img_w; | ||
| 330 | + int h = img_h; | ||
| 331 | + float scale = 1.f; | ||
| 332 | + if (w > h) | ||
| 333 | + { | ||
| 334 | + scale = (float)target_size / w; | ||
| 335 | + w = target_size; | ||
| 336 | + h = h * scale; | ||
| 337 | + } | ||
| 338 | + else | ||
| 339 | + { | ||
| 340 | + scale = (float)target_size / h; | ||
| 341 | + h = target_size; | ||
| 342 | + w = w * scale; | ||
| 343 | + } | ||
| 344 | + | ||
| 345 | + ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgb.data, ncnn::Mat::PIXEL_RGB, img_w, img_h, w, h); | ||
| 346 | + | ||
| 347 | + // letterbox pad to target_size rectangle | ||
| 348 | + int wpad = (w + max_stride - 1) / max_stride * max_stride - w; | ||
| 349 | + int hpad = (h + max_stride - 1) / max_stride * max_stride - h; | ||
| 350 | + ncnn::Mat in_pad; | ||
| 351 | + ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f); | ||
| 352 | + | ||
| 353 | + const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f}; | ||
| 354 | + in_pad.substract_mean_normalize(0, norm_vals); | ||
| 355 | + | ||
| 356 | + ncnn::Extractor ex = yolov8.create_extractor(); | ||
| 357 | + | ||
| 358 | + ex.input("in0", in_pad); | ||
| 359 | + | ||
| 360 | + ncnn::Mat out; | ||
| 361 | + ex.extract("out0", out); | ||
| 362 | + | ||
| 363 | + ncnn::Mat out_angle; | ||
| 364 | + ex.extract("out1", out_angle); | ||
| 365 | + | ||
| 366 | + std::vector<Object> proposals; | ||
| 367 | + generate_proposals(out, out_angle, strides, in_pad, prob_threshold, proposals); | ||
| 368 | + | ||
| 369 | + // sort all proposals by score from highest to lowest | ||
| 370 | + qsort_descent_inplace(proposals); | ||
| 371 | + | ||
| 372 | + // apply nms with nms_threshold | ||
| 373 | + std::vector<int> picked; | ||
| 374 | + nms_sorted_bboxes(proposals, picked, nms_threshold); | ||
| 375 | + | ||
| 376 | + int count = picked.size(); | ||
| 377 | + if (count == 0) | ||
| 378 | + return 0; | ||
| 379 | + | ||
| 380 | + objects.resize(count); | ||
| 381 | + for (int i = 0; i < count; i++) | ||
| 382 | + { | ||
| 383 | + Object obj = proposals[picked[i]]; | ||
| 384 | + | ||
| 385 | + // adjust offset to original unpadded | ||
| 386 | + obj.rrect.center.x = (obj.rrect.center.x - (wpad / 2)) / scale; | ||
| 387 | + obj.rrect.center.y = (obj.rrect.center.y - (hpad / 2)) / scale; | ||
| 388 | + obj.rrect.size.width = (obj.rrect.size.width) / scale; | ||
| 389 | + obj.rrect.size.height = (obj.rrect.size.height) / scale; | ||
| 390 | + | ||
| 391 | + objects[i] = obj; | ||
| 392 | + } | ||
| 393 | + | ||
| 394 | + return 0; | ||
| 395 | +} | ||
| 396 | + | ||
| 397 | +int YOLOv8_obb::draw(cv::Mat& rgb, const std::vector<Object>& objects) | ||
| 398 | +{ | ||
| 399 | + static const char* class_names[] = { | ||
| 400 | + "plane", "ship", "storage tank", "baseball diamond", "tennis court", | ||
| 401 | + "basketball court", "ground track field", "harbor", "bridge", "large vehicle", | ||
| 402 | + "small vehicle", "helicopter", "roundabout", "soccer ball field", "swimming pool" | ||
| 403 | + }; | ||
| 404 | + | ||
| 405 | + static const cv::Scalar colors[] = { | ||
| 406 | + cv::Scalar( 39, 176, 156), | ||
| 407 | + cv::Scalar( 58, 183, 103), | ||
| 408 | + cv::Scalar( 81, 181, 63), | ||
| 409 | + cv::Scalar(150, 243, 33), | ||
| 410 | + cv::Scalar(169, 244, 3), | ||
| 411 | + cv::Scalar(188, 212, 0), | ||
| 412 | + cv::Scalar(150, 136, 0), | ||
| 413 | + cv::Scalar(175, 80, 76), | ||
| 414 | + cv::Scalar(195, 74, 139), | ||
| 415 | + cv::Scalar(220, 57, 205), | ||
| 416 | + cv::Scalar(235, 59, 255), | ||
| 417 | + cv::Scalar(193, 7, 255), | ||
| 418 | + cv::Scalar(152, 0, 255), | ||
| 419 | + cv::Scalar( 87, 34, 255), | ||
| 420 | + cv::Scalar( 85, 72, 121), | ||
| 421 | + cv::Scalar(158, 158, 158), | ||
| 422 | + cv::Scalar(125, 139, 96) | ||
| 423 | + }; | ||
| 424 | + | ||
| 425 | + for (size_t i = 0; i < objects.size(); i++) | ||
| 426 | + { | ||
| 427 | + const Object& obj = objects[i]; | ||
| 428 | + | ||
| 429 | + const cv::Scalar& color = colors[obj.label]; | ||
| 430 | + | ||
| 431 | + // fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f @ %.2f\n", obj.label, obj.prob, | ||
| 432 | + // obj.rrect.center.x, obj.rrect.center.y, obj.rrect.size.width, obj.rrect.size.height, obj.rrect.angle); | ||
| 433 | + | ||
| 434 | + cv::Point2f corners[4]; | ||
| 435 | + obj.rrect.points(corners); | ||
| 436 | + cv::line(rgb, corners[0], corners[1], color); | ||
| 437 | + cv::line(rgb, corners[1], corners[2], color); | ||
| 438 | + cv::line(rgb, corners[2], corners[3], color); | ||
| 439 | + cv::line(rgb, corners[3], corners[0], color); | ||
| 440 | + } | ||
| 441 | + | ||
| 442 | + for (size_t i = 0; i < objects.size(); i++) | ||
| 443 | + { | ||
| 444 | + const Object& obj = objects[i]; | ||
| 445 | + | ||
| 446 | + const cv::Scalar& color = colors[obj.label]; | ||
| 447 | + | ||
| 448 | + char text[256]; | ||
| 449 | + sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100); | ||
| 450 | + | ||
| 451 | + int baseLine = 0; | ||
| 452 | + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); | ||
| 453 | + | ||
| 454 | + int x = obj.rrect.center.x - label_size.width / 2; | ||
| 455 | + int y = obj.rrect.center.y - label_size.height / 2 - baseLine; | ||
| 456 | + if (y < 0) | ||
| 457 | + y = 0; | ||
| 458 | + if (y + label_size.height > rgb.rows) | ||
| 459 | + y = rgb.rows - label_size.height; | ||
| 460 | + if (x < 0) | ||
| 461 | + x = 0; | ||
| 462 | + if (x + label_size.width > rgb.cols) | ||
| 463 | + x = rgb.cols - label_size.width; | ||
| 464 | + | ||
| 465 | + cv::rectangle(rgb, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), | ||
| 466 | + cv::Scalar(255, 255, 255), -1); | ||
| 467 | + | ||
| 468 | + cv::putText(rgb, text, cv::Point(x, y + label_size.height), | ||
| 469 | + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); | ||
| 470 | + } | ||
| 471 | + | ||
| 472 | + return 0; | ||
| 473 | +} |
| 1 | +// Tencent is pleased to support the open source community by making ncnn available. | ||
| 2 | +// | ||
| 3 | +// Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. | ||
| 4 | +// | ||
| 5 | +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | ||
| 6 | +// in compliance with the License. You may obtain a copy of the License at | ||
| 7 | +// | ||
| 8 | +// https://opensource.org/licenses/BSD-3-Clause | ||
| 9 | +// | ||
| 10 | +// Unless required by applicable law or agreed to in writing, software distributed | ||
| 11 | +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
| 12 | +// CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
| 13 | +// specific language governing permissions and limitations under the License. | ||
| 14 | + | ||
| 15 | +// 1. install | ||
| 16 | +// pip3 install -U ultralytics pnnx ncnn | ||
| 17 | +// 2. export yolov8-pose torchscript | ||
| 18 | +// yolo export model=yolov8n-pose.pt format=torchscript | ||
| 19 | +// 3. convert torchscript with static shape | ||
| 20 | +// pnnx yolov8n-pose.torchscript | ||
| 21 | +// 4. modify yolov8n_pose_pnnx.py for dynamic shape inference | ||
| 22 | +// A. modify reshape to support dynamic image sizes | ||
| 23 | +// B. permute tensor before concat and adjust concat axis | ||
| 24 | +// C. drop post-process part | ||
| 25 | +// before: | ||
| 26 | +// v_137 = v_136.view(1, 51, 6400) | ||
| 27 | +// v_143 = v_142.view(1, 51, 1600) | ||
| 28 | +// v_149 = v_148.view(1, 51, 400) | ||
| 29 | +// v_150 = torch.cat((v_137, v_143, v_149), dim=-1) | ||
| 30 | +// ... | ||
| 31 | +// v_184 = v_161.view(1, 65, 6400) | ||
| 32 | +// v_185 = v_172.view(1, 65, 1600) | ||
| 33 | +// v_186 = v_183.view(1, 65, 400) | ||
| 34 | +// v_187 = torch.cat((v_184, v_185, v_186), dim=2) | ||
| 35 | +// ... | ||
| 36 | +// after: | ||
| 37 | +// v_137 = v_136.view(1, 51, -1).transpose(1, 2) | ||
| 38 | +// v_143 = v_142.view(1, 51, -1).transpose(1, 2) | ||
| 39 | +// v_149 = v_148.view(1, 51, -1).transpose(1, 2) | ||
| 40 | +// v_150 = torch.cat((v_137, v_143, v_149), dim=1) | ||
| 41 | +// ... | ||
| 42 | +// v_184 = v_161.view(1, 65, -1).transpose(1, 2) | ||
| 43 | +// v_185 = v_172.view(1, 65, -1).transpose(1, 2) | ||
| 44 | +// v_186 = v_183.view(1, 65, -1).transpose(1, 2) | ||
| 45 | +// v_187 = torch.cat((v_184, v_185, v_186), dim=1) | ||
| 46 | +// return v_187, v_150 | ||
| 47 | +// 5. re-export yolov8-pose torchscript | ||
| 48 | +// python3 -c 'import yolov8n_pose_pnnx; yolov8n_pose_pnnx.export_torchscript()' | ||
| 49 | +// 6. convert new torchscript with dynamic shape | ||
| 50 | +// pnnx yolov8n_pose_pnnx.py.pt inputshape=[1,3,640,640] inputshape2=[1,3,320,320] | ||
| 51 | +// 7. now you get ncnn model files | ||
| 52 | +// mv yolov8n_pose_pnnx.py.ncnn.param yolov8n_pose.ncnn.param | ||
| 53 | +// mv yolov8n_pose_pnnx.py.ncnn.bin yolov8n_pose.ncnn.bin | ||
| 54 | + | ||
| 55 | +// the out blob would be a 2-dim tensor with w=65 h=8400 | ||
| 56 | +// | ||
| 57 | +// | bbox-reg 16 x 4 |score(1)| | ||
| 58 | +// +-----+-----+-----+-----+--------+ | ||
| 59 | +// | dx0 | dy0 | dx1 | dy1 | 0.1 | | ||
| 60 | +// all /| | | | | | | ||
| 61 | +// boxes | .. | .. | .. | .. | 0.0 | | ||
| 62 | +// (8400)| | | | | . | | ||
| 63 | +// \| | | | | . | | ||
| 64 | +// +-----+-----+-----+-----+--------+ | ||
| 65 | +// | ||
| 66 | + | ||
| 67 | +// | ||
| 68 | +// | pose (51) | | ||
| 69 | +// +-----------+ | ||
| 70 | +// |0.1........| | ||
| 71 | +// all /| | | ||
| 72 | +// boxes |0.0........| | ||
| 73 | +// (8400)| . | | ||
| 74 | +// \| . | | ||
| 75 | +// +-----------+ | ||
| 76 | +// | ||
| 77 | + | ||
| 78 | +#include "yolov8.h" | ||
| 79 | + | ||
| 80 | +#include "layer.h" | ||
| 81 | + | ||
| 82 | +#include <opencv2/core/core.hpp> | ||
| 83 | +#include <opencv2/imgproc/imgproc.hpp> | ||
| 84 | + | ||
| 85 | +#include <float.h> | ||
| 86 | +#include <stdio.h> | ||
| 87 | +#include <vector> | ||
| 88 | + | ||
| 89 | +static inline float intersection_area(const Object& a, const Object& b) | ||
| 90 | +{ | ||
| 91 | + cv::Rect_<float> inter = a.rect & b.rect; | ||
| 92 | + return inter.area(); | ||
| 93 | +} | ||
| 94 | + | ||
| 95 | +static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right) | ||
| 96 | +{ | ||
| 97 | + int i = left; | ||
| 98 | + int j = right; | ||
| 99 | + float p = objects[(left + right) / 2].prob; | ||
| 100 | + | ||
| 101 | + while (i <= j) | ||
| 102 | + { | ||
| 103 | + while (objects[i].prob > p) | ||
| 104 | + i++; | ||
| 105 | + | ||
| 106 | + while (objects[j].prob < p) | ||
| 107 | + j--; | ||
| 108 | + | ||
| 109 | + if (i <= j) | ||
| 110 | + { | ||
| 111 | + // swap | ||
| 112 | + std::swap(objects[i], objects[j]); | ||
| 113 | + | ||
| 114 | + i++; | ||
| 115 | + j--; | ||
| 116 | + } | ||
| 117 | + } | ||
| 118 | + | ||
| 119 | + // #pragma omp parallel sections | ||
| 120 | + { | ||
| 121 | + // #pragma omp section | ||
| 122 | + { | ||
| 123 | + if (left < j) qsort_descent_inplace(objects, left, j); | ||
| 124 | + } | ||
| 125 | + // #pragma omp section | ||
| 126 | + { | ||
| 127 | + if (i < right) qsort_descent_inplace(objects, i, right); | ||
| 128 | + } | ||
| 129 | + } | ||
| 130 | +} | ||
| 131 | + | ||
| 132 | +static void qsort_descent_inplace(std::vector<Object>& objects) | ||
| 133 | +{ | ||
| 134 | + if (objects.empty()) | ||
| 135 | + return; | ||
| 136 | + | ||
| 137 | + qsort_descent_inplace(objects, 0, objects.size() - 1); | ||
| 138 | +} | ||
| 139 | + | ||
| 140 | +static void nms_sorted_bboxes(const std::vector<Object>& objects, std::vector<int>& picked, float nms_threshold, bool agnostic = false) | ||
| 141 | +{ | ||
| 142 | + picked.clear(); | ||
| 143 | + | ||
| 144 | + const int n = objects.size(); | ||
| 145 | + | ||
| 146 | + std::vector<float> areas(n); | ||
| 147 | + for (int i = 0; i < n; i++) | ||
| 148 | + { | ||
| 149 | + areas[i] = objects[i].rect.area(); | ||
| 150 | + } | ||
| 151 | + | ||
| 152 | + for (int i = 0; i < n; i++) | ||
| 153 | + { | ||
| 154 | + const Object& a = objects[i]; | ||
| 155 | + | ||
| 156 | + int keep = 1; | ||
| 157 | + for (int j = 0; j < (int)picked.size(); j++) | ||
| 158 | + { | ||
| 159 | + const Object& b = objects[picked[j]]; | ||
| 160 | + | ||
| 161 | + if (!agnostic && a.label != b.label) | ||
| 162 | + continue; | ||
| 163 | + | ||
| 164 | + // intersection over union | ||
| 165 | + float inter_area = intersection_area(a, b); | ||
| 166 | + float union_area = areas[i] + areas[picked[j]] - inter_area; | ||
| 167 | + // float IoU = inter_area / union_area | ||
| 168 | + if (inter_area / union_area > nms_threshold) | ||
| 169 | + keep = 0; | ||
| 170 | + } | ||
| 171 | + | ||
| 172 | + if (keep) | ||
| 173 | + picked.push_back(i); | ||
| 174 | + } | ||
| 175 | +} | ||
| 176 | + | ||
// Logistic function 1 / (1 + e^-x), used to turn a raw logit into a score.
static inline float sigmoid(float x)
{
    const float denominator = 1.0f + expf(-x);
    return 1.0f / denominator;
}
| 181 | + | ||
| 182 | +static void generate_proposals(const ncnn::Mat& pred, const ncnn::Mat& pred_points, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects) | ||
| 183 | +{ | ||
| 184 | + const int w = in_pad.w; | ||
| 185 | + const int h = in_pad.h; | ||
| 186 | + | ||
| 187 | + const int num_grid_x = w / stride; | ||
| 188 | + const int num_grid_y = h / stride; | ||
| 189 | + | ||
| 190 | + const int reg_max_1 = 16; | ||
| 191 | + const int num_points = pred_points.w / 3; | ||
| 192 | + | ||
| 193 | + for (int y = 0; y < num_grid_y; y++) | ||
| 194 | + { | ||
| 195 | + for (int x = 0; x < num_grid_x; x++) | ||
| 196 | + { | ||
| 197 | + const ncnn::Mat pred_grid = pred.row_range(y * num_grid_x + x, 1); | ||
| 198 | + const ncnn::Mat pred_points_grid = pred_points.row_range(y * num_grid_x + x, 1).reshape(3, num_points); | ||
| 199 | + | ||
| 200 | + // find label with max score | ||
| 201 | + int label = 0; | ||
| 202 | + float score = sigmoid(pred_grid[reg_max_1 * 4]); | ||
| 203 | + | ||
| 204 | + if (score >= prob_threshold) | ||
| 205 | + { | ||
| 206 | + ncnn::Mat pred_bbox = pred_grid.range(0, reg_max_1 * 4).reshape(reg_max_1, 4).clone(); | ||
| 207 | + | ||
| 208 | + { | ||
| 209 | + ncnn::Layer* softmax = ncnn::create_layer("Softmax"); | ||
| 210 | + | ||
| 211 | + ncnn::ParamDict pd; | ||
| 212 | + pd.set(0, 1); // axis | ||
| 213 | + pd.set(1, 1); | ||
| 214 | + softmax->load_param(pd); | ||
| 215 | + | ||
| 216 | + ncnn::Option opt; | ||
| 217 | + opt.num_threads = 1; | ||
| 218 | + opt.use_packing_layout = false; | ||
| 219 | + | ||
| 220 | + softmax->create_pipeline(opt); | ||
| 221 | + | ||
| 222 | + softmax->forward_inplace(pred_bbox, opt); | ||
| 223 | + | ||
| 224 | + softmax->destroy_pipeline(opt); | ||
| 225 | + | ||
| 226 | + delete softmax; | ||
| 227 | + } | ||
| 228 | + | ||
| 229 | + float pred_ltrb[4]; | ||
| 230 | + for (int k = 0; k < 4; k++) | ||
| 231 | + { | ||
| 232 | + float dis = 0.f; | ||
| 233 | + const float* dis_after_sm = pred_bbox.row(k); | ||
| 234 | + for (int l = 0; l < reg_max_1; l++) | ||
| 235 | + { | ||
| 236 | + dis += l * dis_after_sm[l]; | ||
| 237 | + } | ||
| 238 | + | ||
| 239 | + pred_ltrb[k] = dis * stride; | ||
| 240 | + } | ||
| 241 | + | ||
| 242 | + float pb_cx = (x + 0.5f) * stride; | ||
| 243 | + float pb_cy = (y + 0.5f) * stride; | ||
| 244 | + | ||
| 245 | + float x0 = pb_cx - pred_ltrb[0]; | ||
| 246 | + float y0 = pb_cy - pred_ltrb[1]; | ||
| 247 | + float x1 = pb_cx + pred_ltrb[2]; | ||
| 248 | + float y1 = pb_cy + pred_ltrb[3]; | ||
| 249 | + | ||
| 250 | + std::vector<KeyPoint> keypoints; | ||
| 251 | + for (int k = 0; k < num_points; k++) | ||
| 252 | + { | ||
| 253 | + KeyPoint keypoint; | ||
| 254 | + keypoint.p.x = (x + pred_points_grid.row(k)[0] * 2) * stride; | ||
| 255 | + keypoint.p.y = (y + pred_points_grid.row(k)[1] * 2) * stride; | ||
| 256 | + keypoint.prob = sigmoid(pred_points_grid.row(k)[2]); | ||
| 257 | + keypoints.push_back(keypoint); | ||
| 258 | + } | ||
| 259 | + | ||
| 260 | + Object obj; | ||
| 261 | + obj.rect.x = x0; | ||
| 262 | + obj.rect.y = y0; | ||
| 263 | + obj.rect.width = x1 - x0; | ||
| 264 | + obj.rect.height = y1 - y0; | ||
| 265 | + obj.label = label; | ||
| 266 | + obj.prob = score; | ||
| 267 | + obj.keypoints = keypoints; | ||
| 268 | + | ||
| 269 | + objects.push_back(obj); | ||
| 270 | + } | ||
| 271 | + } | ||
| 272 | + } | ||
| 273 | +} | ||
| 274 | + | ||
| 275 | +static void generate_proposals(const ncnn::Mat& pred, const ncnn::Mat& pred_points, const std::vector<int>& strides, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects) | ||
| 276 | +{ | ||
| 277 | + const int w = in_pad.w; | ||
| 278 | + const int h = in_pad.h; | ||
| 279 | + | ||
| 280 | + int pred_row_offset = 0; | ||
| 281 | + for (size_t i = 0; i < strides.size(); i++) | ||
| 282 | + { | ||
| 283 | + const int stride = strides[i]; | ||
| 284 | + | ||
| 285 | + const int num_grid_x = w / stride; | ||
| 286 | + const int num_grid_y = h / stride; | ||
| 287 | + const int num_grid = num_grid_x * num_grid_y; | ||
| 288 | + | ||
| 289 | + generate_proposals(pred.row_range(pred_row_offset, num_grid), pred_points.row_range(pred_row_offset, num_grid), stride, in_pad, prob_threshold, objects); | ||
| 290 | + | ||
| 291 | + pred_row_offset += num_grid; | ||
| 292 | + } | ||
| 293 | +} | ||
| 294 | + | ||
| 295 | +int YOLOv8_pose::detect(const cv::Mat& rgb, std::vector<Object>& objects) | ||
| 296 | +{ | ||
| 297 | + const int target_size = det_target_size;//640; | ||
| 298 | + const float prob_threshold = 0.25f; | ||
| 299 | + const float nms_threshold = 0.45f; | ||
| 300 | + const float mask_threshold = 0.5f; | ||
| 301 | + | ||
| 302 | + int img_w = rgb.cols; | ||
| 303 | + int img_h = rgb.rows; | ||
| 304 | + | ||
| 305 | + // ultralytics/cfg/models/v8/yolov8.yaml | ||
| 306 | + std::vector<int> strides(3); | ||
| 307 | + strides[0] = 8; | ||
| 308 | + strides[1] = 16; | ||
| 309 | + strides[2] = 32; | ||
| 310 | + const int max_stride = 32; | ||
| 311 | + | ||
| 312 | + // letterbox pad to multiple of max_stride | ||
| 313 | + int w = img_w; | ||
| 314 | + int h = img_h; | ||
| 315 | + float scale = 1.f; | ||
| 316 | + if (w > h) | ||
| 317 | + { | ||
| 318 | + scale = (float)target_size / w; | ||
| 319 | + w = target_size; | ||
| 320 | + h = h * scale; | ||
| 321 | + } | ||
| 322 | + else | ||
| 323 | + { | ||
| 324 | + scale = (float)target_size / h; | ||
| 325 | + h = target_size; | ||
| 326 | + w = w * scale; | ||
| 327 | + } | ||
| 328 | + | ||
| 329 | + ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgb.data, ncnn::Mat::PIXEL_RGB, img_w, img_h, w, h); | ||
| 330 | + | ||
| 331 | + // letterbox pad to target_size rectangle | ||
| 332 | + int wpad = (w + max_stride - 1) / max_stride * max_stride - w; | ||
| 333 | + int hpad = (h + max_stride - 1) / max_stride * max_stride - h; | ||
| 334 | + ncnn::Mat in_pad; | ||
| 335 | + ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f); | ||
| 336 | + | ||
| 337 | + const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f}; | ||
| 338 | + in_pad.substract_mean_normalize(0, norm_vals); | ||
| 339 | + | ||
| 340 | + ncnn::Extractor ex = yolov8.create_extractor(); | ||
| 341 | + | ||
| 342 | + ex.input("in0", in_pad); | ||
| 343 | + | ||
| 344 | + ncnn::Mat out; | ||
| 345 | + ex.extract("out0", out); | ||
| 346 | + | ||
| 347 | + ncnn::Mat out_points; | ||
| 348 | + ex.extract("out1", out_points); | ||
| 349 | + | ||
| 350 | + std::vector<Object> proposals; | ||
| 351 | + generate_proposals(out, out_points, strides, in_pad, prob_threshold, proposals); | ||
| 352 | + | ||
| 353 | + // sort all proposals by score from highest to lowest | ||
| 354 | + qsort_descent_inplace(proposals); | ||
| 355 | + | ||
| 356 | + // apply nms with nms_threshold | ||
| 357 | + std::vector<int> picked; | ||
| 358 | + nms_sorted_bboxes(proposals, picked, nms_threshold); | ||
| 359 | + | ||
| 360 | + int count = picked.size(); | ||
| 361 | + if (count == 0) | ||
| 362 | + return 0; | ||
| 363 | + | ||
| 364 | + const int num_points = out_points.w / 3; | ||
| 365 | + | ||
| 366 | + objects.resize(count); | ||
| 367 | + for (int i = 0; i < count; i++) | ||
| 368 | + { | ||
| 369 | + objects[i] = proposals[picked[i]]; | ||
| 370 | + | ||
| 371 | + // adjust offset to original unpadded | ||
| 372 | + float x0 = (objects[i].rect.x - (wpad / 2)) / scale; | ||
| 373 | + float y0 = (objects[i].rect.y - (hpad / 2)) / scale; | ||
| 374 | + float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale; | ||
| 375 | + float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale; | ||
| 376 | + | ||
| 377 | + for (int j = 0; j < num_points; j++) | ||
| 378 | + { | ||
| 379 | + objects[i].keypoints[j].p.x = (objects[i].keypoints[j].p.x - (wpad / 2)) / scale; | ||
| 380 | + objects[i].keypoints[j].p.y = (objects[i].keypoints[j].p.y - (hpad / 2)) / scale; | ||
| 381 | + } | ||
| 382 | + | ||
| 383 | + // clip | ||
| 384 | + x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f); | ||
| 385 | + y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f); | ||
| 386 | + x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f); | ||
| 387 | + y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f); | ||
| 388 | + | ||
| 389 | + objects[i].rect.x = x0; | ||
| 390 | + objects[i].rect.y = y0; | ||
| 391 | + objects[i].rect.width = x1 - x0; | ||
| 392 | + objects[i].rect.height = y1 - y0; | ||
| 393 | + } | ||
| 394 | + | ||
| 395 | + // sort objects by area | ||
| 396 | + struct | ||
| 397 | + { | ||
| 398 | + bool operator()(const Object& a, const Object& b) const | ||
| 399 | + { | ||
| 400 | + return a.rect.area() > b.rect.area(); | ||
| 401 | + } | ||
| 402 | + } objects_area_greater; | ||
| 403 | + std::sort(objects.begin(), objects.end(), objects_area_greater); | ||
| 404 | + | ||
| 405 | + return 0; | ||
| 406 | +} | ||
| 407 | + | ||
| 408 | +int YOLOv8_pose::draw(cv::Mat& rgb, const std::vector<Object>& objects) | ||
| 409 | +{ | ||
| 410 | + static const char* class_names[] = {"person"}; | ||
| 411 | + | ||
| 412 | + static const cv::Scalar colors[] = { | ||
| 413 | + cv::Scalar( 67, 54, 244), | ||
| 414 | + cv::Scalar( 30, 99, 233), | ||
| 415 | + cv::Scalar( 39, 176, 156), | ||
| 416 | + cv::Scalar( 58, 183, 103), | ||
| 417 | + cv::Scalar( 81, 181, 63), | ||
| 418 | + cv::Scalar(150, 243, 33), | ||
| 419 | + cv::Scalar(169, 244, 3), | ||
| 420 | + cv::Scalar(188, 212, 0), | ||
| 421 | + cv::Scalar(150, 136, 0), | ||
| 422 | + cv::Scalar(175, 80, 76), | ||
| 423 | + cv::Scalar(195, 74, 139), | ||
| 424 | + cv::Scalar(220, 57, 205), | ||
| 425 | + cv::Scalar(235, 59, 255), | ||
| 426 | + cv::Scalar(193, 7, 255), | ||
| 427 | + cv::Scalar(152, 0, 255), | ||
| 428 | + cv::Scalar( 87, 34, 255), | ||
| 429 | + cv::Scalar( 85, 72, 121), | ||
| 430 | + cv::Scalar(158, 158, 158), | ||
| 431 | + cv::Scalar(125, 139, 96) | ||
| 432 | + }; | ||
| 433 | + | ||
| 434 | + for (size_t i = 0; i < objects.size(); i++) | ||
| 435 | + { | ||
| 436 | + const Object& obj = objects[i]; | ||
| 437 | + | ||
| 438 | + const cv::Scalar& color = colors[i % 19]; | ||
| 439 | + | ||
| 440 | + // fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob, | ||
| 441 | + // obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); | ||
| 442 | + | ||
| 443 | + // draw bone | ||
| 444 | + static const int joint_pairs[16][2] = { | ||
| 445 | + {0, 1}, {1, 3}, {0, 2}, {2, 4}, {5, 6}, {5, 7}, {7, 9}, {6, 8}, {8, 10}, {5, 11}, {6, 12}, {11, 12}, {11, 13}, {12, 14}, {13, 15}, {14, 16} | ||
| 446 | + }; | ||
| 447 | + static const cv::Scalar bone_colors[] = { | ||
| 448 | + cv::Scalar( 0, 0, 255), | ||
| 449 | + cv::Scalar( 0, 0, 255), | ||
| 450 | + cv::Scalar( 0, 0, 255), | ||
| 451 | + cv::Scalar( 0, 0, 255), | ||
| 452 | + cv::Scalar( 0, 255, 128), | ||
| 453 | + cv::Scalar( 0, 255, 128), | ||
| 454 | + cv::Scalar( 0, 255, 128), | ||
| 455 | + cv::Scalar( 0, 255, 128), | ||
| 456 | + cv::Scalar( 0, 255, 128), | ||
| 457 | + cv::Scalar(255, 255, 51), | ||
| 458 | + cv::Scalar(255, 255, 51), | ||
| 459 | + cv::Scalar(255, 255, 51), | ||
| 460 | + cv::Scalar(255, 51, 153), | ||
| 461 | + cv::Scalar(255, 51, 153), | ||
| 462 | + cv::Scalar(255, 51, 153), | ||
| 463 | + cv::Scalar(255, 51, 153), | ||
| 464 | + }; | ||
| 465 | + | ||
| 466 | + for (int j = 0; j < 16; j++) | ||
| 467 | + { | ||
| 468 | + const KeyPoint& p1 = obj.keypoints[joint_pairs[j][0]]; | ||
| 469 | + const KeyPoint& p2 = obj.keypoints[joint_pairs[j][1]]; | ||
| 470 | + | ||
| 471 | + if (p1.prob < 0.2f || p2.prob < 0.2f) | ||
| 472 | + continue; | ||
| 473 | + | ||
| 474 | + cv::line(rgb, p1.p, p2.p, bone_colors[j], 2); | ||
| 475 | + } | ||
| 476 | + | ||
| 477 | + // draw joint | ||
| 478 | + for (size_t j = 0; j < obj.keypoints.size(); j++) | ||
| 479 | + { | ||
| 480 | + const KeyPoint& keypoint = obj.keypoints[j]; | ||
| 481 | + | ||
| 482 | + // fprintf(stderr, "%.2f %.2f = %.5f\n", keypoint.p.x, keypoint.p.y, keypoint.prob); | ||
| 483 | + | ||
| 484 | + if (keypoint.prob < 0.2f) | ||
| 485 | + continue; | ||
| 486 | + | ||
| 487 | + cv::circle(rgb, keypoint.p, 3, color, -1); | ||
| 488 | + } | ||
| 489 | + | ||
| 490 | + cv::rectangle(rgb, obj.rect, color); | ||
| 491 | + | ||
| 492 | + char text[256]; | ||
| 493 | + sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100); | ||
| 494 | + | ||
| 495 | + int baseLine = 0; | ||
| 496 | + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); | ||
| 497 | + | ||
| 498 | + int x = obj.rect.x; | ||
| 499 | + int y = obj.rect.y - label_size.height - baseLine; | ||
| 500 | + if (y < 0) | ||
| 501 | + y = 0; | ||
| 502 | + if (x + label_size.width > rgb.cols) | ||
| 503 | + x = rgb.cols - label_size.width; | ||
| 504 | + | ||
| 505 | + cv::rectangle(rgb, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), | ||
| 506 | + cv::Scalar(255, 255, 255), -1); | ||
| 507 | + | ||
| 508 | + cv::putText(rgb, text, cv::Point(x, y + label_size.height), | ||
| 509 | + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); | ||
| 510 | + } | ||
| 511 | + | ||
| 512 | + return 0; | ||
| 513 | +} |
| 1 | +// Tencent is pleased to support the open source community by making ncnn available. | ||
| 2 | +// | ||
| 3 | +// Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. | ||
| 4 | +// | ||
| 5 | +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | ||
| 6 | +// in compliance with the License. You may obtain a copy of the License at | ||
| 7 | +// | ||
| 8 | +// https://opensource.org/licenses/BSD-3-Clause | ||
| 9 | +// | ||
| 10 | +// Unless required by applicable law or agreed to in writing, software distributed | ||
| 11 | +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
| 12 | +// CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
| 13 | +// specific language governing permissions and limitations under the License. | ||
| 14 | + | ||
| 15 | +// 1. install | ||
| 16 | +// pip3 install -U ultralytics pnnx ncnn | ||
| 17 | +// 2. export yolov8-seg torchscript | ||
| 18 | +// yolo export model=yolov8n-seg.pt format=torchscript | ||
| 19 | +// 3. convert torchscript with static shape | ||
| 20 | +// pnnx yolov8n-seg.torchscript | ||
| 21 | +// 4. modify yolov8n_seg_pnnx.py for dynamic shape inference | ||
| 22 | +// A. modify reshape to support dynamic image sizes | ||
| 23 | +// B. permute tensor before concat and adjust concat axis | ||
| 24 | +// C. drop post-process part | ||
| 25 | +// before: | ||
| 26 | +// v_144 = v_143.view(1, 32, 6400) | ||
| 27 | +// v_150 = v_149.view(1, 32, 1600) | ||
| 28 | +// v_156 = v_155.view(1, 32, 400) | ||
| 29 | +// v_157 = torch.cat((v_144, v_150, v_156), dim=2) | ||
| 30 | +// ... | ||
| 31 | +// v_191 = v_168.view(1, 144, 6400) | ||
| 32 | +// v_192 = v_179.view(1, 144, 1600) | ||
| 33 | +// v_193 = v_190.view(1, 144, 400) | ||
| 34 | +// v_194 = torch.cat((v_191, v_192, v_193), dim=2) | ||
| 35 | +// ... | ||
| 36 | +// v_215 = (v_214, v_138, ) | ||
| 37 | +// return v_215 | ||
| 38 | +// after: | ||
| 39 | +// v_144 = v_143.view(1, 32, -1).transpose(1, 2) | ||
| 40 | +// v_150 = v_149.view(1, 32, -1).transpose(1, 2) | ||
| 41 | +// v_156 = v_155.view(1, 32, -1).transpose(1, 2) | ||
| 42 | +// v_157 = torch.cat((v_144, v_150, v_156), dim=1) | ||
| 43 | +// ... | ||
| 44 | +// v_191 = v_168.view(1, 144, -1).transpose(1, 2) | ||
| 45 | +// v_192 = v_179.view(1, 144, -1).transpose(1, 2) | ||
| 46 | +// v_193 = v_190.view(1, 144, -1).transpose(1, 2) | ||
| 47 | +// v_194 = torch.cat((v_191, v_192, v_193), dim=1) | ||
| 48 | +// return v_194, v_157, v_138 | ||
| 49 | +// 5. re-export yolov8-seg torchscript | ||
| 50 | +// python3 -c 'import yolov8n_seg_pnnx; yolov8n_seg_pnnx.export_torchscript()' | ||
| 51 | +// 6. convert new torchscript with dynamic shape | ||
| 52 | +// pnnx yolov8n_seg_pnnx.py.pt inputshape=[1,3,640,640] inputshape2=[1,3,320,320] | ||
| 53 | +// 7. now you get ncnn model files | ||
| 54 | +// mv yolov8n_seg_pnnx.py.ncnn.param yolov8n_seg.ncnn.param | ||
| 55 | +// mv yolov8n_seg_pnnx.py.ncnn.bin yolov8n_seg.ncnn.bin | ||
| 56 | + | ||
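| 57 | +// out0 is a 2-dim tensor with w=144 h=8400 and out1 (mask coefficients) is a 2-dim tensor with w=32 h=8400, i.e. 176 values per box in total (h=8400 for a 640x640 input) | ||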
| 58 | +// | ||
| 59 | +// | bbox-reg 16 x 4 | per-class scores(80) | | ||
| 60 | +// +-----+-----+-----+-----+----------------------+ | ||
| 61 | +// | dx0 | dy0 | dx1 | dy1 |0.1 0.0 0.0 0.5 ......| | ||
| 62 | +// all /| | | | | . | | ||
| 63 | +// boxes | .. | .. | .. | .. |0.0 0.9 0.0 0.0 ......| | ||
| 64 | +// (8400)| | | | | . | | ||
| 65 | +// \| | | | | . | | ||
| 66 | +// +-----+-----+-----+-----+----------------------+ | ||
| 67 | +// | ||
| 68 | + | ||
| 69 | +// | ||
| 70 | +// | mask (32) | | ||
| 71 | +// +-----------+ | ||
| 72 | +// |0.1........| | ||
| 73 | +// all /| | | ||
| 74 | +// boxes |0.0........| | ||
| 75 | +// (8400)| . | | ||
| 76 | +// \| . | | ||
| 77 | +// +-----------+ | ||
| 78 | +// | ||
| 79 | + | ||
| 80 | +#include "yolov8.h" | ||
| 81 | + | ||
| 82 | +#include "layer.h" | ||
| 83 | + | ||
| 84 | +#include <opencv2/core/core.hpp> | ||
| 85 | +#include <opencv2/imgproc/imgproc.hpp> | ||
| 86 | + | ||
| 87 | +#include <float.h> | ||
| 88 | +#include <stdio.h> | ||
| 89 | +#include <vector> | ||
| 90 | + | ||
| 91 | +static inline float intersection_area(const Object& a, const Object& b) | ||
| 92 | +{ | ||
| 93 | + cv::Rect_<float> inter = a.rect & b.rect; | ||
| 94 | + return inter.area(); | ||
| 95 | +} | ||
| 96 | + | ||
| 97 | +static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right) | ||
| 98 | +{ | ||
| 99 | + int i = left; | ||
| 100 | + int j = right; | ||
| 101 | + float p = objects[(left + right) / 2].prob; | ||
| 102 | + | ||
| 103 | + while (i <= j) | ||
| 104 | + { | ||
| 105 | + while (objects[i].prob > p) | ||
| 106 | + i++; | ||
| 107 | + | ||
| 108 | + while (objects[j].prob < p) | ||
| 109 | + j--; | ||
| 110 | + | ||
| 111 | + if (i <= j) | ||
| 112 | + { | ||
| 113 | + // swap | ||
| 114 | + std::swap(objects[i], objects[j]); | ||
| 115 | + | ||
| 116 | + i++; | ||
| 117 | + j--; | ||
| 118 | + } | ||
| 119 | + } | ||
| 120 | + | ||
| 121 | + // #pragma omp parallel sections | ||
| 122 | + { | ||
| 123 | + // #pragma omp section | ||
| 124 | + { | ||
| 125 | + if (left < j) qsort_descent_inplace(objects, left, j); | ||
| 126 | + } | ||
| 127 | + // #pragma omp section | ||
| 128 | + { | ||
| 129 | + if (i < right) qsort_descent_inplace(objects, i, right); | ||
| 130 | + } | ||
| 131 | + } | ||
| 132 | +} | ||
| 133 | + | ||
| 134 | +static void qsort_descent_inplace(std::vector<Object>& objects) | ||
| 135 | +{ | ||
| 136 | + if (objects.empty()) | ||
| 137 | + return; | ||
| 138 | + | ||
| 139 | + qsort_descent_inplace(objects, 0, objects.size() - 1); | ||
| 140 | +} | ||
| 141 | + | ||
| 142 | +static void nms_sorted_bboxes(const std::vector<Object>& objects, std::vector<int>& picked, float nms_threshold, bool agnostic = false) | ||
| 143 | +{ | ||
| 144 | + picked.clear(); | ||
| 145 | + | ||
| 146 | + const int n = objects.size(); | ||
| 147 | + | ||
| 148 | + std::vector<float> areas(n); | ||
| 149 | + for (int i = 0; i < n; i++) | ||
| 150 | + { | ||
| 151 | + areas[i] = objects[i].rect.area(); | ||
| 152 | + } | ||
| 153 | + | ||
| 154 | + for (int i = 0; i < n; i++) | ||
| 155 | + { | ||
| 156 | + const Object& a = objects[i]; | ||
| 157 | + | ||
| 158 | + int keep = 1; | ||
| 159 | + for (int j = 0; j < (int)picked.size(); j++) | ||
| 160 | + { | ||
| 161 | + const Object& b = objects[picked[j]]; | ||
| 162 | + | ||
| 163 | + if (!agnostic && a.label != b.label) | ||
| 164 | + continue; | ||
| 165 | + | ||
| 166 | + // intersection over union | ||
| 167 | + float inter_area = intersection_area(a, b); | ||
| 168 | + float union_area = areas[i] + areas[picked[j]] - inter_area; | ||
| 169 | + // float IoU = inter_area / union_area | ||
| 170 | + if (inter_area / union_area > nms_threshold) | ||
| 171 | + keep = 0; | ||
| 172 | + } | ||
| 173 | + | ||
| 174 | + if (keep) | ||
| 175 | + picked.push_back(i); | ||
| 176 | + } | ||
| 177 | +} | ||
| 178 | + | ||
// Logistic function 1 / (1 + e^-x).
static inline float sigmoid(float x)
{
    const float e = expf(-x);
    return 1.0f / (1.0f + e);
}
| 183 | + | ||
| 184 | +static void generate_proposals(const ncnn::Mat& pred, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects) | ||
| 185 | +{ | ||
| 186 | + const int w = in_pad.w; | ||
| 187 | + const int h = in_pad.h; | ||
| 188 | + | ||
| 189 | + const int num_grid_x = w / stride; | ||
| 190 | + const int num_grid_y = h / stride; | ||
| 191 | + | ||
| 192 | + const int reg_max_1 = 16; | ||
| 193 | + const int num_class = pred.w - reg_max_1 * 4; // number of classes. 80 for COCO | ||
| 194 | + | ||
| 195 | + for (int y = 0; y < num_grid_y; y++) | ||
| 196 | + { | ||
| 197 | + for (int x = 0; x < num_grid_x; x++) | ||
| 198 | + { | ||
| 199 | + const ncnn::Mat pred_grid = pred.row_range(y * num_grid_x + x, 1); | ||
| 200 | + | ||
| 201 | + // find label with max score | ||
| 202 | + int label = -1; | ||
| 203 | + float score = -FLT_MAX; | ||
| 204 | + { | ||
| 205 | + const ncnn::Mat pred_score = pred_grid.range(reg_max_1 * 4, num_class); | ||
| 206 | + | ||
| 207 | + for (int k = 0; k < num_class; k++) | ||
| 208 | + { | ||
| 209 | + float s = pred_score[k]; | ||
| 210 | + if (s > score) | ||
| 211 | + { | ||
| 212 | + label = k; | ||
| 213 | + score = s; | ||
| 214 | + } | ||
| 215 | + } | ||
| 216 | + | ||
| 217 | + score = sigmoid(score); | ||
| 218 | + } | ||
| 219 | + | ||
| 220 | + if (score >= prob_threshold) | ||
| 221 | + { | ||
| 222 | + ncnn::Mat pred_bbox = pred_grid.range(0, reg_max_1 * 4).reshape(reg_max_1, 4).clone(); | ||
| 223 | + | ||
| 224 | + { | ||
| 225 | + ncnn::Layer* softmax = ncnn::create_layer("Softmax"); | ||
| 226 | + | ||
| 227 | + ncnn::ParamDict pd; | ||
| 228 | + pd.set(0, 1); // axis | ||
| 229 | + pd.set(1, 1); | ||
| 230 | + softmax->load_param(pd); | ||
| 231 | + | ||
| 232 | + ncnn::Option opt; | ||
| 233 | + opt.num_threads = 1; | ||
| 234 | + opt.use_packing_layout = false; | ||
| 235 | + | ||
| 236 | + softmax->create_pipeline(opt); | ||
| 237 | + | ||
| 238 | + softmax->forward_inplace(pred_bbox, opt); | ||
| 239 | + | ||
| 240 | + softmax->destroy_pipeline(opt); | ||
| 241 | + | ||
| 242 | + delete softmax; | ||
| 243 | + } | ||
| 244 | + | ||
| 245 | + float pred_ltrb[4]; | ||
| 246 | + for (int k = 0; k < 4; k++) | ||
| 247 | + { | ||
| 248 | + float dis = 0.f; | ||
| 249 | + const float* dis_after_sm = pred_bbox.row(k); | ||
| 250 | + for (int l = 0; l < reg_max_1; l++) | ||
| 251 | + { | ||
| 252 | + dis += l * dis_after_sm[l]; | ||
| 253 | + } | ||
| 254 | + | ||
| 255 | + pred_ltrb[k] = dis * stride; | ||
| 256 | + } | ||
| 257 | + | ||
| 258 | + float pb_cx = (x + 0.5f) * stride; | ||
| 259 | + float pb_cy = (y + 0.5f) * stride; | ||
| 260 | + | ||
| 261 | + float x0 = pb_cx - pred_ltrb[0]; | ||
| 262 | + float y0 = pb_cy - pred_ltrb[1]; | ||
| 263 | + float x1 = pb_cx + pred_ltrb[2]; | ||
| 264 | + float y1 = pb_cy + pred_ltrb[3]; | ||
| 265 | + | ||
| 266 | + Object obj; | ||
| 267 | + obj.rect.x = x0; | ||
| 268 | + obj.rect.y = y0; | ||
| 269 | + obj.rect.width = x1 - x0; | ||
| 270 | + obj.rect.height = y1 - y0; | ||
| 271 | + obj.label = label; | ||
| 272 | + obj.prob = score; | ||
| 273 | + obj.gindex = y * num_grid_x + x; | ||
| 274 | + | ||
| 275 | + objects.push_back(obj); | ||
| 276 | + } | ||
| 277 | + } | ||
| 278 | + } | ||
| 279 | +} | ||
| 280 | + | ||
| 281 | +static void generate_proposals(const ncnn::Mat& pred, const std::vector<int>& strides, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects) | ||
| 282 | +{ | ||
| 283 | + const int w = in_pad.w; | ||
| 284 | + const int h = in_pad.h; | ||
| 285 | + | ||
| 286 | + int pred_row_offset = 0; | ||
| 287 | + for (size_t i = 0; i < strides.size(); i++) | ||
| 288 | + { | ||
| 289 | + const int stride = strides[i]; | ||
| 290 | + | ||
| 291 | + const int num_grid_x = w / stride; | ||
| 292 | + const int num_grid_y = h / stride; | ||
| 293 | + const int num_grid = num_grid_x * num_grid_y; | ||
| 294 | + | ||
| 295 | + std::vector<Object> objects_stride; | ||
| 296 | + generate_proposals(pred.row_range(pred_row_offset, num_grid), stride, in_pad, prob_threshold, objects_stride); | ||
| 297 | + | ||
| 298 | + for (size_t j = 0; j < objects_stride.size(); j++) | ||
| 299 | + { | ||
| 300 | + Object obj = objects_stride[j]; | ||
| 301 | + obj.gindex += pred_row_offset; | ||
| 302 | + objects.push_back(obj); | ||
| 303 | + } | ||
| 304 | + | ||
| 305 | + pred_row_offset += num_grid; | ||
| 306 | + } | ||
| 307 | +} | ||
| 308 | + | ||
| 309 | +int YOLOv8_seg::detect(const cv::Mat& rgb, std::vector<Object>& objects) | ||
| 310 | +{ | ||
| 311 | + const int target_size = det_target_size;//640; | ||
| 312 | + const float prob_threshold = 0.25f; | ||
| 313 | + const float nms_threshold = 0.45f; | ||
| 314 | + const float mask_threshold = 0.5f; | ||
| 315 | + | ||
| 316 | + int img_w = rgb.cols; | ||
| 317 | + int img_h = rgb.rows; | ||
| 318 | + | ||
| 319 | + // ultralytics/cfg/models/v8/yolov8.yaml | ||
| 320 | + std::vector<int> strides(3); | ||
| 321 | + strides[0] = 8; | ||
| 322 | + strides[1] = 16; | ||
| 323 | + strides[2] = 32; | ||
| 324 | + const int max_stride = 32; | ||
| 325 | + | ||
| 326 | + // letterbox pad to multiple of max_stride | ||
| 327 | + int w = img_w; | ||
| 328 | + int h = img_h; | ||
| 329 | + float scale = 1.f; | ||
| 330 | + if (w > h) | ||
| 331 | + { | ||
| 332 | + scale = (float)target_size / w; | ||
| 333 | + w = target_size; | ||
| 334 | + h = h * scale; | ||
| 335 | + } | ||
| 336 | + else | ||
| 337 | + { | ||
| 338 | + scale = (float)target_size / h; | ||
| 339 | + h = target_size; | ||
| 340 | + w = w * scale; | ||
| 341 | + } | ||
| 342 | + | ||
| 343 | + ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgb.data, ncnn::Mat::PIXEL_RGB, img_w, img_h, w, h); | ||
| 344 | + | ||
| 345 | + // letterbox pad to target_size rectangle | ||
| 346 | + int wpad = (w + max_stride - 1) / max_stride * max_stride - w; | ||
| 347 | + int hpad = (h + max_stride - 1) / max_stride * max_stride - h; | ||
| 348 | + ncnn::Mat in_pad; | ||
| 349 | + ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f); | ||
| 350 | + | ||
| 351 | + const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f}; | ||
| 352 | + in_pad.substract_mean_normalize(0, norm_vals); | ||
| 353 | + | ||
| 354 | + ncnn::Extractor ex = yolov8.create_extractor(); | ||
| 355 | + | ||
| 356 | + ex.input("in0", in_pad); | ||
| 357 | + | ||
| 358 | + ncnn::Mat out; | ||
| 359 | + ex.extract("out0", out); | ||
| 360 | + | ||
| 361 | + std::vector<Object> proposals; | ||
| 362 | + generate_proposals(out, strides, in_pad, prob_threshold, proposals); | ||
| 363 | + | ||
| 364 | + // sort all proposals by score from highest to lowest | ||
| 365 | + qsort_descent_inplace(proposals); | ||
| 366 | + | ||
| 367 | + // apply nms with nms_threshold | ||
| 368 | + std::vector<int> picked; | ||
| 369 | + nms_sorted_bboxes(proposals, picked, nms_threshold); | ||
| 370 | + | ||
| 371 | + int count = picked.size(); | ||
| 372 | + if (count == 0) | ||
| 373 | + return 0; | ||
| 374 | + | ||
| 375 | + ncnn::Mat mask_feat; | ||
| 376 | + ex.extract("out1", mask_feat); | ||
| 377 | + | ||
| 378 | + ncnn::Mat mask_protos; | ||
| 379 | + ex.extract("out2", mask_protos); | ||
| 380 | + | ||
| 381 | + ncnn::Mat objects_mask_feat(mask_feat.w, 1, count); | ||
| 382 | + | ||
| 383 | + objects.resize(count); | ||
| 384 | + for (int i = 0; i < count; i++) | ||
| 385 | + { | ||
| 386 | + objects[i] = proposals[picked[i]]; | ||
| 387 | + | ||
| 388 | + // adjust offset to original unpadded | ||
| 389 | + float x0 = (objects[i].rect.x - (wpad / 2)) / scale; | ||
| 390 | + float y0 = (objects[i].rect.y - (hpad / 2)) / scale; | ||
| 391 | + float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale; | ||
| 392 | + float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale; | ||
| 393 | + | ||
| 394 | + // clip | ||
| 395 | + x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f); | ||
| 396 | + y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f); | ||
| 397 | + x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f); | ||
| 398 | + y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f); | ||
| 399 | + | ||
| 400 | + objects[i].rect.x = x0; | ||
| 401 | + objects[i].rect.y = y0; | ||
| 402 | + objects[i].rect.width = x1 - x0; | ||
| 403 | + objects[i].rect.height = y1 - y0; | ||
| 404 | + | ||
| 405 | + // pick mask feat | ||
| 406 | + memcpy(objects_mask_feat.channel(i), mask_feat.row(objects[i].gindex), mask_feat.w * sizeof(float)); | ||
| 407 | + } | ||
| 408 | + | ||
| 409 | + // process mask | ||
| 410 | + ncnn::Mat objects_mask; | ||
| 411 | + { | ||
| 412 | + ncnn::Layer* gemm = ncnn::create_layer("Gemm"); | ||
| 413 | + | ||
| 414 | + ncnn::ParamDict pd; | ||
| 415 | + pd.set(6, 1); // constantC | ||
| 416 | + pd.set(7, count); // constantM | ||
| 417 | + pd.set(8, mask_protos.w * mask_protos.h); // constantN | ||
| 418 | + pd.set(9, mask_feat.w); // constantK | ||
| 419 | + pd.set(10, -1); // constant_broadcast_type_C | ||
| 420 | + pd.set(11, 1); // output_N1M | ||
| 421 | + gemm->load_param(pd); | ||
| 422 | + | ||
| 423 | + ncnn::Option opt; | ||
| 424 | + opt.num_threads = 1; | ||
| 425 | + opt.use_packing_layout = false; | ||
| 426 | + | ||
| 427 | + gemm->create_pipeline(opt); | ||
| 428 | + | ||
| 429 | + std::vector<ncnn::Mat> gemm_inputs(2); | ||
| 430 | + gemm_inputs[0] = objects_mask_feat; | ||
| 431 | + gemm_inputs[1] = mask_protos.reshape(mask_protos.w * mask_protos.h, 1, mask_protos.c); | ||
| 432 | + std::vector<ncnn::Mat> gemm_outputs(1); | ||
| 433 | + gemm->forward(gemm_inputs, gemm_outputs, opt); | ||
| 434 | + objects_mask = gemm_outputs[0].reshape(mask_protos.w, mask_protos.h, count); | ||
| 435 | + | ||
| 436 | + gemm->destroy_pipeline(opt); | ||
| 437 | + | ||
| 438 | + delete gemm; | ||
| 439 | + } | ||
| 440 | + { | ||
| 441 | + ncnn::Layer* sigmoid = ncnn::create_layer("Sigmoid"); | ||
| 442 | + | ||
| 443 | + ncnn::Option opt; | ||
| 444 | + opt.num_threads = 1; | ||
| 445 | + opt.use_packing_layout = false; | ||
| 446 | + | ||
| 447 | + sigmoid->create_pipeline(opt); | ||
| 448 | + | ||
| 449 | + sigmoid->forward_inplace(objects_mask, opt); | ||
| 450 | + | ||
| 451 | + sigmoid->destroy_pipeline(opt); | ||
| 452 | + | ||
| 453 | + delete sigmoid; | ||
| 454 | + } | ||
| 455 | + | ||
| 456 | + // resize mask map | ||
| 457 | + { | ||
| 458 | + ncnn::Mat objects_mask_resized; | ||
| 459 | + ncnn::resize_bilinear(objects_mask, objects_mask_resized, in_pad.w / scale, in_pad.h / scale); | ||
| 460 | + objects_mask = objects_mask_resized; | ||
| 461 | + } | ||
| 462 | + | ||
| 463 | + // create per-object mask | ||
| 464 | + for (int i = 0; i < count; i++) | ||
| 465 | + { | ||
| 466 | + Object& obj = objects[i]; | ||
| 467 | + | ||
| 468 | + const ncnn::Mat mm = objects_mask.channel(i); | ||
| 469 | + | ||
| 470 | + obj.mask = cv::Mat((int)obj.rect.height, (int)obj.rect.width, CV_8UC1); | ||
| 471 | + | ||
| 472 | + // adjust offset to original unpadded and clip inside object box | ||
| 473 | + for (int y = 0; y < (int)obj.rect.height; y++) | ||
| 474 | + { | ||
| 475 | + const float* pmm = mm.row((int)(hpad / 2 / scale + obj.rect.y + y)) + (int)(wpad / 2 / scale + obj.rect.x); | ||
| 476 | + uchar* pmask = obj.mask.ptr<uchar>(y); | ||
| 477 | + for (int x = 0; x < (int)obj.rect.width; x++) | ||
| 478 | + { | ||
| 479 | + pmask[x] = pmm[x] > mask_threshold ? 1 : 0; | ||
| 480 | + } | ||
| 481 | + } | ||
| 482 | + } | ||
| 483 | + | ||
| 484 | + // sort objects by area | ||
| 485 | + struct | ||
| 486 | + { | ||
| 487 | + bool operator()(const Object& a, const Object& b) const | ||
| 488 | + { | ||
| 489 | + return a.rect.area() > b.rect.area(); | ||
| 490 | + } | ||
| 491 | + } objects_area_greater; | ||
| 492 | + std::sort(objects.begin(), objects.end(), objects_area_greater); | ||
| 493 | + | ||
| 494 | + return 0; | ||
| 495 | +} | ||
| 496 | + | ||
| 497 | +int YOLOv8_seg::draw(cv::Mat& rgb, const std::vector<Object>& objects) | ||
| 498 | +{ | ||
| 499 | + static const char* class_names[] = { | ||
| 500 | + "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", | ||
| 501 | + "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", | ||
| 502 | + "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", | ||
| 503 | + "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", | ||
| 504 | + "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", | ||
| 505 | + "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", | ||
| 506 | + "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", | ||
| 507 | + "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", | ||
| 508 | + "hair drier", "toothbrush" | ||
| 509 | + }; | ||
| 510 | + | ||
| 511 | + static cv::Scalar colors[] = { | ||
| 512 | + cv::Scalar( 67, 54, 244), | ||
| 513 | + cv::Scalar( 30, 99, 233), | ||
| 514 | + cv::Scalar( 39, 176, 156), | ||
| 515 | + cv::Scalar( 58, 183, 103), | ||
| 516 | + cv::Scalar( 81, 181, 63), | ||
| 517 | + cv::Scalar(150, 243, 33), | ||
| 518 | + cv::Scalar(169, 244, 3), | ||
| 519 | + cv::Scalar(188, 212, 0), | ||
| 520 | + cv::Scalar(150, 136, 0), | ||
| 521 | + cv::Scalar(175, 80, 76), | ||
| 522 | + cv::Scalar(195, 74, 139), | ||
| 523 | + cv::Scalar(220, 57, 205), | ||
| 524 | + cv::Scalar(235, 59, 255), | ||
| 525 | + cv::Scalar(193, 7, 255), | ||
| 526 | + cv::Scalar(152, 0, 255), | ||
| 527 | + cv::Scalar( 87, 34, 255), | ||
| 528 | + cv::Scalar( 85, 72, 121), | ||
| 529 | + cv::Scalar(158, 158, 158), | ||
| 530 | + cv::Scalar(125, 139, 96) | ||
| 531 | + }; | ||
| 532 | + | ||
| 533 | + for (size_t i = 0; i < objects.size(); i++) | ||
| 534 | + { | ||
| 535 | + const Object& obj = objects[i]; | ||
| 536 | + | ||
| 537 | + const cv::Scalar& color = colors[i % 19]; | ||
| 538 | + | ||
| 539 | + // fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob, | ||
| 540 | + // obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); | ||
| 541 | + | ||
| 542 | + for (int y = 0; y < (int)obj.rect.height; y++) | ||
| 543 | + { | ||
| 544 | + const uchar* maskptr = obj.mask.ptr<const uchar>(y); | ||
| 545 | + uchar* bgrptr = rgb.ptr<uchar>((int)obj.rect.y + y) + (int)obj.rect.x * 3; | ||
| 546 | + for (int x = 0; x < (int)obj.rect.width; x++) | ||
| 547 | + { | ||
| 548 | + if (maskptr[x]) | ||
| 549 | + { | ||
| 550 | + bgrptr[0] = bgrptr[0] * 0.5 + color[0] * 0.5; | ||
| 551 | + bgrptr[1] = bgrptr[1] * 0.5 + color[1] * 0.5; | ||
| 552 | + bgrptr[2] = bgrptr[2] * 0.5 + color[2] * 0.5; | ||
| 553 | + } | ||
| 554 | + bgrptr += 3; | ||
| 555 | + } | ||
| 556 | + } | ||
| 557 | + | ||
| 558 | + cv::rectangle(rgb, obj.rect, color); | ||
| 559 | + | ||
| 560 | + char text[256]; | ||
| 561 | + sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100); | ||
| 562 | + | ||
| 563 | + int baseLine = 0; | ||
| 564 | + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); | ||
| 565 | + | ||
| 566 | + int x = obj.rect.x; | ||
| 567 | + int y = obj.rect.y - label_size.height - baseLine; | ||
| 568 | + if (y < 0) | ||
| 569 | + y = 0; | ||
| 570 | + if (x + label_size.width > rgb.cols) | ||
| 571 | + x = rgb.cols - label_size.width; | ||
| 572 | + | ||
| 573 | + cv::rectangle(rgb, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), | ||
| 574 | + cv::Scalar(255, 255, 255), -1); | ||
| 575 | + | ||
| 576 | + cv::putText(rgb, text, cv::Point(x, y + label_size.height), | ||
| 577 | + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); | ||
| 578 | + } | ||
| 579 | + | ||
| 580 | + return 0; | ||
| 581 | +} |
| 1 | +// Tencent is pleased to support the open source community by making ncnn available. | ||
| 2 | +// | ||
| 3 | +// Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. | ||
| 4 | +// | ||
| 5 | +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | ||
| 6 | +// in compliance with the License. You may obtain a copy of the License at | ||
| 7 | +// | ||
| 8 | +// https://opensource.org/licenses/BSD-3-Clause | ||
| 9 | +// | ||
| 10 | +// Unless required by applicable law or agreed to in writing, software distributed | ||
| 11 | +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
| 12 | +// CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
| 13 | +// specific language governing permissions and limitations under the License. | ||
| 14 | + | ||
| 15 | +#include <android/asset_manager_jni.h> | ||
| 16 | +#include <android/native_window_jni.h> | ||
| 17 | +#include <android/native_window.h> | ||
| 18 | + | ||
| 19 | +#include <android/log.h> | ||
| 20 | + | ||
| 21 | +#include <jni.h> | ||
| 22 | + | ||
| 23 | +#include <string> | ||
| 24 | +#include <vector> | ||
| 25 | + | ||
| 26 | +#include <platform.h> | ||
| 27 | +#include <benchmark.h> | ||
| 28 | + | ||
| 29 | +#include "yolov8.h" | ||
| 30 | + | ||
| 31 | +#include "ndkcamera.h" | ||
| 32 | + | ||
| 33 | +#include <opencv2/core/core.hpp> | ||
| 34 | +#include <opencv2/imgproc/imgproc.hpp> | ||
| 35 | + | ||
| 36 | +#if __ARM_NEON | ||
| 37 | +#include <arm_neon.h> | ||
| 38 | +#endif // __ARM_NEON | ||
| 39 | + | ||
| 40 | +static int draw_unsupported(cv::Mat& rgb) | ||
| 41 | +{ | ||
| 42 | + const char text[] = "unsupported"; | ||
| 43 | + | ||
| 44 | + int baseLine = 0; | ||
| 45 | + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 1.0, 1, &baseLine); | ||
| 46 | + | ||
| 47 | + int y = (rgb.rows - label_size.height) / 2; | ||
| 48 | + int x = (rgb.cols - label_size.width) / 2; | ||
| 49 | + | ||
| 50 | + cv::rectangle(rgb, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), | ||
| 51 | + cv::Scalar(255, 255, 255), -1); | ||
| 52 | + | ||
| 53 | + cv::putText(rgb, text, cv::Point(x, y + label_size.height), | ||
| 54 | + cv::FONT_HERSHEY_SIMPLEX, 1.0, cv::Scalar(0, 0, 0)); | ||
| 55 | + | ||
| 56 | + return 0; | ||
| 57 | +} | ||
| 58 | + | ||
| 59 | +static int draw_fps(cv::Mat& rgb) | ||
| 60 | +{ | ||
| 61 | + // resolve moving average | ||
| 62 | + float avg_fps = 0.f; | ||
| 63 | + { | ||
| 64 | + static double t0 = 0.f; | ||
| 65 | + static float fps_history[10] = {0.f}; | ||
| 66 | + | ||
| 67 | + double t1 = ncnn::get_current_time(); | ||
| 68 | + if (t0 == 0.f) | ||
| 69 | + { | ||
| 70 | + t0 = t1; | ||
| 71 | + return 0; | ||
| 72 | + } | ||
| 73 | + | ||
| 74 | + float fps = 1000.f / (t1 - t0); | ||
| 75 | + t0 = t1; | ||
| 76 | + | ||
| 77 | + for (int i = 9; i >= 1; i--) | ||
| 78 | + { | ||
| 79 | + fps_history[i] = fps_history[i - 1]; | ||
| 80 | + } | ||
| 81 | + fps_history[0] = fps; | ||
| 82 | + | ||
| 83 | + if (fps_history[9] == 0.f) | ||
| 84 | + { | ||
| 85 | + return 0; | ||
| 86 | + } | ||
| 87 | + | ||
| 88 | + for (int i = 0; i < 10; i++) | ||
| 89 | + { | ||
| 90 | + avg_fps += fps_history[i]; | ||
| 91 | + } | ||
| 92 | + avg_fps /= 10.f; | ||
| 93 | + } | ||
| 94 | + | ||
| 95 | + char text[32]; | ||
| 96 | + sprintf(text, "FPS=%.2f", avg_fps); | ||
| 97 | + | ||
| 98 | + int baseLine = 0; | ||
| 99 | + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); | ||
| 100 | + | ||
| 101 | + int y = 0; | ||
| 102 | + int x = rgb.cols - label_size.width; | ||
| 103 | + | ||
| 104 | + cv::rectangle(rgb, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), | ||
| 105 | + cv::Scalar(255, 255, 255), -1); | ||
| 106 | + | ||
| 107 | + cv::putText(rgb, text, cv::Point(x, y + label_size.height), | ||
| 108 | + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); | ||
| 109 | + | ||
| 110 | + return 0; | ||
| 111 | +} | ||
| 112 | + | ||
| 113 | +static YOLOv8* g_yolov8 = 0; | ||
| 114 | +static ncnn::Mutex lock; | ||
| 115 | + | ||
| 116 | +class MyNdkCamera : public NdkCameraWindow | ||
| 117 | +{ | ||
| 118 | +public: | ||
| 119 | + virtual void on_image_render(cv::Mat& rgb) const; | ||
| 120 | +}; | ||
| 121 | + | ||
| 122 | +void MyNdkCamera::on_image_render(cv::Mat& rgb) const | ||
| 123 | +{ | ||
| 124 | + // yolov8 | ||
| 125 | + { | ||
| 126 | + ncnn::MutexLockGuard g(lock); | ||
| 127 | + | ||
| 128 | + if (g_yolov8) | ||
| 129 | + { | ||
| 130 | + std::vector<Object> objects; | ||
| 131 | + g_yolov8->detect(rgb, objects); | ||
| 132 | + | ||
| 133 | + g_yolov8->draw(rgb, objects); | ||
| 134 | + } | ||
| 135 | + else | ||
| 136 | + { | ||
| 137 | + draw_unsupported(rgb); | ||
| 138 | + } | ||
| 139 | + } | ||
| 140 | + | ||
| 141 | + draw_fps(rgb); | ||
| 142 | +} | ||
| 143 | + | ||
| 144 | +static MyNdkCamera* g_camera = 0; | ||
| 145 | + | ||
| 146 | +extern "C" { | ||
| 147 | + | ||
| 148 | +JNIEXPORT jint JNI_OnLoad(JavaVM* vm, void* reserved) | ||
| 149 | +{ | ||
| 150 | + __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "JNI_OnLoad"); | ||
| 151 | + | ||
| 152 | + g_camera = new MyNdkCamera; | ||
| 153 | + | ||
| 154 | + ncnn::create_gpu_instance(); | ||
| 155 | + | ||
| 156 | + return JNI_VERSION_1_4; | ||
| 157 | +} | ||
| 158 | + | ||
| 159 | +JNIEXPORT void JNI_OnUnload(JavaVM* vm, void* reserved) | ||
| 160 | +{ | ||
| 161 | + __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "JNI_OnUnload"); | ||
| 162 | + | ||
| 163 | + { | ||
| 164 | + ncnn::MutexLockGuard g(lock); | ||
| 165 | + | ||
| 166 | + delete g_yolov8; | ||
| 167 | + g_yolov8 = 0; | ||
| 168 | + } | ||
| 169 | + | ||
| 170 | + ncnn::destroy_gpu_instance(); | ||
| 171 | + | ||
| 172 | + delete g_camera; | ||
| 173 | + g_camera = 0; | ||
| 174 | +} | ||
| 175 | + | ||
| 176 | +// public native boolean loadModel(AssetManager mgr, int taskid, int modelid, int cpugpu); | ||
| 177 | +JNIEXPORT jboolean JNICALL Java_com_tencent_yolov8ncnn_YOLOv8Ncnn_loadModel(JNIEnv* env, jobject thiz, jobject assetManager, jint taskid, jint modelid, jint cpugpu) | ||
| 178 | +{ | ||
| 179 | + if (taskid < 0 || taskid > 5 || modelid < 0 || modelid > 8 || cpugpu < 0 || cpugpu > 2) | ||
| 180 | + { | ||
| 181 | + return JNI_FALSE; | ||
| 182 | + } | ||
| 183 | + | ||
| 184 | + AAssetManager* mgr = AAssetManager_fromJava(env, assetManager); | ||
| 185 | + | ||
| 186 | + __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "loadModel %p", mgr); | ||
| 187 | + | ||
| 188 | + const char* tasknames[6] = | ||
| 189 | + { | ||
| 190 | + "", | ||
| 191 | + "_oiv7", | ||
| 192 | + "_seg", | ||
| 193 | + "_pose", | ||
| 194 | + "_cls", | ||
| 195 | + "_obb" | ||
| 196 | + }; | ||
| 197 | + | ||
| 198 | + const char* modeltypes[9] = | ||
| 199 | + { | ||
| 200 | + "n", | ||
| 201 | + "s", | ||
| 202 | + "m", | ||
| 203 | + "n", | ||
| 204 | + "s", | ||
| 205 | + "m", | ||
| 206 | + "n", | ||
| 207 | + "s", | ||
| 208 | + "m" | ||
| 209 | + }; | ||
| 210 | + | ||
| 211 | + std::string parampath = std::string("yolov8") + modeltypes[(int)modelid] + tasknames[(int)taskid] + ".ncnn.param"; | ||
| 212 | + std::string modelpath = std::string("yolov8") + modeltypes[(int)modelid] + tasknames[(int)taskid] + ".ncnn.bin"; | ||
| 213 | + bool use_gpu = (int)cpugpu == 1; | ||
| 214 | + bool use_turnip = (int)cpugpu == 2; | ||
| 215 | + | ||
| 216 | + // reload | ||
| 217 | + { | ||
| 218 | + ncnn::MutexLockGuard g(lock); | ||
| 219 | + | ||
| 220 | + { | ||
| 221 | + static int old_taskid = 0; | ||
| 222 | + static int old_modelid = 0; | ||
| 223 | + static int old_cpugpu = 0; | ||
| 224 | + if (taskid != old_taskid || (modelid % 3) != old_modelid || cpugpu != old_cpugpu) | ||
| 225 | + { | ||
| 226 | + // taskid or model or cpugpu changed | ||
| 227 | + delete g_yolov8; | ||
| 228 | + g_yolov8 = 0; | ||
| 229 | + } | ||
| 230 | + old_taskid = taskid; | ||
| 231 | + old_modelid = modelid % 3; | ||
| 232 | + old_cpugpu = cpugpu; | ||
| 233 | + | ||
| 234 | + ncnn::destroy_gpu_instance(); | ||
| 235 | + | ||
| 236 | + if (use_turnip) | ||
| 237 | + { | ||
| 238 | + ncnn::create_gpu_instance("libvulkan_freedreno.so"); | ||
| 239 | + } | ||
| 240 | + else if (use_gpu) | ||
| 241 | + { | ||
| 242 | + ncnn::create_gpu_instance(); | ||
| 243 | + } | ||
| 244 | + | ||
| 245 | + if (!g_yolov8) | ||
| 246 | + { | ||
| 247 | + if (taskid == 0) g_yolov8 = new YOLOv8_det_coco; | ||
| 248 | + if (taskid == 1) g_yolov8 = new YOLOv8_det_oiv7; | ||
| 249 | + if (taskid == 2) g_yolov8 = new YOLOv8_seg; | ||
| 250 | + if (taskid == 3) g_yolov8 = new YOLOv8_pose; | ||
| 251 | + if (taskid == 4) g_yolov8 = new YOLOv8_cls; | ||
| 252 | + if (taskid == 5) g_yolov8 = new YOLOv8_obb; | ||
| 253 | + | ||
| 254 | + g_yolov8->load(mgr, parampath.c_str(), modelpath.c_str(), use_gpu || use_turnip); | ||
| 255 | + } | ||
| 256 | + int target_size = 320; | ||
| 257 | + if ((int)modelid >= 3) | ||
| 258 | + target_size = 480; | ||
| 259 | + if ((int)modelid >= 6) | ||
| 260 | + target_size = 640; | ||
| 261 | + g_yolov8->set_det_target_size(target_size); | ||
| 262 | + } | ||
| 263 | + } | ||
| 264 | + | ||
| 265 | + return JNI_TRUE; | ||
| 266 | +} | ||
| 267 | + | ||
| 268 | +// public native boolean openCamera(int facing); | ||
| 269 | +JNIEXPORT jboolean JNICALL Java_com_tencent_yolov8ncnn_YOLOv8Ncnn_openCamera(JNIEnv* env, jobject thiz, jint facing) | ||
| 270 | +{ | ||
| 271 | + if (facing < 0 || facing > 1) | ||
| 272 | + return JNI_FALSE; | ||
| 273 | + | ||
| 274 | + __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "openCamera %d", facing); | ||
| 275 | + | ||
| 276 | + g_camera->open((int)facing); | ||
| 277 | + | ||
| 278 | + return JNI_TRUE; | ||
| 279 | +} | ||
| 280 | + | ||
| 281 | +// public native boolean closeCamera(); | ||
| 282 | +JNIEXPORT jboolean JNICALL Java_com_tencent_yolov8ncnn_YOLOv8Ncnn_closeCamera(JNIEnv* env, jobject thiz) | ||
| 283 | +{ | ||
| 284 | + __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "closeCamera"); | ||
| 285 | + | ||
| 286 | + g_camera->close(); | ||
| 287 | + | ||
| 288 | + return JNI_TRUE; | ||
| 289 | +} | ||
| 290 | + | ||
| 291 | +// public native boolean setOutputWindow(Surface surface); | ||
| 292 | +JNIEXPORT jboolean JNICALL Java_com_tencent_yolov8ncnn_YOLOv8Ncnn_setOutputWindow(JNIEnv* env, jobject thiz, jobject surface) | ||
| 293 | +{ | ||
| 294 | + ANativeWindow* win = ANativeWindow_fromSurface(env, surface); | ||
| 295 | + | ||
| 296 | + __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "setOutputWindow %p", win); | ||
| 297 | + | ||
| 298 | + g_camera->set_window(win); | ||
| 299 | + | ||
| 300 | + return JNI_TRUE; | ||
| 301 | +} | ||
| 302 | + | ||
| 303 | +} |
-
请 注册 或 登录 后发表评论