YOLOv3鐘錶與類比儀表偵測_藉由NMS濾除重複輸出偵測框
Non-Maximum Suppression (NMS)
程式碼
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 | #include "opencv2/opencv.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/core.hpp" #include "opencv2/dnn.hpp" #include <iostream> using namespace std; using namespace cv; using namespace cv::dnn; int main(int argc, char* argv[]) { const char* imagename = "C:/img/sample1_1.jpg"; const char* weight_file = "C:/YOLOv3-416/yolov3.weights"; const char* cfg_file = "C:/YOLOv3-416/yolov3.cfg"; const char* label_map = "C:/YOLOv3-416/object_detection_classes_yolov3.txt"; Net net = readNetFromDarknet(cfg_file, weight_file);//Load network model net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setPreferableTarget(DNN_TARGET_CPU); vector<string> layer_names = net.getLayerNames(); for (int i = 0; i < layer_names.size(); i++) { int id = net.getLayerId(layer_names[i]); auto layer = net.getLayer(id); printf("layer id : %d,type : %s,name : %s \n", id, layer->type.c_str(), layer->name.c_str()); } Mat img = imread(imagename); if (img.empty()) { fprintf(stderr, "Can not load image %s\n", imagename); return -1; } namedWindow("image", 0); //resizeWindow("image", 900, 1200); resizeWindow("image", 600, 800); imshow("image", img); Mat blob = blobFromImage(img, 0.00392, Size(416, 416), Scalar(), true, false); net.setInput(blob); vector<Mat> outputs; net.forward(outputs, layer_names); vector<Rect> boxes; vector<int> classIds; vector<float> confidences; for (int i = 0; i < outputs.size(); i++) { float* data = (float*)outputs[i].data; for (int j = 0; j < outputs[i].rows; j++, data += outputs[i].cols) { Mat scores = outputs[i].row(j).colRange(5, outputs[i].cols); Point classIdPoint; double confidence; minMaxLoc(scores, 0, &confidence, 0, &classIdPoint); if (confidence > 0.5) { int center_x = (int)(data[0] * img.cols); int center_y = (int)(data[1] * img.rows); int width = (int)(data[2] * img.cols); int height = (int)(data[3] * img.rows); int left = center_x - width / 2; int top = center_y - height / 2; classIds.push_back(classIdPoint.x); confidences.push_back(confidence); boxes.push_back(Rect(left,top,width,height)); rectangle(img, Rect(left, top, width, height) , Scalar(0,0,255) , 3 , 8); } } } //namedWindow("res", 0); //resizeWindow("res", 600, 800); //imshow("res", img); vector<int> indexes; NMSBoxes(boxes , confidences , 0.5 , 0.5 , indexes); for (int i = 0; i < indexes.size(); i++) { int idx = classIds[i]; Rect box = boxes[i]; rectangle(img , box , Scalar(255, 0,0) , 3 , 8); } namedWindow("nms_res", 0); resizeWindow("nms_res", 600, 800); imshow("nms_res", img); waitKey(0); return 0; } |
微調打印score跟辨識類別的文字訊息
程式碼
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 | #include "opencv2/opencv.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/core.hpp" #include "opencv2/dnn.hpp" #include <iostream> #include <fstream> #include<cmath> #include<string> using namespace std; using namespace cv; using namespace cv::dnn; void show_histogram(std::string const& name, cv::Mat1b const& image) { // Set histogram bins count int bins = 256; int histSize[] = { bins }; // Set ranges for histogram bins float lranges[] = { 0, 256 }; const float* ranges[] = { lranges }; // create matrix for histogram cv::Mat hist; int channels[] = { 0 }; // create matrix for histogram visualization int const hist_height = 256; cv::Mat3b hist_image = cv::Mat3b::zeros(hist_height, bins); cv::calcHist(&image, 1, channels, cv::Mat(), hist, 1, histSize, ranges, true, false); double max_val = 0; minMaxLoc(hist, 0, &max_val); // visualize each bin for (int b = 0; b < bins; b++) { float const binVal = hist.at<float>(b); int const height = cvRound(binVal * hist_height / max_val); cv::line (hist_image , cv::Point(b, hist_height - height), cv::Point(b, hist_height) , cv::Scalar::all(255) ); } namedWindow(name, 0); resizeWindow(name, 300, 400); imshow(name, hist_image); } int main() { //string imageFileName = "sample2_5.jpg";//THRESH_OTSU //string imageFileName = "IMG_20220721_194716.jpg";//THRESH_TRIANGLE //string imageFileName = "IMG_20220721_195110.jpg"; //IMG_20220721_200920.jpg //string imageFileName = "IMG_20220721_200920.jpg"; //string imagename = "C:/img/test/02/"; bool IsNeedShowScore = true; string imageFileName = "IMG_20220721_200048.jpg"; string imagename = "C:/img/test/23/"; imagename.append(imageFileName); string weight_file = "C:/YOLOv3-416/yolov3.weights"; string cfg_file = "C:/YOLOv3-416/yolov3.cfg"; string label_map = "C:/YOLOv3-416/object_detection_classes_yolov3.txt"; vector<string> classNames; ifstream fp(label_map); if (!fp.is_open()) { printf("could not find the file \n"); exit(-1); } string name; while (!fp.eof()) { getline(fp , name); if (name.length()) { classNames.push_back(name); } } fp.close(); Net net = readNetFromDarknet(cfg_file, weight_file);//Load network model net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setPreferableTarget(DNN_TARGET_CPU); vector<string> layer_names = net.getLayerNames(); for (int i = 0; i < layer_names.size(); i++) { int id = net.getLayerId(layer_names[i]); auto layer = net.getLayer(id); printf("layer id : %d,type : %s,name : %s \n", id, layer->type.c_str(), layer->name.c_str()); } Mat img = imread(imagename); if (img.empty()) { fprintf(stderr, "Can not load image %s\n", imagename); return -1; } //namedWindow("image", 0); //resizeWindow("image", 600, 800); //imshow("image", img); Mat blob = blobFromImage(img, 0.00392, Size(416, 416), Scalar(), true, false); net.setInput(blob); vector<Mat> outputs; net.forward(outputs, layer_names); vector<Rect> boxes; vector<int> classIds; vector<float> confidences; for (int i = 0; i < outputs.size(); i++) { float* data = (float*)outputs[i].data; for (int j = 0; j < outputs[i].rows; j++, data += outputs[i].cols) { Mat scores = outputs[i].row(j).colRange(5, outputs[i].cols); Point classIdPoint; double confidence; minMaxLoc(scores, 0, &confidence, 0, &classIdPoint); if (confidence > 0.5) { int center_x = (int)(data[0] * img.cols); int center_y = (int)(data[1] * img.rows); int width = (int)(data[2] * img.cols); int height = (int)(data[3] * img.rows); int left = center_x - width / 2; int top = center_y - height / 2; classIds.push_back(classIdPoint.x); confidences.push_back(confidence); boxes.push_back(Rect(left, top, width, height)); //rectangle(img, Rect(left, top, width, height) , Scalar(0,0,255) , 3 , 8); } } } //namedWindow("res", 0); //resizeWindow("res", 600, 800); //imshow("res", img); vector<int> indexes; Rect Roi; NMSBoxes(boxes, confidences, 0.5, 0.5, indexes); cout << "indexes size:" << indexes.size() << endl; for (int i = 0; i < indexes.size(); i++) { int idx = classIds[i]; Rect box = boxes[i]; /*if (indexes.size() == 1) { box.x = abs(box.x); box.y = abs(box.y); box.width = abs(box.width); box.height = abs(box.height); Roi = box; }*/ rectangle(img, box, Scalar(255, 0, 0), 3, 8); if (IsNeedShowScore) { putText(img, format("score: %.2f,%s", confidences[indexes[i]], classNames[idx].c_str()), box.tl(), FONT_HERSHEY_SIMPLEX, 4, Scalar(255, 0, 255), 3, 8); } } namedWindow("nms_res", 0); resizeWindow("nms_res", 600, 800); imshow("nms_res", img); waitKey(0); return 0; } |
Ref:
機器/深度學習: 物件偵測 Non-Maximum Suppression (NMS)
https://chih-sheng-huang821.medium.com/%E6%A9%9F%E5%99%A8-%E6%B7%B1%E5%BA%A6%E5%AD%B8%E7%BF%92-%E7%89%A9%E4%BB%B6%E5%81%B5%E6%B8%AC-non-maximum-suppression-nms-aa70c45adffa
非极大值抑制(Non-Maximum Suppression,NMS)
https://www.cnblogs.com/makefile/p/nms.html
Advanced deep learning based object detection methods
https://www.slideshare.net/Brodmann17/advanced-deep-learning-based-object-detection-methods
留言
張貼留言