YOLOv3鐘錶與類比儀表偵測_藉由NMS濾除重複輸出偵測框

- 7月 30, 2022

Non-Maximum Suppression (NMS)

程式碼

#include "opencv2/opencv.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/core.hpp"
#include "opencv2/dnn.hpp"
#include <iostream>
using namespace std;
using namespace cv;
using namespace cv::dnn;

int main(int argc, char* argv[]) {
	const char* imagename = "C:/img/sample1_1.jpg";
	const char* weight_file = "C:/YOLOv3-416/yolov3.weights";
	const char* cfg_file = "C:/YOLOv3-416/yolov3.cfg";
	const char* label_map = "C:/YOLOv3-416/object_detection_classes_yolov3.txt";
	Net net = readNetFromDarknet(cfg_file, weight_file);//Load network model
	net.setPreferableBackend(DNN_BACKEND_OPENCV);
	net.setPreferableTarget(DNN_TARGET_CPU);

	vector<string> layer_names = net.getLayerNames();
	for (int i = 0; i < layer_names.size(); i++)
	{
		int id = net.getLayerId(layer_names[i]);
		auto layer = net.getLayer(id);
		printf("layer id : %d,type : %s,name : %s \n", id, layer->type.c_str(), layer->name.c_str());
	}
	Mat img = imread(imagename);
	if (img.empty()) {
		fprintf(stderr, "Can not load image %s\n", imagename);
		return -1;
	}
	namedWindow("image", 0);
	//resizeWindow("image", 900, 1200);
	resizeWindow("image", 600, 800);
	imshow("image", img);

	Mat blob = blobFromImage(img, 0.00392, Size(416, 416), Scalar(), true, false);
	net.setInput(blob);

	vector<Mat> outputs;
	net.forward(outputs, layer_names);
	vector<Rect> boxes;
	vector<int> classIds;
	vector<float> confidences;
	for (int i = 0; i < outputs.size(); i++)
	{
		float* data = (float*)outputs[i].data;
		for (int j = 0; j < outputs[i].rows; j++, data += outputs[i].cols) {
			Mat scores = outputs[i].row(j).colRange(5, outputs[i].cols);
			Point classIdPoint;
			double confidence;
			minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
			if (confidence > 0.5) {
				int center_x = (int)(data[0] * img.cols);
				int center_y = (int)(data[1] * img.rows);
				int width = (int)(data[2] * img.cols);
				int height = (int)(data[3] * img.rows);
				int left = center_x - width / 2;
				int top = center_y - height / 2;
				classIds.push_back(classIdPoint.x);
				confidences.push_back(confidence);
				boxes.push_back(Rect(left,top,width,height));
				rectangle(img, Rect(left, top, width, height) , Scalar(0,0,255) , 3 , 8);
			}
		}
	}

	//namedWindow("res", 0);
	//resizeWindow("res", 600, 800);
	//imshow("res", img);

	vector<int> indexes;
	NMSBoxes(boxes , confidences , 0.5 , 0.5 , indexes);
	for (int i = 0; i < indexes.size(); i++)
	{
		int idx = classIds[i];
		Rect box = boxes[i];
		rectangle(img , box , Scalar(255, 0,0) , 3 , 8);
	}
	namedWindow("nms_res", 0);
	resizeWindow("nms_res", 600, 800);
	imshow("nms_res", img);
	waitKey(0);
	return 0;
}

微調：在偵測框上標示 score 與辨識類別的文字訊息

程式碼

#include "opencv2/opencv.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/core.hpp"
#include "opencv2/dnn.hpp"
#include <iostream>
#include <fstream> 
#include<cmath>
#include<string>
using namespace std;
using namespace cv;
using namespace cv::dnn;

/// Draws the 256-bin intensity histogram of a single-channel 8-bit image and
/// shows it in a resizable window.
///
/// @param name   Title of the window the histogram is displayed in.
/// @param image  Single-channel 8-bit image whose values are counted.
void show_histogram(std::string const& name, cv::Mat1b const& image)
{
	// One bin per possible 8-bit value.
	int const bins = 256;
	int histSize[] = { bins };
	// Value range covered by the bins (upper bound exclusive).
	float lranges[] = { 0, 256 };
	const float* ranges[] = { lranges };
	int channels[] = { 0 };

	cv::Mat hist;
	cv::calcHist(&image, 1, channels, cv::Mat(), hist, 1, histSize, ranges, true, false);

	// Canvas for the plot: one pixel column per bin.
	int const hist_height = 256;
	cv::Mat3b hist_image = cv::Mat3b::zeros(hist_height, bins);

	double max_val = 0;
	minMaxLoc(hist, 0, &max_val);

	// Draw each bin as a vertical white line scaled so the tallest bin fills
	// the canvas height.
	for (int b = 0; b < bins; b++) {
		float const binVal = hist.at<float>(b);
		// If the histogram has no counts, max_val is 0 and the scaling would be
		// 0/0 (NaN) passed to cvRound; draw zero-height bars instead.
		int const height = (max_val > 0) ? cvRound(binVal * hist_height / max_val) : 0;
		cv::line
		(hist_image
			, cv::Point(b, hist_height - height), cv::Point(b, hist_height)
			, cv::Scalar::all(255)
		);
	}

	namedWindow(name, 0);
	resizeWindow(name, 300, 400);
	imshow(name, hist_image);
}


int main()
{
	//string imageFileName = "sample2_5.jpg";//THRESH_OTSU
	//string imageFileName = "IMG_20220721_194716.jpg";//THRESH_TRIANGLE
	//string imageFileName = "IMG_20220721_195110.jpg";
	//IMG_20220721_200920.jpg

	//string imageFileName = "IMG_20220721_200920.jpg";
	//string imagename = "C:/img/test/02/";

	bool IsNeedShowScore = true;
	string imageFileName = "IMG_20220721_200048.jpg";
	string imagename = "C:/img/test/23/";


	imagename.append(imageFileName);

	string weight_file = "C:/YOLOv3-416/yolov3.weights";
	string cfg_file = "C:/YOLOv3-416/yolov3.cfg";
	string label_map = "C:/YOLOv3-416/object_detection_classes_yolov3.txt";

	vector<string> classNames;
	ifstream fp(label_map);
	if (!fp.is_open()) {
		printf("could not find the file \n");
		exit(-1);
	}
	string name;
	while (!fp.eof())
	{
		getline(fp , name);
		if (name.length()) {
			classNames.push_back(name);
		}
	}
	fp.close();



	Net net = readNetFromDarknet(cfg_file, weight_file);//Load network model
	net.setPreferableBackend(DNN_BACKEND_OPENCV);
	net.setPreferableTarget(DNN_TARGET_CPU);

	vector<string> layer_names = net.getLayerNames();
	for (int i = 0; i < layer_names.size(); i++)
	{
		int id = net.getLayerId(layer_names[i]);
		auto layer = net.getLayer(id);
		printf("layer id : %d,type : %s,name : %s \n", id, layer->type.c_str(), layer->name.c_str());
	}
	Mat img = imread(imagename);
	if (img.empty()) {
		fprintf(stderr, "Can not load image %s\n", imagename);
		return -1;
	}
	//namedWindow("image", 0);
	//resizeWindow("image", 600, 800);
	//imshow("image", img);

	Mat blob = blobFromImage(img, 0.00392, Size(416, 416), Scalar(), true, false);
	net.setInput(blob);

	vector<Mat> outputs;
	net.forward(outputs, layer_names);
	vector<Rect> boxes;
	vector<int> classIds;
	vector<float> confidences;
	for (int i = 0; i < outputs.size(); i++)
	{
		float* data = (float*)outputs[i].data;
		for (int j = 0; j < outputs[i].rows; j++, data += outputs[i].cols) {
			Mat scores = outputs[i].row(j).colRange(5, outputs[i].cols);
			Point classIdPoint;
			double confidence;
			minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
			if (confidence > 0.5) {
				int center_x = (int)(data[0] * img.cols);
				int center_y = (int)(data[1] * img.rows);
				int width = (int)(data[2] * img.cols);
				int height = (int)(data[3] * img.rows);
				int left = center_x - width / 2;
				int top = center_y - height / 2;
				classIds.push_back(classIdPoint.x);
				confidences.push_back(confidence);
				boxes.push_back(Rect(left, top, width, height));
				//rectangle(img, Rect(left, top, width, height) , Scalar(0,0,255) , 3 , 8);
			}
		}
	}

	//namedWindow("res", 0);
	//resizeWindow("res", 600, 800);
	//imshow("res", img);

	vector<int> indexes;
	Rect Roi;
	NMSBoxes(boxes, confidences, 0.5, 0.5, indexes);
	cout << "indexes size:" << indexes.size() << endl;
	for (int i = 0; i < indexes.size(); i++)
	{
		int idx = classIds[i];
		Rect box = boxes[i];
		/*if (indexes.size() == 1) {
			box.x = abs(box.x);
			box.y = abs(box.y);
			box.width = abs(box.width);
			box.height = abs(box.height);
			Roi = box;
		}*/
		rectangle(img, box, Scalar(255, 0, 0), 3, 8);

		if (IsNeedShowScore) {
			putText(img, format("score: %.2f,%s", confidences[indexes[i]], classNames[idx].c_str()), box.tl(), FONT_HERSHEY_SIMPLEX, 4, Scalar(255, 0, 255), 3, 8);
		}
		
	}
	namedWindow("nms_res", 0);
	resizeWindow("nms_res", 600, 800);
	imshow("nms_res", img);


	waitKey(0);
	return 0;
}

Ref:

機器/深度學習: 物件偵測 Non-Maximum Suppression (NMS)

https://chih-sheng-huang821.medium.com/%E6%A9%9F%E5%99%A8-%E6%B7%B1%E5%BA%A6%E5%AD%B8%E7%BF%92-%E7%89%A9%E4%BB%B6%E5%81%B5%E6%B8%AC-non-maximum-suppression-nms-aa70c45adffa

非极大值抑制（Non-Maximum Suppression，NMS）

https://www.cnblogs.com/makefile/p/nms.html

Advanced deep learning based object detection methods

https://www.slideshare.net/Brodmann17/advanced-deep-learning-based-object-detection-methods

搜尋此網誌

第25個冬天

YOLOv3鐘錶與類比儀表偵測_藉由NMS濾除重複輸出偵測框

留言

張貼留言

這個網誌中的熱門文章

何謂淨重(Net Weight)、皮重(Tare Weight)與毛重(Gross Weight)

(2021年度)駕訓學科筆試準備題庫歸納分析_法規是非題

Architecture(架構) 和 Framework(框架) 有何不同?_軟體設計前的事前規劃的藍圖概念