opencv DNN模块之YOLO(Darknet)对象检测

我只是一个虾纸丫 提交于 2020-02-10 22:14:03

原理

自行百度,本人不擅长

  • YOLO 来自darknet对象检测框架
  • 基于COCO数据集,能检测80个类别
  • YOLO V3版本
    https://pjreddie.com/darknet/yolo

每种网络模型对应的输入数据

可以查看每种模型的模型二进制文件名称,网络描述文件名称,中心化(均值)处理参数,数据样本尺寸,描述label文件名称,rgb通道顺序以及典型应用场景等信息
链接地址:https://github.com/opencv/opencv/blob/master/samples/dnn/models.yml

################################################################################
# Object detection models.
################################################################################

。。。

# YOLO object detection family from Darknet (https://pjreddie.com/darknet/yolo/)
# Might be used for all YOLOv2, TinyYolov2 and YOLOv3
yolo:
  # Binary weights file of the model.
  model: "yolov3.weights"
  # Network description (configuration) file.
  config: "yolov3.cfg"
  # Mean used to center the input data (all zeros here).
  mean: [0, 0, 0]
  # Input scaling factor; the article describes it as 1/255.
  scale: 0.00392
  # Input sample size.
  width: 416
  height: 416
  # Channel-order flag; the article lists the network input order as RGB.
  rgb: true
  # File holding the class label names.
  classes: "object_detection_classes_yolov3.txt"
  # Typical application / sample this entry is used with.
  sample: "object_detection"

# Tiny YOLO entry using the Pascal VOC label file.
tiny-yolo-voc:
  # Binary weights file of the model.
  model: "tiny-yolo-voc.weights"
  # Network description (configuration) file.
  config: "tiny-yolo-voc.cfg"
  # Mean used to center the input data (all zeros here).
  mean: [0, 0, 0]
  # Input scaling factor; the article describes it as 1/255.
  scale: 0.00392
  # Input sample size.
  width: 416
  height: 416
  # Channel-order flag; the article lists the network input order as RGB.
  rgb: true
  # File holding the class label names.
  classes: "object_detection_classes_pascal_voc.txt"
  # Typical application / sample this entry is used with.
  sample: "object_detection"

。。。

网络输入输出

  • 输入层 [Nx3xHxW]通道顺序:RGB,均值0,放缩1/255
  • 多个输出层,每行输出结构:[center_x,center_y,width,height,目标置信度,各类别得分...]
  • 通过NMS去掉重复BOX,因为有多个输出层,可能对象被检测多次

代码


#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>

using namespace std;
using namespace cv;
using namespace cv::dnn;

// Directory and file name of the test image; main() concatenates the two.
#define PIC_PATH "/work/opencv_pic/"
#define PIC_NAME "pedestrian.png"

// Network input size used when building the input blob.
const size_t width = 416;
const size_t height = 416;

// Darknet weights, network configuration and class-label files.
std::string weight_file = "/work/opencv_dnn/yolov3/yolov3.weights";
std::string cfg_file = "/work/opencv_dnn/yolov3/yolov3.cfg";
std::string label_map = "/work/opencv_dnn/yolov3/object_detection_classes_yolov3.txt";

// Reads the class names from label_map, one non-empty line per class.
std::vector<std::string> readLabelMaps(void);
int main(void)
{
    string pic = string(PIC_PATH)+string(PIC_NAME);
    Mat src;
    src = imread(pic);
    if(src.empty())
    {
        printf("pic read err\n");
        return -1;
    }

    //创建并载入神经网络
    Net net = readNetFromDarknet(cfg_file,weight_file);

    if(net.empty())
    {
        printf("read caffe model data err\n");
        return -1;
    }


    //设置计算后台
    net.setPreferableBackend(DNN_BACKEND_OPENCV);
    net.setPreferableTarget(DNN_TARGET_CPU);

    //获取各层信息
    vector<string> layers_names = net.getLayerNames();

    for(size_t i=0;i<layers_names.size();i++)
    {
        int id = net.getLayerId(layers_names[i]);
        auto layer = net.getLayer(id);
        printf("layer id:%d,type:%s,name:%s\n",id,layer->type.c_str(),layer->name.c_str());
    }

    //获取所有输出层
    vector<string> outNames = net.getUnconnectedOutLayersNames();
    for(size_t i=0;i<outNames.size();i++)
    {
        printf("output layer name:%s\n",outNames[i].c_str());
    }
    //图片格式转换
    Mat blobimage = blobFromImage(src,0.00392, Size(width, height), Scalar(), true, false);

    //网络输入数据
    net.setInput(blobimage);

    //获取识别数据  yolo有多个输出层
    vector<Mat> outs;
    net.forward(outs,outNames);

    //每层都有 矩形 置信度 label索引
    vector<Rect> boxes;
    vector<int> classIds;
    vector<float> confidences;

    //获取名称索引
    vector<string> names = readLabelMaps();

    //解析
    for (size_t i = 0; i < outs.size(); i++)
    {
             // ¿ªÊ¼½âÎöÿ¸öÊä³öblob
             float* data = (float*)outs[i].data;
             //解析每个输出层的每行数据
             for (int j = 0; j < outs[i].rows; j++, data += outs[i].cols)
             {
                     //剔除每行 box本身数据 拿取box位置信息
                     Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
                     Point classIdPoint;   //最大值位置
                     double confidence;    //获取置信值
                     minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
                     if (confidence > 0.5)
                     {
                             int centerx = (int)(data[0] * src.cols);
                             int centery = (int)(data[1] * src.rows);
                             int width = (int)(data[2] * src.cols);
                             int height = (int)(data[3] * src.rows);
                             int left = centerx - width / 2;
                             int top = centery - height / 2;
                             classIds.push_back(classIdPoint.x);
                             confidences.push_back(confidence);
                             boxes.push_back(Rect(left, top, width, height));
                     }
             }
     }
    vector<int> indexes;
    //删除重复box
    NMSBoxes(boxes, confidences, 0.5, 0.5, indexes);
    for (size_t i = 0; i < indexes.size(); i++)
    {
            int idx = classIds[i];
            Rect box = boxes[i];
            rectangle(src, box, Scalar(0, 0, 255), 2, 8);
            putText(src, names[idx].c_str(), box.tl(), FONT_HERSHEY_SIMPLEX, 1, Scalar(255, 255, 0), 2, 8);
    }

    imshow("dst image",src);

    waitKey(0);
    destroyAllWindows();
    return 0;
}

vector<string> readLabelMaps()
{
        vector<string> labelNames;
        std::ifstream fp(label_map);
        if (!fp.is_open())
        {
                printf("could not open file...\n");
                exit(-1);
        }
        string one_line;
        string display_name;
        while (!fp.eof())
        {
                std::getline(fp, one_line);

                if (one_line.length()) {
                       labelNames.push_back(one_line) ;
                }
        }
        fp.close();
        return labelNames;
}


效果

在这里插入图片描述

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!