Real-time template matching - OpenCV, C++

问题

I am trying to implement real-time tracking using templates. I wish to update the template with every frame. The main modifications I have done are:

1) separated the template matching and minmaxLoc into separate modules namely, TplMatch() and minmax() functions, respectively.

2) Inside the track() function, the select_flag is kept always true so that new template is copied to 'myTemplate' with every iteration.

3) The last 3 lines of function track() are to update the template (roiImg).

4) Also, I have removed any arguments to track() function, since, img and roiImg are global variables and hence no need to pass them to functions.

Following is the code:

#include <iostream>
#include "opencv2/opencv.hpp"
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/objdetect/objdetect.hpp>

#include <sstream>


using namespace cv;
using namespace std;

Point point1, point2; /* vertical points of the bounding box */
int drag = 0;
Rect rect; /* bounding box */
Mat img, roiImg; /* roiImg - the part of the image in the bounding box */
int select_flag = 0;
bool go_fast = false;

Mat mytemplate;


///------- template matching -----------------------------------------------------------------------------------------------

Mat TplMatch( Mat &img, Mat &mytemplate )
{
  Mat result;

  matchTemplate( img, mytemplate, result, CV_TM_SQDIFF_NORMED );
  normalize( result, result, 0, 1, NORM_MINMAX, -1, Mat() );

  return result;
}


///------- Localizing the best match with minMaxLoc ------------------------------------------------------------------------

Point minmax( Mat &result )
{
  double minVal, maxVal;
  Point  minLoc, maxLoc, matchLoc;

  minMaxLoc( result, &minVal, &maxVal, &minLoc, &maxLoc, Mat() );
  matchLoc = minLoc;

  return matchLoc;
}


///------- tracking --------------------------------------------------------------------------------------------------------

void track()
{
    if (select_flag)
    {
        roiImg.copyTo(mytemplate);
//         select_flag = false;
        go_fast = true;
    }

//     imshow( "mytemplate", mytemplate ); waitKey(0);

    Mat result  =  TplMatch( img, mytemplate );
    Point match =  minmax( result ); 

    rectangle( img, match, Point( match.x + mytemplate.cols , match.y + mytemplate.rows ), CV_RGB(255, 255, 255), 0.5 );

    std::cout << "match: " << match << endl;

    /// latest match is the new template
    Rect ROI = cv::Rect( match.x, match.y, mytemplate.cols, mytemplate.rows );
    roiImg = img( ROI );
    imshow( "roiImg", roiImg ); //waitKey(0);
}


///------- MouseCallback function ------------------------------------------------------------------------------------------

void mouseHandler(int event, int x, int y, int flags, void *param)
{
    if (event == CV_EVENT_LBUTTONDOWN && !drag)
    {
        /// left button clicked. ROI selection begins
        point1 = Point(x, y);
        drag = 1;
    }

    if (event == CV_EVENT_MOUSEMOVE && drag)
    {
        /// mouse dragged. ROI being selected
        Mat img1 = img.clone();
        point2 = Point(x, y);
        rectangle(img1, point1, point2, CV_RGB(255, 0, 0), 3, 8, 0);
        imshow("image", img1);
    }

    if (event == CV_EVENT_LBUTTONUP && drag)
    {
        point2 = Point(x, y);
        rect = Rect(point1.x, point1.y, x - point1.x, y - point1.y);
        drag = 0;
        roiImg = img(rect);
//  imshow("MOUSE roiImg", roiImg); waitKey(0);
    }

    if (event == CV_EVENT_LBUTTONUP)
    {
        /// ROI selected
        select_flag = 1;
        drag = 0;
    }

}



///------- Main() ----------------------------------------------------------------------------------------------------------

int main()
{
    int k;
/*    
///open webcam
    VideoCapture cap(0);
    if (!cap.isOpened())
      return 1;*/

    ///open video file
    VideoCapture cap;
    cap.open( "Megamind.avi" );
    if ( !cap.isOpened() )
    {   cout << "Unable to open video file" << endl;    return -1;    }
/*    
    /// Set video to 320x240
     cap.set(CV_CAP_PROP_FRAME_WIDTH, 320);
     cap.set(CV_CAP_PROP_FRAME_HEIGHT, 240);*/

    cap >> img;
    GaussianBlur( img, img, Size(7,7), 3.0 );
    imshow( "image", img );

    while (1)
    {
        cap >> img;
        if ( img.empty() )
            break;

    // Flip the frame horizontally and add blur
    cv::flip( img, img, 1 );
    GaussianBlur( img, img, Size(7,7), 3.0 );

        if ( rect.width == 0 && rect.height == 0 )
            cvSetMouseCallback( "image", mouseHandler, NULL );
        else
            track();

        imshow("image", img);
//  waitKey(100);   k = waitKey(75);
    k = waitKey(go_fast ? 30 : 10000);
        if (k == 27)
            break;
    }

    return 0;
}

The updated template is not being tracked. I am not able to figure out why this is happening since I am updating my template (roiImg) with each iteration. The match value from minmax() function is returning the same point (coordinates) every-time. Test video is availbale at: http://www.youtube.com/watch?v=vpnkk7N2E0Q&feature=youtu.be Please look into it and guide ahead...thanks a lot!

回答1:

I get your original code from this revision of your question: https://stackoverflow.com/revisions/20180073/3

I made the smallest change to your original code, my resulting code is the following:

#include <iostream>
#include "opencv2/opencv.hpp"
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/objdetect/objdetect.hpp>

#include <sstream>


using namespace cv;
using namespace std;

Point point1, point2; /* vertical points of the bounding box */
int drag = 0;
Rect rect; /* bounding box */
Mat img, roiImg; /* roiImg - the part of the image in the bounding box */
int select_flag = 0;
bool go_fast = false;

Mat mytemplate;


///------- template matching -----------------------------------------------------------------------------------------------

Mat TplMatch( Mat &img, Mat &mytemplate )
{
  Mat result;

  matchTemplate( img, mytemplate, result, CV_TM_SQDIFF_NORMED );
  normalize( result, result, 0, 1, NORM_MINMAX, -1, Mat() );

  return result;
}


///------- Localizing the best match with minMaxLoc ------------------------------------------------------------------------

Point minmax( Mat &result )
{
  double minVal, maxVal;
  Point  minLoc, maxLoc, matchLoc;

  minMaxLoc( result, &minVal, &maxVal, &minLoc, &maxLoc, Mat() );
  matchLoc = minLoc;

  return matchLoc;
}


///------- tracking --------------------------------------------------------------------------------------------------------

void track()
{
    if (select_flag)
    {
        //roiImg.copyTo(mytemplate);
//         select_flag = false;
        go_fast = true;
    }

//     imshow( "mytemplate", mytemplate ); waitKey(0);

    Mat result  =  TplMatch( img, mytemplate );
    Point match =  minmax( result ); 

    rectangle( img, match, Point( match.x + mytemplate.cols , match.y + mytemplate.rows ), CV_RGB(255, 255, 255), 0.5 );

    std::cout << "match: " << match << endl;

    /// latest match is the new template
    Rect ROI = cv::Rect( match.x, match.y, mytemplate.cols, mytemplate.rows );
    roiImg = img( ROI );
    roiImg.copyTo(mytemplate);
    imshow( "roiImg", roiImg ); //waitKey(0);
}


///------- MouseCallback function ------------------------------------------------------------------------------------------

void mouseHandler(int event, int x, int y, int flags, void *param)
{
    if (event == CV_EVENT_LBUTTONDOWN && !drag)
    {
        /// left button clicked. ROI selection begins
        point1 = Point(x, y);
        drag = 1;
    }

    if (event == CV_EVENT_MOUSEMOVE && drag)
    {
        /// mouse dragged. ROI being selected
        Mat img1 = img.clone();
        point2 = Point(x, y);
        rectangle(img1, point1, point2, CV_RGB(255, 0, 0), 3, 8, 0);
        imshow("image", img1);
    }

    if (event == CV_EVENT_LBUTTONUP && drag)
    {
        point2 = Point(x, y);
        rect = Rect(point1.x, point1.y, x - point1.x, y - point1.y);
        drag = 0;
        roiImg = img(rect);
        roiImg.copyTo(mytemplate);
//  imshow("MOUSE roiImg", roiImg); waitKey(0);
    }

    if (event == CV_EVENT_LBUTTONUP)
    {
        /// ROI selected
        select_flag = 1;
        drag = 0;
    }

}



///------- Main() ----------------------------------------------------------------------------------------------------------

int main()
{
    int k;
/*    
///open webcam
    VideoCapture cap(0);
    if (!cap.isOpened())
      return 1;*/

    ///open video file
    VideoCapture cap;
    cap.open( "Megamind.avi" );
    if ( !cap.isOpened() )
    {   cout << "Unable to open video file" << endl;    return -1;    }
/*    
    /// Set video to 320x240
     cap.set(CV_CAP_PROP_FRAME_WIDTH, 320);
     cap.set(CV_CAP_PROP_FRAME_HEIGHT, 240);*/

    cap >> img;
    GaussianBlur( img, img, Size(7,7), 3.0 );
    imshow( "image", img );

    while (1)
    {
        cap >> img;
        if ( img.empty() )
            break;

    // Flip the frame horizontally and add blur
    cv::flip( img, img, 1 );
    GaussianBlur( img, img, Size(7,7), 3.0 );

        if ( rect.width == 0 && rect.height == 0 )
            cvSetMouseCallback( "image", mouseHandler, NULL );
        else
            track();

        imshow("image", img);
//  waitKey(100);   k = waitKey(75);
    k = waitKey(go_fast ? 30 : 10000);
        if (k == 27)
            break;
    }

    return 0;
}

The video at https://www.youtube.com/watch?v=rBCopeneCos shows a test of the above program.

I would avoid the use of global variable because I think they do not help in understanding where the problems lie; furthermore I also would pay attention to the shallow vs deep copy for OpenCV's Mat class, as 1'' wrote in his answer:

OpenCV's Mat class is simply a header for the actual image data, which it contains a pointer to. The operator= copies the pointer (and the other information in the header, like the image dimensions) so that both Mats share the same data. This means that modifying the data in one Mat also changes it in the other. This is called a "shallow" copy, since only the top layer (the header) is copied, not the lower layer (the data).

To make a copy of the underlying data (called a "deep copy"), use the clone() method. You can find information about it on the page that you linked to.

Edit about the drift: In comment Real-time template matching - OpenCV, C++, learner asks about the tracking drift. Looking at the video https://www.youtube.com/watch?v=rBCopeneCos we see that at the beginning of the video the program is tracking the girl's right eye while at 0:15 it starts to track the girl's eyebrows, at 0:19 it starts to track the boy's eyebrows and it never tracks anymore the girl's eye, for example at 0:27 it tracks the girl's right eyebrow while the girl's right eye is clearly visible in the image.

This drift from tracking the eye to tracking the eyebrow is normal in a simple code as the one I posted and the explanation is quite simple: see the video at https://www.youtube.com/watch?v=sGHEu3u9XvI, the video starts with the tracking (contents of the black rectangle) of the playing card, then I remove the playing card from the scene and the tracking black rectangle "drifts" to the bottom left of the scene; after all we are continuosly updating the template and so the behavior is correct: the program stops to track the playing card and starts to track a white background and so you have the "drift"... in other words, your TplMatch() function will always return a valid result image and your current implementation of minmax() will always return a valid a minimum.

回答2:

You can follow the OpenCV tutorial "Template Matching". Your track function may contain the code to find the template in the current frame; a simple code is based on the matchTemplate and minMaxLoc functions.

The interesting issue related to the "real-time" part of your question is to succeed in finding the match, if present, within the time between the current frame and the next one.

Edit:

The following quick-and-dirty code and the video at http://www.youtube.com/watch?v=vpnkk7N2E0Q&feature=youtu.be shows what I mean for tracking.

Since I do not have a webcam I slightly modified your code to just use a video, this one https://code.ros.org/trac/opencv/export/7237/trunk/opencv/samples/cpp/tutorial_code/HighGUI/video-input-psnr-ssim/video/Megamind.avi

I then add track function and some logic to slow down the video until I choose a ROI and after that playing the video at normal speed.

#include <iostream>
#include "opencv2/opencv.hpp"
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/objdetect/objdetect.hpp>

#include <sstream>


using namespace cv;
using namespace std;

Point point1, point2; /* vertical points of the bounding box */
int drag = 0;
Rect rect; /* bounding box */
Mat img, roiImg; /* roiImg - the part of the image in the bounding box */
int select_flag = 0;
bool go_fast = false;

Mat mytemplate;

void track(cv::Mat &img, const cv::Mat &templ, const cv::Rect &r )
{
    static int n = 0;

    if (select_flag)
    {
        templ.copyTo(mytemplate);
        select_flag = false;
        go_fast = true;
    }


    cv::Mat result;
    /// Do the Matching and Normalize
    matchTemplate( img, mytemplate, result, CV_TM_SQDIFF_NORMED );
    normalize( result, result, 0, 1, NORM_MINMAX, -1, Mat() );

    /// Localizing the best match with minMaxLoc
    double minVal; double maxVal; Point minLoc; Point maxLoc;
    Point matchLoc;

    minMaxLoc( result, &minVal, &maxVal, &minLoc, &maxLoc, Mat() );
    matchLoc = minLoc;

    rectangle( img, matchLoc, Point( matchLoc.x + mytemplate.cols , matchLoc.y + mytemplate.rows ), CV_RGB(255, 255, 255), 3 );

    std::cout << matchLoc << "\n";
}

///MouseCallback function

void mouseHandler(int event, int x, int y, int flags, void *param)
{
    if (event == CV_EVENT_LBUTTONDOWN && !drag)
    {
        /* left button clicked. ROI selection begins */
        point1 = Point(x, y);
        drag = 1;
    }

    if (event == CV_EVENT_MOUSEMOVE && drag)
    {
        /* mouse dragged. ROI being selected */
        Mat img1 = img.clone();
        point2 = Point(x, y);
        rectangle(img1, point1, point2, CV_RGB(255, 0, 0), 3, 8, 0);
        imshow("image", img1);
    }

    if (event == CV_EVENT_LBUTTONUP && drag)
    {
        point2 = Point(x, y);
        rect = Rect(point1.x, point1.y, x - point1.x, y - point1.y);
        drag = 0;
        roiImg = img(rect);
    }

    if (event == CV_EVENT_LBUTTONUP)
    {
        /* ROI selected */
        select_flag = 1;
        drag = 0;
    }

}


///Main function

int main()
{
    int k;
    /*
        VideoCapture cap(0);
        if (!cap.isOpened())
        return 1;
    */
    VideoCapture cap;
    //cap.open("~/Downloads/opencv-2.4.4/samples/cpp/tutorial_code/HighGUI/video-input-psnr-ssim/video/Megamind.avi");
    cap.open("./Megamind.avi");
    if (!cap.isOpened())
    {
        printf("Unable to open video file\n");
        return -1;
    }

    /*
        // Set video to 320x240
        cap.set(CV_CAP_PROP_FRAME_WIDTH, 320);
        cap.set(CV_CAP_PROP_FRAME_HEIGHT, 240);
        */

    cap >> img;
    imshow("image", img);

    while (1)
    {
        cap >> img;
        if (img.empty())
            break;

        if (rect.width == 0 && rect.height == 0)
            cvSetMouseCallback("image", mouseHandler, NULL);
        else
            track(img, roiImg, rect);

        if (select_flag == 1)
            imshow("Template", roiImg);

        imshow("image", img);
        k = waitKey(go_fast ? 30 : 10000);
        if (k == 27)
            break;

    }


    return 0;
}

回答3:

You can also have a general introduction to the subject starting from this wikipedia page http://en.wikipedia.org/wiki/Video_tracking

来源：https://stackoverflow.com/questions/20180073/real-time-template-matching-opencv-c

标签

c++

OpenCV

image-processing

computer-vision

template-matching