Timing CUDA operations

后端 未结 5 1934
走了就别回头了
走了就别回头了 2020-12-02 23:48

I need to time a CUDA kernel execution. The Best Practices Guide says that we can use either events or standard timing functions like clock() in Windows. My pro

5条回答
  •  既然无缘
    2020-12-03 00:20

    A satisfactory answer has already been given to your question.

    I have written classes for timing C/C++ as well as CUDA operations and want to share them, hoping they will be helpful to other users. You just need to add the 4 files reported below to your project and #include the two header files as follows:

    // --- Timing includes
    #include "TimingCPU.h"
    #include "TimingGPU.cuh"
    

    The two classes can be used as follows.

    Timing CPU section

    TimingCPU timer_CPU;
    
    timer_CPU.StartCounter();
    CPU operations to be timed
    std::cout << "CPU Timing = " << timer_CPU.GetCounter() << " ms" << std::endl;
    

    Timing GPU section

    TimingGPU timer_GPU;
    timer_GPU.StartCounter();
    GPU operations to be timed
    std::cout << "GPU Timing = " << timer_GPU.GetCounter() << " ms" << std::endl;
    

    In both cases, the timing is in milliseconds. Also, the two classes can be used under Linux or Windows.

    Here are the 4 files:

    TimingCPU.cpp

    /**************/
    /* TIMING CPU */
    /**************/
    
    #include "TimingCPU.h"
    
    #ifdef __linux__
    
        #include <sys/time.h>
        #include <stdio.h>
    
        // Builds the timer with a zeroed time stamp and immediately starts it,
        // so GetCounter() can be called without an explicit StartCounter().
        TimingCPU::TimingCPU()
            : cur_time_(0)
        {
            StartCounter();
        }
    
        // Nothing to release: the Linux timer only stores an integer time stamp.
        TimingCPU::~TimingCPU()
        {
        }
    
        // Stores the current wall-clock time, in microseconds, as the reference
        // point for the next GetCounter() call. If gettimeofday() fails the
        // previous reference point is left unchanged.
        void TimingCPU::StartCounter()
        {
            struct timeval now;
            const int failed = gettimeofday(&now, 0);
            if (!failed) {
                cur_time_ = 1000000 * now.tv_sec + now.tv_usec;
            }
        }
    
        double TimingCPU::GetCounter()
        {
            struct timeval time;
            if(gettimeofday( &time, 0 )) return -1;
    
            long cur_time = 1000000 * time.tv_sec + time.tv_usec;
            double sec = (cur_time - cur_time_) / 1000000.0;
            if(sec < 0) sec += 86400;
            cur_time_ = cur_time;
    
            return 1000.*sec;
        }
    
    #elif _WIN32 || _WIN64
        #include <windows.h>
        #include <iostream>
    
        // Private state of the Windows timer, hidden behind a pointer in
        // TimingCPU.
        struct PrivateTimingCPU
        {
            // Performance-counter ticks per millisecond (set by StartCounter()).
            double PCFreq;

            // Performance-counter value captured by StartCounter().
            __int64 CounterStart;
        };
    
        // --- Default constructor
        // Allocates the private state and zeroes both of its fields; the real
        // values are filled in by StartCounter().
        TimingCPU::TimingCPU()
        {
            privateTimingCPU = new PrivateTimingCPU;
            privateTimingCPU->PCFreq       = 0.0;
            privateTimingCPU->CounterStart = 0;
        }
    
        // --- Default destructor
        // Frees the private state allocated by the constructor so that a timer
        // no longer leaks memory when it goes out of scope.
        // NOTE: TimingCPU declares no copy operations, so a copy shares this
        // pointer; timers must not be copied.
        TimingCPU::~TimingCPU()
        {
            delete privateTimingCPU;
            privateTimingCPU = 0;
        }
    
        // --- Starts the timing
        void TimingCPU::StartCounter()
        {
            LARGE_INTEGER li;
            if(!QueryPerformanceFrequency(&li)) std::cout << "QueryPerformanceFrequency failed!\n";
    
            (*privateTimingCPU).PCFreq = double(li.QuadPart)/1000.0;
    
            QueryPerformanceCounter(&li);
            (*privateTimingCPU).CounterStart = li.QuadPart;
        }
    
        // --- Gets the timing counter in ms
        double TimingCPU::GetCounter()
        {
            LARGE_INTEGER li;
            QueryPerformanceCounter(&li);
            return double(li.QuadPart-(*privateTimingCPU).CounterStart)/(*privateTimingCPU).PCFreq;
        }
    #endif
    

    TimingCPU.h

    // 1 micro-second accuracy
    // Returns the time in milliseconds
    
    #ifndef __TIMINGCPU_H__
    #define __TIMINGCPU_H__
    
    #ifdef __linux__
    
        // Wall-clock CPU timer (Linux variant).
        class TimingCPU {

            public:

                // Creates the timer and starts it right away.
                TimingCPU();

                ~TimingCPU();

                // (Re)sets the reference point to the current time.
                void StartCounter();

                // Returns the milliseconds elapsed since the reference point and
                // moves the reference point to the current time; returns -1 if
                // the system clock cannot be read.
                double GetCounter();

            private:

                // Reference time stamp in microseconds.
                long cur_time_;
        };
    
    #elif _WIN32 || _WIN64
    
        struct PrivateTimingCPU;
    
        // Wall-clock CPU timer (Windows variant). The platform-specific state is
        // kept behind an opaque pointer so this header needs no Windows headers.
        class TimingCPU
        {
            public:

                // Allocates the private timer state.
                TimingCPU();

                ~TimingCPU();

                // Captures the start point of the interval to be measured.
                void StartCounter();

                // Returns the milliseconds elapsed since the last StartCounter().
                double GetCounter();

            private:

                // Opaque state defined in TimingCPU.cpp.
                PrivateTimingCPU *privateTimingCPU;

        }; // TimingCPU class
    
    #endif
    
    #endif
    

    TimingGPU.cu

    /**************/
    /* TIMING GPU */
    /**************/
    
    #include "TimingGPU.cuh"
    
    #include <cuda.h>
    #include <cuda_runtime.h>
    
    struct PrivateTimingGPU {
        cudaEvent_t     start;
        cudaEvent_t     stop;
    };
    
    // default constructor
    TimingGPU::TimingGPU() { privateTimingGPU = new PrivateTimingGPU; }
    
    // default destructor
    TimingGPU::~TimingGPU() { }
    
    void TimingGPU::StartCounter()
    {
        cudaEventCreate(&((*privateTimingGPU).start));
        cudaEventCreate(&((*privateTimingGPU).stop));
        cudaEventRecord((*privateTimingGPU).start,0);
    }
    
    void TimingGPU::StartCounterFlags()
    {
        int eventflags = cudaEventBlockingSync;
    
        cudaEventCreateWithFlags(&((*privateTimingGPU).start),eventflags);
        cudaEventCreateWithFlags(&((*privateTimingGPU).stop),eventflags);
        cudaEventRecord((*privateTimingGPU).start,0);
    }
    
    // Gets the counter in ms
    float TimingGPU::GetCounter()
    {
        float   time;
        cudaEventRecord((*privateTimingGPU).stop, 0);
        cudaEventSynchronize((*privateTimingGPU).stop);
        cudaEventElapsedTime(&time,(*privateTimingGPU).start,(*privateTimingGPU).stop);
        return time;
    }
    

    TimingGPU.cuh

    #ifndef __TIMING_CUH__
    #define __TIMING_CUH__
    
    /**************/
    /* TIMING GPU */
    /**************/
    
    // Events are a part of CUDA API and provide a system independent way to measure execution times on CUDA devices with approximately 0.5
    // microsecond precision.
    
    struct PrivateTimingGPU;
    
    // GPU timer based on CUDA events. The CUDA-specific state is kept behind an
    // opaque pointer so this header can be included without CUDA headers.
    class TimingGPU
    {
        public:

            // Allocates the private timer state.
            TimingGPU();

            ~TimingGPU();

            // Creates the start/stop events and records the start event.
            void StartCounter();

            // Same as StartCounter(), but the events are created with the
            // cudaEventBlockingSync flag.
            void StartCounterFlags();

            // Records the stop event, waits for it, and returns the time in
            // milliseconds between the start and stop events.
            float GetCounter();

        private:

            // Opaque state defined in TimingGPU.cu.
            PrivateTimingGPU *privateTimingGPU;

    }; // TimingGPU class
    
    #endif
    

提交回复
热议问题