How much overhead is there in calling a function in C++?

前端 未结 16 1673
予麋鹿
予麋鹿 2020-12-02 07:29

A lot of literature talks about using inline functions to "avoid the overhead of a function call". However I haven't seen quantifiable data. What is the actual overhead o

16条回答
  •  不知归路
    2020-12-02 08:22

    There is not much overhead at all, especially with small (inline-able) functions or even classes.

    The following example has three different tests that are each run many, many times and timed. The timings of the three tests always agree to within the order of a couple of thousandths of a unit of time.

    #include <boost/timer/timer.hpp>
    #include <iostream>
    #include <cmath>
    
    // Global accumulator updated by WORK_UNIT; each test resets it to 0 before timing.
    double sum;
    // Operands read by WORK_UNIT; nothing in this file modifies them.
    double a = 42, b = 53;
    
    //#define ITERATIONS 1000000 // 1 million - for testing
    //#define ITERATIONS 10000000000 // 10 billion ~ 10s per run
    //#define WORK_UNIT sum += a + b
    /* output
    8.609619s wall, 8.611255s user + 0.000000s system = 8.611255s CPU(100.0%)
    8.604478s wall, 8.611255s user + 0.000000s system = 8.611255s CPU(100.1%)
    8.610679s wall, 8.595655s user + 0.000000s system = 8.595655s CPU(99.8%)
    9.5e+011 9.5e+011 9.5e+011
    */
    
    // Number of times each test loop executes its payload.
    #define ITERATIONS 100000000 // 100 million ~ 10s per run
    // Payload executed once per iteration: updates the global `sum` using `a`, `b` and the current `sum`.
    #define WORK_UNIT sum += std::sqrt(a*a + b*b + sum) + std::sin(sum) + std::cos(sum)
    /* output
    8.485689s wall, 8.486454s user + 0.000000s system = 8.486454s CPU (100.0%)
    8.494153s wall, 8.486454s user + 0.000000s system = 8.486454s CPU (99.9%)
    8.467291s wall, 8.470854s user + 0.000000s system = 8.470854s CPU (100.0%)
    2.50001e+015 2.50001e+015 2.50001e+015
    */
    
    
    // ------------------------------
    // Baseline test: executes WORK_UNIT directly in the loop body, with no
    // function call in between. Resets the global `sum`, runs ITERATIONS
    // passes while the scoped timer is alive, and returns the final `sum`.
    double simple()
    {
       sum = 0;
       boost::timer::auto_cpu_timer t;   // lives until the function returns

       const unsigned long long total = ITERATIONS;
       for (unsigned long long pass = 0; pass != total; ++pass)
          WORK_UNIT;

       return sum;
    }
    
    // ------------------------------
    // Chain of six free functions: call1 -> call2 -> ... -> call6.
    // Only the innermost one (call6) performs the work payload; every other
    // link simply forwards to the next one.
    void call6() { WORK_UNIT; }

    void call5()
    {
       call6();
    }

    void call4()
    {
       call5();
    }

    void call3()
    {
       call4();
    }

    void call2()
    {
       call3();
    }

    void call1()
    {
       call2();
    }
    
    // Function-call test: invokes call1() (the head of the six-function chain)
    // ITERATIONS times. Resets the global `sum` first, keeps the scoped timer
    // alive for the whole loop, and returns the final `sum`.
    double calls()
    {
       sum = 0;
       boost::timer::auto_cpu_timer t;   // lives until the function returns

       const unsigned long long total = ITERATIONS;
       for (unsigned long long pass = 0; pass != total; ++pass)
          call1();

       return sum;
    }
    
    // ------------------------------
    // Innermost object of the object-call test: its runIt() performs the
    // work payload once.
    struct Obj3 {
       void runIt()
       {
          WORK_UNIT;
       }
    };
    
    class Obj2{
    public:
       Obj2(){it = new Obj3();}
       ~Obj2(){delete it;}
       void runIt(){it->runIt();}
       Obj3* it;
    };
    
    // Outermost object of the object-call test: holds an Obj2 by value and
    // forwards runIt() to it.
    struct Obj1 {
       Obj2 it;

       void runIt()
       {
          it.runIt();
       }
    };
    
    // Object-call test: calls Obj1::runIt() ITERATIONS times, which forwards
    // through Obj2 to Obj3. The Obj1 is constructed before the timer is
    // created. Resets the global `sum` first and returns the final `sum`.
    double objects()
    {
       sum = 0;
       Obj1 obj;

       boost::timer::auto_cpu_timer t;   // lives until the function returns

       const unsigned long long total = ITERATIONS;
       for (unsigned long long pass = 0; pass != total; ++pass)
          obj.runIt();

       return sum;
    }
    // ------------------------------
    
    
    // Runs the three tests in order (inline payload, function-call chain,
    // object chain) and prints their final sums on one line, separated by
    // single spaces.
    int main(int argc, char** argv)
    {
       const double inlineTotal = simple();
       const double chainTotal  = calls();
       const double objectTotal = objects();

       std::cout << inlineTotal << " " << chainTotal << " " << objectTotal << std::endl;
    }
    

    The output for running 100,000,000 iterations of each type (simple, six function calls, three object calls) with this semi-convoluted work payload:

    sum += std::sqrt(a*a + b*b + sum) + std::sin(sum) + std::cos(sum)
    

    as follows:

    8.485689s wall, 8.486454s user + 0.000000s system = 8.486454s CPU (100.0%)
    8.494153s wall, 8.486454s user + 0.000000s system = 8.486454s CPU (99.9%)
    8.467291s wall, 8.470854s user + 0.000000s system = 8.470854s CPU (100.0%)
    2.50001e+015 2.50001e+015 2.50001e+015
    

    Using a simple work payload of

    sum += a + b
    

    gives the same results, except that each case runs a couple of orders of magnitude faster.

提交回复
热议问题