Optimizing a branch for a known more-common path

前端 未结 4 1125
深忆病人
深忆病人 2020-12-06 07:55

Please consider the following piece of code:

void error_handling();
bool method_impl();

bool method()
{
    const bool res = method_impl();
    if (res == f         


        
4条回答
  •  悲&欢浪女
    2020-12-06 08:22

    Following other answers' suggestions, I benchmarked the solutions. If you consider upvoting this answer, please upvote the others too.

    Benchmark code

    #include <iomanip>
    #include <iostream>

    // solutions
    #include <ctime>

    // benchmark
    #include <chrono>
    #include <cstddef>
    
    //
    // Solutions
    //
    namespace
    {
        // Timestamp that std::time() is compared against. -1 is also the value
        // std::time() returns when it fails, so the comparison in method_impl()
        // is only false on that failure. volatile forces a real read of the
        // variable on every access.
        volatile std::time_t near_future = -1;

        // Error path shared by every variant: writes "error" to std::cerr.
        void error_handling() { std::cerr << "error\n"; }

        // Operation under test. Returns true unless the current time equals
        // near_future, i.e. the success path is the overwhelmingly common one.
        bool method_impl() { return std::time(nullptr) != near_future; }

        // Baseline: error branch written first, no compiler hint.
        // Returns the result of method_impl(), calling error_handling() on false.
        bool method_no_builtin()
        {
            const bool res = method_impl();
            if (res == false) {
                error_handling();
                return false;
            }
            return true;
        }

        // Same shape as the baseline, but tells the compiler through
        // __builtin_expect that res is expected to be true (1).
        bool method_builtin()
        {
            const bool res = method_impl();
            if (__builtin_expect(res, 1) == false) {
                error_handling();
                return false;
            }
            return true;
        }

        // Deliberately wrong hint: tells the compiler res is expected to be
        // false (0) even though it is almost always true. Kept as a control to
        // show the cost of a misleading hint; do not "fix" the 0.
        bool method_builtin_incorrect()
        {
            const bool res = method_impl();
            if (__builtin_expect(res, 0) == false) {
                error_handling();
                return false;
            }
            return true;
        }

        // No hint; instead the source is reordered so that the common (success)
        // case is the first branch and the error case is the else branch.
        bool method_rewritten()
        {
            const bool res = method_impl();
            if (res == true) {
                return true;
            } else {
                error_handling();
                return false;
            }
        }
    }
    
    //
    // benchmark
    //
    // Number of iterations of each timing loop in main(); every iteration calls
    // the benchmarked function ten times.
    constexpr std::size_t BENCHSIZE = 10'000'000;
    // Minimal stopwatch built on std::chrono::steady_clock.
    // The start instant is captured at construction; end<Unit>() reports the
    // time elapsed since then, expressed in the requested duration unit.
    class Clock
    {
        using time_point = std::chrono::time_point<std::chrono::steady_clock>;

        time_point start_;

    public:
        // Current instant of the steady (monotonic) clock.
        static time_point now() { return std::chrono::steady_clock::now(); }

        // Starts the stopwatch.
        Clock() : start_(now())
        {
        }

        // Returns the number of whole DurationUnit ticks elapsed since
        // construction (duration_cast truncates toward zero). The stopwatch is
        // not reset, so end() may be called repeatedly.
        template <class DurationUnit>
        std::size_t end() const
        {
            return std::chrono::duration_cast<DurationUnit>(now() - start_).count();
        }
    };
    
    //
    // Entry point
    //
    int main()
    {
        {
            Clock clock;
            bool result = true;
            for (std::size_t i = 0 ; i < BENCHSIZE ; ++i)
            {
                result &= method_no_builtin();
                result &= method_no_builtin();
                result &= method_no_builtin();
                result &= method_no_builtin();
                result &= method_no_builtin();
                result &= method_no_builtin();
                result &= method_no_builtin();
                result &= method_no_builtin();
                result &= method_no_builtin();
                result &= method_no_builtin();
            }
            const double unit_time = clock.end() / static_cast(BENCHSIZE);
            std::cout << std::setw(40) << "method_no_builtin(): " << std::setprecision(3) << unit_time << " ns\n";
        }
        {
            Clock clock;
            bool result = true;
            for (std::size_t i = 0 ; i < BENCHSIZE ; ++i)
            {
                result &= method_builtin();
                result &= method_builtin();
                result &= method_builtin();
                result &= method_builtin();
                result &= method_builtin();
                result &= method_builtin();
                result &= method_builtin();
                result &= method_builtin();
                result &= method_builtin();
                result &= method_builtin();
            }
            const double unit_time = clock.end() / static_cast(BENCHSIZE);
            std::cout << std::setw(40) << "method_builtin(): " << std::setprecision(3) << unit_time << " ns\n";
        }
        {
            Clock clock;
            bool result = true;
            for (std::size_t i = 0 ; i < BENCHSIZE ; ++i)
            {
                result &= method_builtin_incorrect();
                result &= method_builtin_incorrect();
                result &= method_builtin_incorrect();
                result &= method_builtin_incorrect();
                result &= method_builtin_incorrect();
                result &= method_builtin_incorrect();
                result &= method_builtin_incorrect();
                result &= method_builtin_incorrect();
                result &= method_builtin_incorrect();
                result &= method_builtin_incorrect();
            }
            const double unit_time = clock.end() / static_cast(BENCHSIZE);
            std::cout << std::setw(40) << "method_builtin_incorrect(): " << std::setprecision(3) << unit_time << " ns\n";
        }
        {
            Clock clock;
            bool result = true;
            for (std::size_t i = 0 ; i < BENCHSIZE ; ++i)
            {
                result &= method_rewritten();
                result &= method_rewritten();
                result &= method_rewritten();
                result &= method_rewritten();
                result &= method_rewritten();
                result &= method_rewritten();
                result &= method_rewritten();
                result &= method_rewritten();
                result &= method_rewritten();
                result &= method_rewritten();
            }
            const double unit_time = clock.end() / static_cast(BENCHSIZE);
            std::cout << std::setw(40) << "method_rewritten(): " << std::setprecision(3) << unit_time << " ns\n";
        }
    }
    

    Benchmark results

    g++ -std=c++14 -O2 -Wall -Wextra -Werror main.cpp

                   method_no_builtin(): 42.8 ns
                      method_builtin(): 44.4 ns
            method_builtin_incorrect(): 51.4 ns
                    method_rewritten(): 39.3 ns
    

    Demo

    g++ -std=c++14 -O3 -Wall -Wextra -Werror main.cpp

                   method_no_builtin(): 32.3 ns
                      method_builtin(): 31.1 ns
            method_builtin_incorrect(): 35.6 ns
                    method_rewritten(): 30.5 ns
    

    Demo

    Conclusion

    The differences between those optimizations are too small to support any conclusion other than this: if there is a performance gain to be found in optimizing a branch for a known more-common path, that gain is too small to be worth the trouble and the loss in readability.

提交回复
热议问题