In the following example the C++11 threads take about 50 seconds to execute, but the OMP threads only 5 seconds. Any ideas why? (I can assure you it still holds true if you are doing real work instead of doNothing
, or if you do it in a different order, etc.) I'm on a 16 core machine, too.
#include <iostream> #include <omp.h> #include <chrono> #include <vector> #include <thread> using namespace std; void doNothing() {} int run(int algorithmToRun) { auto startTime = std::chrono::system_clock::now(); for(int j=1; j<100000; ++j) { if(algorithmToRun == 1) { vector<thread> threads; for(int i=0; i<16; i++) { threads.push_back(thread(doNothing)); } for(auto& thread : threads) thread.join(); } else if(algorithmToRun == 2) { #pragma omp parallel for num_threads(16) for(unsigned i=0; i<16; i++) { doNothing(); } } } auto endTime = std::chrono::system_clock::now(); std::chrono::duration<double> elapsed_seconds = endTime - startTime; return elapsed_seconds.count(); } int main() { int cppt = run(1); int ompt = run(2); cout<<cppt<<endl; cout<<ompt<<endl; return 0; }