I'd like to parallelize the following piece of code, but I am new to OpenMP and to writing parallel code.
std::vector good_matches;
for (int i = 0;
I showed how to do this in my answer to a related question ("c-openmp-parallel-for-loop-alternatives-to-stdvector").
Make private versions of the std::vector and fill the shared std::vector in a critical section like this:
// Shared result vector; it is only written inside the critical section below.
std::vector<DMatch> good_matches;
#pragma omp parallel
{
    // Per-thread accumulator: declared inside the parallel region, so every
    // thread gets its own instance and the loop body needs no locking.
    std::vector<DMatch> thread_matches;
    // `nowait` drops the barrier at the end of the loop, letting a thread that
    // finishes early go straight on to merging its results.
    #pragma omp for nowait
    for (int row = 0; row < descriptors_A.rows; ++row) {
        // Skip every match whose distance is not below three times min_dist.
        if (!(matches_RM[row].distance < 3 * min_dist)) {
            continue;
        }
        thread_matches.push_back(matches_RM[row]);
    }
    // Only one thread at a time may append to the shared vector. The order in
    // which the per-thread chunks land in good_matches is therefore the order
    // in which threads reach this point, not the original index order.
    #pragma omp critical
    {
        good_matches.insert(good_matches.end(),
                            thread_matches.begin(), thread_matches.end());
    }
}
One possibility may be to use a private vector for each thread and combine them at the end:
#include<omp.h>
#include<algorithm>
#include<iterator>
#include<iostream>
#include<vector>
using namespace std;
// Collects the odd integers in [0, 100) using an OpenMP parallel loop and
// prints them one per line.
//
// Every thread appends to its own buffer (indexed by its thread number), so
// the loop body needs no synchronization. After the loop, a single thread
// concatenates the buffers in thread-number order into global_vector.
// Returns 0.
int main()
{
    std::vector<int> global_vector;
    std::vector<std::vector<int>> buffers;
    #pragma omp parallel
    {
        const auto nthreads = omp_get_num_threads();
        const auto id = omp_get_thread_num();
        //
        // Correctly set the number of buffers.
        // `single` ends with an implicit barrier, so the resize is complete
        // before any thread touches its buffer.
        //
        #pragma omp single
        {
            buffers.resize(static_cast<std::size_t>(nthreads));
        }
        //
        // Each thread works on its chunk.
        // If order is important maintain schedule static: the buffers are
        // concatenated below in thread-number order.
        //
        #pragma omp for schedule(static)
        for (std::size_t ii = 0; ii < 100; ++ii) {
            if (ii % 2 != 0) { // Any other condition will do
                // ii < 100, so the conversion to int cannot lose information.
                buffers[id].push_back(static_cast<int>(ii));
            }
        }
        //
        // Combine buffers together.
        // The loop above ends with an implicit barrier, so every buffer is
        // complete when one thread starts merging.
        //
        #pragma omp single
        {
            // Reserve once so the merge never reallocates.
            std::size_t total = 0;
            for (const auto& buffer : buffers) {
                total += buffer.size();
            }
            global_vector.reserve(total);
            for (const auto& buffer : buffers) {
                global_vector.insert(global_vector.end(),
                                     buffer.begin(), buffer.end());
            }
        }
    }
    //
    // Print the result.
    // '\n' instead of std::endl avoids flushing the stream on every element.
    //
    for (const auto& x : global_vector) {
        std::cout << x << '\n';
    }
    return 0;
}
The actual speed-up depends only on the amount of work done inside each loop iteration.
TBB's concurrent_vector acts much like std::vector, but allows parallel calls to push_back.