I've not done any parallel programming for a while now, nor any Maths for that matter, but don't you want to split the rows of the matrix in parallel, rather than the columns?
What happens if you try this:
double start_time = clock();
#pragma omp parallel private(i) num_threads(4)
{
tid = omp_get_thread_num();
world_size = omp_get_num_threads();
printf("Threads: %d\n",world_size);
#pragma omp parallel for private(y) shared(results, vector, matrix)
for(y = 0; y < matrix_size ; y++){
for(i = 0; i < matrix_size; i++){
results[y] = results[y] + vector[i]*matrix[i][y];
}
}
}
double end_time = clock();
double result_time = (end_time - start_time) / CLOCKS_PER_SEC;
printf("Time: %f\n", result_time);
Also, are you sure everything's OK compiling and linking with openMP
?