I have a __m256d vector packed with four 64-bit floating-point values.
I need to find the horizontal maximum of the vector\'s elements and store the result in a double-p
//Use the code to find the horizontal maximum
__m256 v1 = initial_vector;//example v1=[1 2 3 4 5 6 7 8]
__m256 v2 = _mm256_permute_ps(v1,(int)147);//147 is control code for rotate left by upper 4 elements and lower 4 elements separately v2=[2 3 4 1 6 7 8 5]
__m256 v3 = _mm256_max_ps(v1,v2);//v3=[2 3 4 4 6 7 8 8]
__m256 v4 = _mm256_permute_ps(v3,(int)147);//v4=[3 4 4 2 7 8 8 6]
__m256 v5 = _mm256_max_ps(v3,v4);//v5=[3 4 4 4 7 8 8 8]
__m256 v6 = _mm256_permute_ps(v5,(int)147);//v6=[4 4 4 3 8 8 8 7]
__m256 v7 = _mm256_max_ps(v5,v6);//contains max of upper four elements and lower 4 elements. v7=[4 4 4 4 8 8 8 8]
//to get max of this horizontal array. Note that the highest end of either upper or lower can contain the maximum
float ALIGN max_array[8];
float horizontal_max;
_mm256_store_ps(max_array, v7);
if(max_array[3] > max_array[7])
{
horizontal_max = max_array[3];
}
else
{
horizontal_max = max_array[7];
}