I wrote an algorithm to convert an RGB image to YUV420. I spent a long time trying to make it faster, but I haven't found any other way to boost its efficiency, so now I turn to
Unroll your loop and get rid of the `if` in the inner loop. Also, do not run over the image data three times; doing everything in a single pass is even faster!
/* Luma (Y) of one RGB pixel using 8-bit fixed-point weights, offset by 16. */
static uint8_t bitmap2yuv_luma(int r, int g, int b)
{
    return (uint8_t)(((66 * r + 129 * g + 25 * b) >> 8) + 16);
}

/*
 * Blue-difference chroma (U) of one RGB pixel.
 * The +128 bias is folded in (as 128 << 8) before the shift so the shifted
 * value is never negative; right-shifting a negative int is
 * implementation-defined in C.  The result equals
 * ((-38*r - 74*g + 112*b) >> 8) + 128 on arithmetic-shift targets.
 */
static uint8_t bitmap2yuv_chroma_u(int r, int g, int b)
{
    return (uint8_t)((-38 * r - 74 * g + 112 * b + (128 << 8)) >> 8);
}

/* Red-difference chroma (V) of one RGB pixel; bias handled as for U. */
static uint8_t bitmap2yuv_chroma_v(int r, int g, int b)
{
    return (uint8_t)((112 * r - 94 * g - 18 * b + (128 << 8)) >> 8);
}

/*
 * Convert a packed 3-bytes-per-pixel R,G,B image into planar Y/U/V with
 * 2x2 chroma subsampling, in a single pass over the input.
 *
 * destination  output buffer: width*height luma bytes, followed by the U
 *              plane and then the V plane, each (width/2)*(height/2) bytes.
 * rgb          input pixels, 3 bytes each in R,G,B order, row after row
 *              with no padding (exactly 3*width*height bytes are read).
 * width        image width in pixels.
 * height       image height in pixels.
 *
 * Chroma is taken from the top-left pixel of every complete 2x2 block.
 * For odd dimensions the unpaired last column and/or last row contributes
 * luma only, so the function never reads past `rgb` and never writes more
 * than width*height + 2*((width*height)/4) bytes to `destination`.
 * For even dimensions the layout is width*height Y bytes, then
 * width*height/4 U bytes, then width*height/4 V bytes.
 */
void Bitmap2Yuv420p_calc2(uint8_t *destination, uint8_t *rgb, size_t width, size_t height)
{
    const size_t image_size = width * height;
    const size_t chroma_width = width / 2;
    const size_t chroma_height = height / 2;
    const size_t paired_width = chroma_width * 2; /* columns that form full pairs */

    size_t upos = image_size;
    size_t vpos = upos + chroma_width * chroma_height;
    size_t i = 0; /* index of the current pixel == index into the Y plane */

    for (size_t line = 0; line < height; ++line) {
        size_t x = 0;

        /* Even lines that have a partner line below also produce chroma. */
        if (line % 2 == 0 && line / 2 < chroma_height) {
            for (; x < paired_width; x += 2) {
                const uint8_t *px = rgb + 3 * i;

                /* Left pixel of the pair: luma plus the block's chroma. */
                destination[i++] = bitmap2yuv_luma(px[0], px[1], px[2]);
                destination[upos++] = bitmap2yuv_chroma_u(px[0], px[1], px[2]);
                destination[vpos++] = bitmap2yuv_chroma_v(px[0], px[1], px[2]);

                /* Right pixel of the pair: luma only. */
                destination[i++] = bitmap2yuv_luma(px[3], px[4], px[5]);
            }
        }

        /* Remaining pixels of the line (all of them on luma-only lines,
         * or the single unpaired column when width is odd): luma only. */
        for (; x < width; ++x) {
            const uint8_t *px = rgb + 3 * i;

            destination[i++] = bitmap2yuv_luma(px[0], px[1], px[2]);
        }
    }
}
In my tests (VS 2010), this was about 25% faster than your accepted answer, depending on whether x86 or x64 is enabled.