Fast vectorized conversion from RGB to BGRA

后端未结

关注

 4  720

夕颜 2020-12-10 07:32

In a follow-up to some previous questions on converting RGB to RGBA, and ARGB to BGR, I would like to speed up a RGB to BGRA conversion with SSE

4条回答

無奈伤痛 (楼主)

2020-12-10 07:49

This is an example of using SSSE3 intrinsics to perform the requested operation. The input and output pointers must be 16-byte aligned, and it operates on a block of 16 pixels at a time.

#include /* in and out must be 16-byte aligned */ void rgb_to_bgrx_sse(unsigned w, const void *in, void *out) { const __m128i *in_vec = in; __m128i *out_vec = out; w /= 16; while (w-- > 0) { /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 * in_vec[0] Ra Ga Ba Rb Gb Bb Rc Gc Bc Rd Gd Bd Re Ge Be Rf * in_vec[1] Gf Bf Rg Gg Bg Rh Gh Bh Ri Gi Bi Rj Gj Bj Rk Gk * in_vec[2] Bk Rl Gl Bl Rm Gm Bm Rn Gn Bn Ro Go Bo Rp Gp Bp */ __m128i in1, in2, in3; __m128i out; in1 = in_vec[0]; out = _mm_shuffle_epi8(in1, _mm_set_epi8(0xff, 9, 10, 11, 0xff, 6, 7, 8, 0xff, 3, 4, 5, 0xff, 0, 1, 2)); out = _mm_or_si128(out, _mm_set_epi8(0xff, 0, 0, 0, 0xff, 0, 0, 0, 0xff, 0, 0, 0, 0xff, 0, 0, 0)); out_vec[0] = out; in2 = in_vec[1]; in1 = _mm_and_si128(in1, _mm_set_epi8(0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0)); out = _mm_and_si128(in2, _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff)); out = _mm_or_si128(out, in1); out = _mm_shuffle_epi8(out, _mm_set_epi8(0xff, 5, 6, 7, 0xff, 2, 3, 4, 0xff, 15, 0, 1, 0xff, 12, 13, 14)); out = _mm_or_si128(out, _mm_set_epi8(0xff, 0, 0, 0, 0xff, 0, 0, 0, 0xff, 0, 0, 0, 0xff, 0, 0, 0)); out_vec[1] = out; in3 = in_vec[2]; in_vec += 3; in2 = _mm_and_si128(in2, _mm_set_epi8(0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0)); out = _mm_and_si128(in3, _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff)); out = _mm_or_si128(out, in2); out = _mm_shuffle_epi8(out, _mm_set_epi8(0xff, 1, 2, 3, 0xff, 14, 15, 0, 0xff, 11, 12, 13, 0xff, 8, 9, 10)); out = _mm_or_si128(out, _mm_set_epi8(0xff, 0, 0, 0, 0xff, 0, 0, 0, 0xff, 0, 0, 0, 0xff, 0, 0, 0)); out_vec[2] = out; out = _mm_shuffle_epi8(in3, _mm_set_epi8(0xff, 13, 14, 15, 0xff, 10, 11, 12, 0xff, 7, 8, 9, 0xff, 4, 5, 6)); out = _mm_or_si128(out, _mm_set_epi8(0xff, 0, 0, 0, 0xff, 0, 0, 0, 0xff, 0, 0, 0, 0xff, 0, 0, 0)); out_vec[3] = out; out_vec += 4; } }

0 讨论(0)

查看其它4个回答

发布评论:

提交评论

加载中...

验证码

看不清?

提交回复