Implementation of Bi-Cubic resize

青春壹個敷衍的年華 提交于 2019-12-05 21:46:58

Aside from mixing floating point and integer arithmetic, I suspect you're getting numeric overflow / underflow with some of your intermediate values.

One easy fix is to just be consistent and use floating point throughout. Right now you have:

unsigned char C[5] = { 0 };

for (unsigned i = 0; i < dest_height; ++i)
{
    for (unsigned j = 0; j < dest_width; ++j)
    {
        const int x = int(tx * j);
        const int y = int(ty * i);
        const double dx = tx * j - x;
        const double dy = ty * i - y;

        for (int k = 0; k < 3; ++k)
        {
            for (int jj = 0; jj < 4; ++jj)
            {
                const int idx = y - 1 + jj;
                unsigned char a0 = get_subpixel(bmap, idx, x, k);
                unsigned char d0 = get_subpixel(bmap, idx, x - 1, k) - a0;
                unsigned char d2 = get_subpixel(bmap, idx, x + 1, k) - a0;
                unsigned char d3 = get_subpixel(bmap, idx, x + 2, k) - a0;
                unsigned char a1 = -1.0 / 3 * d0 + d2 - 1.0 / 6 * d3;
                unsigned char a2 = 1.0 / 2 * d0 + 1.0 / 2 * d2;
                unsigned char a3 = -1.0 / 6 * d0 - 1.0 / 2 * d2 + 1.0 / 6 * d3;
                C[jj] = a0 + a1 * dx + a2 * dx * dx + a3 * dx * dx * dx;

                d0 = C[0] - C[1];
                d2 = C[2] - C[1];
                d3 = C[3] - C[1];
                a0 = C[1];
                a1 = -1.0 / 3 * d0 + d2 -1.0 / 6 * d3;
                a2 = 1.0 / 2 * d0 + 1.0 / 2 * d2;
                a3 = -1.0 / 6 * d0 - 1.0 / 2 * d2 + 1.0 / 6 * d3;
                out[i * row_stride + j * channels + k] = a0 + a1 * dy + a2 * dy * dy + a3 * dy * dy * dy;
            }
        }
    }
}

You have a mixture of unsigned char, int and double. Each one of those 1.0 / 3 converts your 8-bit data to double-precision float, and then the assignment truncates it back.

Instead, why not just use float throughout?

float C[5] = { 0 };

for (unsigned i = 0; i < dest_height; ++i)
{
    for (unsigned j = 0; j < dest_width; ++j)
    {
        const float x = float(tx * j);
        const float y = float(ty * i);
        const float dx = tx * j - x;
        const float dy = ty * i - y;

        for (int k = 0; k < 3; ++k)
        {
            for (int jj = 0; jj < 4; ++jj)
            {
                const int idx = y - 1 + jj;
                float a0 = get_subpixel(bmap, idx, x, k);
                float d0 = get_subpixel(bmap, idx, x - 1, k) - a0;
                float d2 = get_subpixel(bmap, idx, x + 1, k) - a0;
                float d3 = get_subpixel(bmap, idx, x + 2, k) - a0;
                float a1 = -(1.0f / 3.0f) * d0 + d2 - (1.0f / 6.0f) * d3;
                float a2 =          0.5f  * d0 +              0.5f *  d2;
                float a3 = -(1.0f / 6.0f) * d0 - 0.5f * d2 + (1.0f / 6.0f) * d3;
                C[jj] = a0 + a1 * dx + a2 * dx * dx + a3 * dx * dx * dx;

                d0 = C[0] - C[1];
                d2 = C[2] - C[1];
                d3 = C[3] - C[1];
                a0 = C[1];
                a1 = -(1.0f / 3.0f) * d0 + d2 -(1.0f / 6.0f) * d3;
                a2 =          0.5f  * d0 +             0.5f  * d2;
                a3 = -(1.0f / 6.0f) * d0 - 0.5f * d2 + (1.0f / 6.0f) * d3;
                out[i * row_stride + j * channels + k] = saturate( a0 + a1 * dy + a2 * dy * dy + a3 * dy * dy * dy );
            }
        }
    }
}

Then define a function saturate that does this:

inline unsigned char saturate( float x )
{
    return x > 255.0f ? 255
         : x < 0.0f   ? 0
         :              unsigned char(x);
}

That will fix your overflow issues, and give you better precision and likely better performance too.

If you need to further improve the performance, then you should look into fixed point arithmetic. But for now, I think the above implementation is better.

Also, one other thought: You should be able to get some further efficiencies by precomputing dx * dx, dx * dx * dx, and so on:

float C[5] = { 0 };

for (unsigned i = 0; i < dest_height; ++i)
{
    for (unsigned j = 0; j < dest_width; ++j)
    {
        const float x = float(tx * j);
        const float y = float(ty * i);
        const float dx = tx * j - x, dx2 = dx * dx, dx3 = dx2 * dx;
        const float dy = ty * i - y, dy2 = dy * dy, dy3 = dy2 * dy;

        for (int k = 0; k < 3; ++k)
        {
            for (int jj = 0; jj < 4; ++jj)
            {
                const int idx = y - 1 + jj;
                float a0 = get_subpixel(bmap, idx, x, k);
                float d0 = get_subpixel(bmap, idx, x - 1, k) - a0;
                float d2 = get_subpixel(bmap, idx, x + 1, k) - a0;
                float d3 = get_subpixel(bmap, idx, x + 2, k) - a0;
                float a1 = -(1.0f / 3.0f) * d0 + d2 - (1.0f / 6.0f) * d3;
                float a2 =          0.5f  * d0 +              0.5f *  d2;
                float a3 = -(1.0f / 6.0f) * d0 - 0.5f * d2 + (1.0f / 6.0f) * d3;
                C[jj] = a0 + a1 * dx + a2 * dx2 + a3 * dx3;

                d0 = C[0] - C[1];
                d2 = C[2] - C[1];
                d3 = C[3] - C[1];
                a0 = C[1];
                a1 = -(1.0f / 3.0f) * d0 + d2 -(1.0f / 6.0f) * d3;
                a2 =          0.5f  * d0 +             0.5f  * d2;
                a3 = -(1.0f / 6.0f) * d0 - 0.5f * d2 + (1.0f / 6.0f) * d3;
                out[i * row_stride + j * channels + k] = saturate( a0 + a1 * dy + a2 * dy2 + a3 * dy3 );
            }
        }
    }
}
标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!