Algorithm for fast Drop shadow in GDI+

独自空忆成欢 提交于 2019-11-28 17:59:28

The algorithm came from this blog entry: http://blog.ivank.net/fastest-gaussian-blur.html . It's implementing the last and fastest version, of course. :-)

It's copied directly from my working code, so the external assumptions might reflect that. The function returns a larger bitmap to accommodate the increase in size. In your code, you need to handle this accordingly, of course. It assumes a 32-bit alpha picture but can be easily modified to handle 24-bit only (CHANNELS constant and the PixelFormat values).

public static class DropShadow {
  const int CHANNELS = 4;

  public static Bitmap CreateShadow(Bitmap bitmap, int radius, float opacity) {
    // Alpha mask with opacity
    var matrix = new ColorMatrix(new float[][] {
            new float[] {  0F,  0F,  0F, 0F,      0F }, 
            new float[] {  0F,  0F,  0F, 0F,      0F }, 
            new float[] {  0F,  0F,  0F, 0F,      0F }, 
            new float[] { -1F, -1F, -1F, opacity, 0F },
            new float[] {  1F,  1F,  1F, 0F,      1F }
        });

    var imageAttributes = new ImageAttributes();
    imageAttributes.SetColorMatrix(matrix, ColorMatrixFlag.Default, ColorAdjustType.Bitmap);
    var shadow = new Bitmap(bitmap.Width + 4 * radius, bitmap.Height + 4 * radius);
    using (var graphics = Graphics.FromImage(shadow))
      graphics.DrawImage(bitmap, new Rectangle(2 * radius, 2 * radius, bitmap.Width, bitmap.Height), 0, 0, bitmap.Width, bitmap.Height, GraphicsUnit.Pixel, imageAttributes);

    // Gaussian blur
    var clone = shadow.Clone() as Bitmap;
    var shadowData = shadow.LockBits(new Rectangle(0, 0, shadow.Width, shadow.Height), ImageLockMode.ReadWrite, PixelFormat.Format32bppArgb);
    var cloneData = clone.LockBits(new Rectangle(0, 0, clone.Width, clone.Height), ImageLockMode.ReadWrite, PixelFormat.Format32bppArgb);

    var boxes = DetermineBoxes(radius, 3);
    BoxBlur(shadowData, cloneData, shadow.Width, shadow.Height, (boxes[0] - 1) / 2);
    BoxBlur(shadowData, cloneData, shadow.Width, shadow.Height, (boxes[1] - 1) / 2);
    BoxBlur(shadowData, cloneData, shadow.Width, shadow.Height, (boxes[2] - 1) / 2);

    shadow.UnlockBits(shadowData);
    clone.UnlockBits(cloneData);
    return shadow;
  }

  private static unsafe void BoxBlur(BitmapData data1, BitmapData data2, int width, int height, int radius) {
    byte* p1 = (byte*)(void*)data1.Scan0;
    byte* p2 = (byte*)(void*)data2.Scan0;

    int radius2 = 2 * radius + 1;
    int[] sum = new int[CHANNELS];
    int[] FirstValue = new int[CHANNELS];
    int[] LastValue = new int[CHANNELS];

    // Horizontal
    int stride = data1.Stride;
    for (var row = 0; row < height; row++) {
      int start = row * stride;
      int left = start;
      int right = start + radius * CHANNELS;

      for (int channel = 0; channel < CHANNELS; channel++) {
        FirstValue[channel] = p1[start + channel];
        LastValue[channel] = p1[start + (width - 1) * CHANNELS + channel];
        sum[channel] = (radius + 1) * FirstValue[channel];
      }
      for (var column = 0; column < radius; column++)
        for (int channel = 0; channel < CHANNELS; channel++)
          sum[channel] += p1[start + column * CHANNELS + channel];
      for (var column = 0; column <= radius; column++, right += CHANNELS, start += CHANNELS)
        for (int channel = 0; channel < CHANNELS; channel++) {
          sum[channel] += p1[right + channel] - FirstValue[channel];
          p2[start + channel] = (byte)(sum[channel] / radius2);
        }
      for (var column = radius + 1; column < width - radius; column++, left += CHANNELS, right += CHANNELS, start += CHANNELS)
        for (int channel = 0; channel < CHANNELS; channel++) {
          sum[channel] += p1[right + channel] - p1[left + channel];
          p2[start + channel] = (byte)(sum[channel] / radius2);
        }
      for (var column = width - radius; column < width; column++, left += CHANNELS, start += CHANNELS)
        for (int channel = 0; channel < CHANNELS; channel++) {
          sum[channel] += LastValue[channel] - p1[left + channel];
          p2[start + channel] = (byte)(sum[channel] / radius2);
        }
    }

    // Vertical
    stride = data2.Stride;
    for (int column = 0; column < width; column++) {
      int start = column * CHANNELS;
      int top = start;
      int bottom = start + radius * stride;

      for (int channel = 0; channel < CHANNELS; channel++) {
        FirstValue[channel] = p2[start + channel];
        LastValue[channel] = p2[start + (height - 1) * stride + channel];
        sum[channel] = (radius + 1) * FirstValue[channel];
      }
      for (int row = 0; row < radius; row++)
        for (int channel = 0; channel < CHANNELS; channel++)
          sum[channel] += p2[start + row * stride + channel];
      for (int row = 0; row <= radius; row++, bottom += stride, start += stride)
        for (int channel = 0; channel < CHANNELS; channel++) {
          sum[channel] += p2[bottom + channel] - FirstValue[channel];
          p1[start + channel] = (byte)(sum[channel] / radius2);
        }
      for (int row = radius + 1; row < height - radius; row++, top += stride, bottom += stride, start += stride)
        for (int channel = 0; channel < CHANNELS; channel++) {
          sum[channel] += p2[bottom + channel] - p2[top + channel];
          p1[start + channel] = (byte)(sum[channel] / radius2);
        }
      for (int row = height - radius; row < height; row++, top += stride, start += stride)
        for (int channel = 0; channel < CHANNELS; channel++) {
          sum[channel] += LastValue[channel] - p2[top + channel];
          p1[start + channel] = (byte)(sum[channel] / radius2);
        }
    }
  }

  private static int[] DetermineBoxes(double Sigma, int BoxCount) {
    double IdealWidth = Math.Sqrt((12 * Sigma * Sigma / BoxCount) + 1);
    int Lower = (int)Math.Floor(IdealWidth);
    if (Lower % 2 == 0)
      Lower--;
    int Upper = Lower + 2;

    double MedianWidth = (12 * Sigma * Sigma - BoxCount * Lower * Lower - 4 * BoxCount * Lower - 3 * BoxCount) / (-4 * Lower - 4);
    int Median = (int)Math.Round(MedianWidth);

    int[] BoxSizes = new int[BoxCount];
    for (int i = 0; i < BoxCount; i++)
      BoxSizes[i] = (i < Median) ? Lower : Upper;
    return BoxSizes;
  }

}

I assume it must be straighforward to convert it to Delphi.

Addendum: according to the comments on that blog, if you have an integer radius and three boxes, you can actually forget DetermineBoxes() and use:

BoxBlur(shadowData, cloneData, shadow.Width, shadow.Height, radius - 1);
BoxBlur(shadowData, cloneData, shadow.Width, shadow.Height, radius - 1);
BoxBlur(shadowData, cloneData, shadow.Width, shadow.Height, radius);

Its execution time is negligible compared to the bitmap itself but still...

The reason why I was asking for the code is to see whether you used the "fast bitmap" approach or GetPixel(), SetPixel() methods.

Since you already have this covered, I doubt you'll be able to do much more in terms of performance optimization. GDI+ just wasn't designed for such per-pixel manipulation scenarios. Realistically you should consider implementing a simpler shadow generator, which won't look as fancy, but will not be as processor-intensive.

It all very much depends on your usage scenario (which you haven't really described):

  • Are the images all similar (all tickets or have you used the ticket just as a sample)? If they are, then you could generate the shadow once and reuse that bitmap.
  • You could generate and cache shadowed versions of images (or just shadowed thumbnails) as a background process when the user is doing other stuff.

You could also try out the Gaussian blur in Paint.NET (which uses GDI+ for most stuff) and measure the speed of it there. I doubt you'll be able make it faster than Paint.NET, so it is a good benchmark.

If it's pure performance you're after you could also consider only convolving thin rectangle edge strips of the source image. This way you're not spending time convolving the center (hidden) portion of the image but only portions which have a chance of painting onscreen.

I tested some algorithms and the best was the gaussian blur that Gábor has been implemented. The delay of the algorithm is ~ 20 ms in my tests.

Here is the implementation of it algorithm in Delphi with some changes (it use the freeware Bilsen GDI+ lib):

function CreateBlurShadow(ABitmap: IGPBitmap; ARadius: Integer; AOpacity: Double; AColor: TColor = clNone): IGPBitmap;

  procedure BoxBlur(const AData1, AData2: TGPBitmapData; AWidth, AHeight, ARadius: Integer);
  const
    CHANNELS = 4;
  var
    LScan1, LScan2: PByte;
    LSum, LFirstValue, LLastValue: array [0..CHANNELS-1] of Integer;
    LRadius2, LStride, LStart, LChannel, LLeft, LRight, LBottom, LTop, LRow, LColumn: Integer;
  begin
    LScan1 := AData1.Scan0;
    LScan2 := AData2.Scan0;
    LRadius2 := (2 * ARadius) + 1;
    LStride := AData1.Stride;
    for LRow := 0 to AHeight-1 do
    begin
      LStart := LRow * LStride;
      LLeft := LStart;
      LRight := LStart + ARadius * CHANNELS;
      for LChannel := 0 to CHANNELS-1 do
      begin
        LFirstValue[LChannel] := LScan1[LStart + LChannel];
        LLastValue[LChannel] := LScan1[LStart + ((AWidth - 1) * CHANNELS) + LChannel];
        LSum[LChannel] := (ARadius + 1) * LFirstValue[LChannel];
      end;
      for LColumn := 0 to ARadius-1 do
        for LChannel := 0 to CHANNELS-1 do
          LSum[LChannel] := LSum[LChannel] + LScan1[LStart + (LColumn * CHANNELS) + LChannel];
      for LColumn := 0 to ARadius do
      begin
        for LChannel := 0 to CHANNELS-1 do
        begin
          LSum[LChannel] := LSum[LChannel] + LScan1[LRight + LChannel] - LFirstValue[LChannel];
          LScan2[LStart + LChannel] := Byte(LSum[LChannel] div LRadius2);
        end;
        Inc(LRight, CHANNELS);
        Inc(LStart, CHANNELS);
      end;
      for LColumn := ARadius + 1 to AWidth-ARadius-1 do
      begin
        for LChannel := 0 to CHANNELS-1 do
        begin
          LSum[LChannel] := LSum[LChannel] + LScan1[LRight + LChannel] - LScan1[LLeft + LChannel];
          LScan2[LStart + LChannel] := Byte(LSum[LChannel] div LRadius2);
        end;
        Inc(LLeft, CHANNELS);
        Inc(LRight, CHANNELS);
        Inc(LStart, CHANNELS);
      end;
      for LColumn := AWidth-ARadius to AWidth-1 do
      begin
        for LChannel := 0 to CHANNELS-1 do
        begin
          LSum[LChannel] := LSum[LChannel] + LLastValue[LChannel] - LScan1[LLeft + LChannel];
          LScan2[LStart + LChannel] := Byte(LSum[LChannel] div LRadius2);
        end;
        Inc(LLeft, CHANNELS);
        Inc(LStart, CHANNELS);
      end;
    end;
    LStride := AData2.Stride;
    for LColumn := 0 to AWidth-1 do
    begin
      LStart := LColumn * CHANNELS;
      LTop := LStart;
      LBottom := LStart + (ARadius * LStride);
      for LChannel := 0 to CHANNELS-1 do
      begin
        LFirstValue[LChannel] := LScan2[LStart + LChannel];
        LLastValue[LChannel] := LScan2[LStart + ((AHeight - 1) * LStride) + LChannel];
        LSum[LChannel] := (ARadius + 1) * LFirstValue[LChannel];
      end;
      for LRow := 0 to ARadius-1 do
        for LChannel := 0 to CHANNELS-1 do
          LSum[LChannel] := LSum[LChannel] + LScan2[LStart + (LRow * LStride) + LChannel];
      for LRow := 0 to ARadius do
      begin
        for LChannel := 0 to CHANNELS-1 do
        begin
          LSum[LChannel] := LSum[LChannel] + LScan2[LBottom + LChannel] - LFirstValue[LChannel];
          LScan1[LStart + LChannel] := Byte(LSum[LChannel] div LRadius2);
        end;
        Inc(LBottom, LStride);
        Inc(LStart, LStride);
      end;
      for LRow := ARadius + 1 to AHeight - ARadius - 1 do
      begin
        for LChannel := 0 to CHANNELS-1 do
        begin
          LSum[LChannel] := LSum[LChannel] + LScan2[LBottom + LChannel] - LScan2[LTop + LChannel];
          LScan1[LStart + LChannel] := Byte(LSum[LChannel] div LRadius2);
        end;
        Inc(LTop, LStride);
        Inc(LBottom, LStride);
        Inc(LStart, LStride);
      end;
      for LRow := AHeight - ARadius to AHeight-1 do
      begin
        for LChannel := 0 to CHANNELS-1 do
        begin
          LSum[LChannel] := LSum[LChannel] + LLastValue[LChannel] - LScan2[LTop + LChannel];
          LScan1[LStart + LChannel] := Byte(LSum[LChannel] div LRadius2);
        end;
        Inc(LTop, LStride);
        Inc(LStart, LStride);
      end;
    end;
  end;

const
  INITIAL_MATRIX: array [0..4, 0..4] of Single =
   ((0.5,   0,   0, 0, 0),
    (0,   0.5,   0, 0, 0),
    (0,     0, 0.5, 0, 0),
    (0,     0,   0, 1, 0),
    (0,     0,   0, 0, 1));
var
  LMatrix: TGPColorMatrix;
  LImageAttributes: IGPImageAttributes;
  LShadow, LClone: IGPBitmap;
  LGraphics: IGPGraphics;
  LShadowData, LCloneData: TGPBitmapData;
  LColor: TGPColor;
begin
  ARadius := Max(ARadius, 0);
  LShadow := TGPBitmap.Create(ABitmap.Width + (4 * Cardinal(ARadius)),
    ABitmap.Height + (4 * Cardinal(ARadius)), PixelFormat32bppARGB);
  LGraphics := TGPGraphics.FromImage(LShadow);
  LGraphics.DrawImage(ABitmap, TGPRect.Create(2 * ARadius, 2 * ARadius,
    ABitmap.Width, ABitmap.Height), 0, 0, ABitmap.Width, ABitmap.Height,
    TGPUnit.UnitPixel);
  LClone := LShadow.Clone;
  LShadowData := LShadow.LockBits(TGPRect.Create(0, 0, LShadow.Width, LShadow.Height),
    [ImageLockModeRead, ImageLockModeWrite], PixelFormat32bppARGB);
  LCloneData := LClone.LockBits(TGPRect.Create(0, 0, LClone.Width, LClone.Height),
    [ImageLockModeRead, ImageLockModeWrite], PixelFormat32bppARGB);
  try
    BoxBlur(LShadowData, LCloneData, LShadow.Width, LShadow.Height, ARadius - 1);
    BoxBlur(LShadowData, LCloneData, LShadow.Width, LShadow.Height, ARadius - 1);
    BoxBlur(LShadowData, LCloneData, LShadow.Width, LShadow.Height, ARadius);
  finally
    LShadow.UnlockBits(LShadowData);
    LClone.UnlockBits(LCloneData);
  end;
  if (AColor = clNone) and (AOpacity = 1.0) then
    Result := LShadow
  else
  begin
    LColor := TGPColor.CreateFromColorRef(ColorToRGB(AColor));
    Move(INITIAL_MATRIX[0, 0], LMatrix.M[0, 0], SizeOf(INITIAL_MATRIX));
    LMatrix.M[4, 0] := Min((Integer(LColor.R) - 127) / 127, 1.0);
    LMatrix.M[4, 1] := Min((Integer(LColor.G) - 127) / 127, 1.0);
    LMatrix.M[4, 2] := Min((Integer(LColor.B) - 127) / 127, 1.0);
    LMatrix.M[4, 3] := AOpacity-1;
    LImageAttributes := TGPImageAttributes.Create;
    LImageAttributes.SetColorMatrix(LMatrix, TGPColorMatrixFlags.ColorMatrixFlagsDefault,
      TGPColorAdjustType.ColorAdjustTypeBitmap);
    Result := TGPBitmap.Create(LShadow.Width, LShadow.Height, PixelFormat32bppARGB);
    LGraphics := TGPGraphics.FromImage(Result);
    LGraphics.DrawImage(LShadow, TGPRect.Create(0, 0, LShadow.Width, LShadow.Height),
      0, 0, Result.Width, Result.Height, TGPUnit.UnitPixel, LImageAttributes);
  end;
end;
Fabio Ceconello

I knew that pixel-by-pixel manipulation was quite slower, but never did benchmarks; 70x seems a lot, more than I would expect. Maybe the fact that you're using a managed language contributes to this, because that's one situation in which a VM overhead is maximized. Have you tried to make that part of the program in native code? This link has a native implementation that you could use for a quick test:

http://www.codeproject.com/KB/GDI/Glow_and_Shadow_effects.aspx

Unfortunately their only difference is the use of a language that can generate native code, but they still use a double-level loop to visit the pixels. It would be better if you could use CUDA, for instance, if you can assume the machines in which the app will run have such hardware. But in such case you wouldn't be using GDI+ anymore. Anyway, maybe this other SO question is of help:

Using Graphics Card instead of GDI+ for Image Manipulation

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!