例如,不再使用 UInt32 逐个遍历 32 位像素,而是使用 UInt64 一次读取两个像素,从而在一次循环迭代中完成两个像素的处理。
/// <summary>
/// Clears the blue channel of a 15000x15000 32bpp ARGB bitmap, processing two pixels
/// (one 64-bit word) per loop iteration, and shows the elapsed time in milliseconds
/// in a message box.
/// </summary>
/// <remarks>
/// Only the pixel loop is timed; bitmap allocation and LockBits are excluded.
/// The inner loop reads 8 bytes at a time, so it relies on the row stride being a
/// multiple of 8 (true here: 15000 pixels * 4 bytes = 60000).
/// </remarks>
private void removeBlueWithTwoPixelIteration()
{
    const int readSize = sizeof(UInt64);

    // Each 32-bit pixel reads as 0xAARRGGBB, so clearing the low byte of both
    // halves of the 64-bit word removes blue from two pixels at once.
    const UInt64 keepAlphaRedGreen = 0xFFFFFF00FFFFFF00UL;

    // think of a big image with data
    using (Bitmap bmp = new Bitmap(15000, 15000, System.Drawing.Imaging.PixelFormat.Format32bppArgb))
    {
        Stopwatch stopwatch = new Stopwatch();
        BitmapData bd = bmp.LockBits(new Rectangle(0, 0, bmp.Width, bmp.Height), System.Drawing.Imaging.ImageLockMode.ReadWrite, bmp.PixelFormat);
        try
        {
            unsafe
            {
                byte* image = (byte*)bd.Scan0.ToPointer();
                // Hoisted loop bound; widened to long so the product cannot overflow int.
                byte* imageEnd = image + (long)bd.Stride * bd.Height;

                stopwatch.Start();
                for (byte* line = image; line < imageEnd; line += bd.Stride)
                {
                    byte* lineEnd = line + bd.Stride;
                    for (byte* pointer = line; pointer < lineEnd; pointer += readSize)
                    {
                        // Masking in place keeps every pixel at its own address. Splitting
                        // the word into halves and writing the upper half back first would
                        // swap each pair of neighbouring pixels on little-endian hardware.
                        *((UInt64*)pointer) &= keepAlphaRedGreen;
                    }
                }
                stopwatch.Stop();
            }
        }
        finally
        {
            // Always release the locked bits, even if the loop throws.
            bmp.UnlockBits(bd);
        }

        MessageBox.Show(stopwatch.Elapsed.TotalMilliseconds.ToString());
    }
}
以下代码逐个像素地执行,比前面的代码慢 70% 左右:
/// <summary>
/// Clears the blue channel of a 15000x15000 32bpp ARGB bitmap one pixel (32 bits) per
/// loop iteration, and shows the elapsed time in milliseconds in a message box.
/// </summary>
/// <remarks>
/// Only the pixel loop is timed; bitmap allocation and LockBits are excluded.
/// </remarks>
private void removeBlueWithSinglePixelIteration()
{
    const int readSize = sizeof(UInt32);

    // A pixel reads as 0xAARRGGBB; clearing the low byte removes blue.
    const UInt32 keepAlphaRedGreen = 0xFFFFFF00u;

    // think of a big image with data
    using (Bitmap bmp = new Bitmap(15000, 15000, System.Drawing.Imaging.PixelFormat.Format32bppArgb))
    {
        Stopwatch stopwatch = new Stopwatch();
        BitmapData bd = bmp.LockBits(new Rectangle(0, 0, bmp.Width, bmp.Height), System.Drawing.Imaging.ImageLockMode.ReadWrite, bmp.PixelFormat);
        try
        {
            // Raw pointer access is only legal inside an unsafe context.
            unsafe
            {
                byte* image = (byte*)bd.Scan0.ToPointer();
                // Hoisted loop bound; widened to long so the product cannot overflow int.
                byte* imageEnd = image + (long)bd.Stride * bd.Height;

                stopwatch.Start();
                for (byte* line = image; line < imageEnd; line += bd.Stride)
                {
                    byte* lineEnd = line + bd.Stride;
                    for (byte* pointer = line; pointer < lineEnd; pointer += readSize)
                    {
                        // Same result as shifting right then left by 8: drop B, keep A R G.
                        *((UInt32*)pointer) &= keepAlphaRedGreen;
                    }
                }
                stopwatch.Stop();
            }
        }
        finally
        {
            // Always release the locked bits, even if the loop throws.
            bmp.UnlockBits(bd);
        }

        MessageBox.Show(stopwatch.Elapsed.TotalMilliseconds.ToString());
    }
}
我正在使用 .NET 4 框架。
对于 C++ 来说,这样的事情可能是真的吗?
注意:两种方法之间的速度比例在 32 位和 64 位下是相同的,但两种方法在 64 位下都比在 32 位下慢约 20%?
编辑:正如 Porges 和 arul 所建议的,这可能是因为内存读取次数和分支开销减少。
使用下面的代码(假设图像宽度可被 5 整除),速度可以提高约 400%:
/// <summary>
/// Five consecutive 32-bit pixels laid out back to back, so that a single
/// struct-sized read or write through a pointer moves five pixels (20 bytes) at once.
/// </summary>
// Sequential layout with Pack = 1 keeps the fields in declaration order with no padding.
[StructLayout(LayoutKind.Sequential, Pack = 1)]
struct PixelContainer
{
    public UInt32 pixel1;
    public UInt32 pixel2;
    public UInt32 pixel3;
    public UInt32 pixel4;
    public UInt32 pixel5;
}
// Bytes consumed per iteration: one PixelContainer (five 32-bit pixels).
// sizeof on a user-defined struct is only valid inside an unsafe context.
int readSize = sizeof(PixelContainer);
// .....
// The loop steps by readSize, so bd.Stride must be an exact multiple of it;
// otherwise the last read/write of a row runs past the end of that row.
for (var pointer = line; pointer < line + bd.Stride; pointer += readSize)
{
    // Copy five pixels out, clear the low (blue) byte of each, and copy them back.
    multiPixel = *((PixelContainer*)pointer);
    multiPixel.pixel1 &= 0xFFFFFF00u;
    multiPixel.pixel2 &= 0xFFFFFF00u;
    multiPixel.pixel3 &= 0xFFFFFF00u;
    multiPixel.pixel4 &= 0xFFFFFF00u;
    multiPixel.pixel5 &= 0xFFFFFF00u;
    *((PixelContainer*)pointer) = multiPixel;
}