阅读提示:
《Delphi图像处理》系列以效率为侧重点,一般代码为PASCAL,核心代码采用BASM。
《C++图像处理》系列以代码清晰、可读性强为主,全部使用C++代码。
尽可能保持二者内容一致,可相互对照。
本文代码必须包括文章《Delphi图像处理 -- 数据类型及公用过程》中的ImageData.pas单元。
本文在《GDI+ ColorMatrix的完全揭秘》的ColorMatrix原理揭秘的基础上,用Delphi代码来完整实现GDI+的ColorMatrix功能。
GDI+中设置ColorMatrix时有2个枚举选项,在实际运用中极少使用,所以代码中以GDI+设置ColorMatrix的缺省方式实现。
先给出一个简易的浮点版本。因为该过程没有考虑子图处理,所以称之为简易版本,主要是为了方便读者理解ColorMatrix的实现原理:
// Simple floating-point colour-matrix transform, applied in place to Data.
//
// Data   - image whose pixels are rewritten. The pixels are walked as one
//          contiguous run of Width * Height 4-byte pixels starting at Scan0,
//          so row padding / sub-images are not handled.
// Matrix - 5x5 colour matrix. It is received by value; the adjustments made
//          below only affect the local copy.
//
// Pixel bytes are addressed as 0 = blue, 1 = green, 2 = red, 3 = alpha, while
// matrix indices 0..3 are red, green, blue, alpha, hence the index mapping.
procedure SetColorMatrixF(Data: TImageData; Matrix: TColorMatrix);
const
  // Pixel byte offset for matrix channel 0..3 (same mapping as 2 - I for
  // I < 3 and I otherwise).
  ChannelOffset: array[0..3] of Integer = (2, 1, 0, 3);
var
  Row, Col, Remaining: Integer;
  Cursor: PRGBQuad;
  DiagonalOnly: Boolean;
  Multiple: Integer;

  // Transforms the pixel Cursor currently points at.
  procedure TransformPixel;
  var
    Converted: array[0..3] of Byte;
    Channel, Target: Integer;
    Bytes: PByteArray;
  begin
    Bytes := Pointer(Cursor);
    for Channel := 0 to 3 do
    begin
      Target := ChannelOffset[Channel];
      if not DiagonalOnly then
        // General case: full row/column product plus translation, clamped
        // to 0..255.
        Converted[Target] := Max(0, Min(255, Round(Matrix[0, Channel] * Bytes[2] +
                                                   Matrix[1, Channel] * Bytes[1] +
                                                   Matrix[2, Channel] * Bytes[0] +
                                                   Matrix[3, Channel] * Bytes[3] +
                                                   Matrix[4, Channel] * 255)))
      else
        // Only the main diagonal carries data: plain per-channel scaling.
        // The rounded product is stored into a Byte without clamping.
        Converted[Target] := Round(Matrix[Channel, Channel] * Bytes[Target]);
    end;
    // Write back only after all four channels were computed from the
    // original pixel values.
    for Channel := 0 to 3 do
      Bytes[Channel] := Converted[Channel];
  end;

begin
  // Reduce entries whose rounded value is 256 or more by whole multiples of
  // 256, and detect whether anything outside the main diagonal is non-zero.
  DiagonalOnly := True;
  for Row := 0 to 4 do
    for Col := 0 to 4 do
    begin
      Multiple := Round(Matrix[Row, Col]) div 256;
      if Multiple > 0 then
        Matrix[Row, Col] := Matrix[Row, Col] - 256.0 * Multiple;
      if (Row <> Col) and (Matrix[Row, Col] <> 0) then
        DiagonalOnly := False;
    end;

  Cursor := Data.Scan0;
  Remaining := Data.Width * Data.Height;
  while Remaining > 0 do
  begin
    TransformPixel;
    Inc(Cursor);
    Dec(Remaining);
  end;
end;
因代码已经有了注释,而实现原理、公式已经在《GDI+ ColorMatrix的完全揭秘》中进行了详尽的介绍,所以本文不再赘述。
该过程代码的特点是简单易读,缺点是效率较低,在我的P4 2.8G计算机上,处理一张千万像素的照片,耗时为1000ms左右(不包括GDI+图像格式转换耗时。千万像素的24位格式图像转换为32位格式,耗时就达650ms)。
下面是一个MMX BASM代码的整数ColorMatrix实现过程:
过程定义:
// Applies a colour matrix to an image. Parameters:
//   Dest   - output image; Source - input image; Data - image processed in place
//   Matrix - the colour matrix
// The single-image overload is marked inline only when RTLVersion >= 17.00.
procedure ImageSetColorMatrix(var Data: TImageData; Matrix: TColorMatrix); overload;
{$IF RTLVersion >= 17.00}inline;{$IFEND}
procedure ImageSetColorMatrix(var Dest: TImageData;
const Source: TImageData; Matrix: TColorMatrix); overload;
实现代码:
type
// Four 16-bit words in blue, green, red, alpha order. The MMX implementation
// of ImageSetColorMatrix uses the field offsets to address one row of its
// integer matrix when swapping the blue and red columns.
PARGBQuadW = ^TARGBQuadW;
TARGBQuadW = packed record
wBlue: Word;
wGreen: Word;
wRed: Word;
wAlpha: Word;
end;
// MMX integer implementation of the colour-matrix transform.
//
// Register parameters on entry: eax = Dest, edx = Source, ecx = Matrix.
//
// 1. All 5x5 entries outside the main diagonal are compared with 0.0. If one
//    is non-zero the full transform (@@TransformAll) is used, otherwise only
//    the four diagonal scale factors are applied.
// 2. Scale path: the diagonal factors are converted to 16-bit integers
//    (factor * 256) and applied with pmullw / psrlw 8.
// 3. Full path: the 4x4 part is transposed and converted to factor * 128
//    words, the translation row to value * 255 words, blue/red columns are
//    swapped to match the B, G, R, A pixel byte order, and each pixel is
//    transformed with pmaddwd.
//
// The integer tables are built on the stack with overlapping 32-bit fistp
// stores at 2-byte steps, so only the low word of each value survives and the
// final store spills 2 bytes past the table. The scratch areas are therefore
// sized table + 4, which covers the spill and keeps ESP 4-byte aligned across
// the call and push/pop instructions below.
//
// NOTE(review): _SetCopyRegs is declared elsewhere. From the way the loops use
// the registers afterwards it is assumed to take eax = Dest, edx = Source and
// to return esi = source pixels, edi = destination pixels, ecx = width,
// edx = height, eax / ebx = per-row skip for source / destination, leaving
// the stack balanced - confirm against its definition.
procedure ImageSetColorMatrix(var Dest: TImageData;
  const Source: TImageData; Matrix: TColorMatrix);
asm
    push      esi
    push      edi
    push      ebx
    mov       ebx, eax                  // keep Dest; eax is reused below
    mov       edi, ecx                  // edi = matrix
    mov       esi, 4                    // for (i = 4; i >= 0; i --)
    fldz                                // {   st(0) = 0.0 for the comparisons
@@iLoop:
    mov       ecx, 4                    //   for (j = 4; j >= 0; j --)
@@jLoop:                                //   {
    cmp       ecx, esi
    je        @@1                       //     skip the main diagonal
    mov       eax, esi
    imul      eax, 5
    add       eax, ecx                  //     eax = i * 5 + j
    fcom      dword ptr [edi+eax*4]
    fstsw     ax
    sahf
    je        @@1                       //     matrix[i, j] == 0
    fstp      st(0)                     //     if (i != j && matrix[i, j] != 0)
    jmp       @@TransformAll            //       goto TransformAll
@@1:
    dec       ecx
    jns       @@jLoop                   //   }
    dec       esi
    jns       @@iLoop                   // }
    fstp      st(0)
    fwait

    // Colour scaling only (main diagonal data)
    sub       esp, 8+4                  // 4 words + spill of the last dword store
    mov       dword ptr [esp], 256
    fild      dword ptr [esp]
    fld       st(0)
    fmul      dword ptr [edi+(2*5+2)*4]
    fistp     dword ptr [esp]           // matrixI[0, 0] = matrix[2, 2] * 256
    fld       st(0)
    fmul      dword ptr [edi+(1*5+1)*4]
    fistp     dword ptr [esp+2]         // matrixI[0, 1] = matrix[1, 1] * 256
    fld       st(0)
    fmul      dword ptr [edi+(0*5+0)*4]
    fistp     dword ptr [esp+4]         // matrixI[0, 2] = matrix[0, 0] * 256
    fmul      dword ptr [edi+(3*5+3)*4] // last factor consumes the 256 on st(0)
    fistp     dword ptr [esp+6]         // matrixI[0, 3] = matrix[3, 3] * 256
    mov       eax, ebx
    call      _SetCopyRegs
    pxor      mm7, mm7
    movq      mm1, [esp]                // mm1 = m44 m11 m22 m33
@@yLoop:
    push      ecx
@@xLoop:
    movd      mm0, [esi]
    punpcklbw mm0, mm7                  // mm0 = 00 A 00 R 00 G 00 B
    pmullw    mm0, mm1                  // mm0 = A*m44 R*m11 G*m22 B*m33
    psrlw     mm0, 8                    // mm0 = A*m44/256 R*m11/256 G*m22/256 B*m33/256
    packuswb  mm0, mm0                  // mm0 = 00 00 00 00 An Rn Gn Bn
    movd      [edi], mm0
    add       esi, 4
    add       edi, 4
    loop      @@xLoop
    add       esi, eax
    add       edi, ebx
    pop       ecx
    dec       edx
    jnz       @@yLoop
    add       esp, 8+4
    jmp       @@end

    // Full colour transform
@@TransformAll:
    sub       esp, 5*8+4                // 5 rows of 4 words + spill; float matrix is
    mov       dword ptr [esp], 128      // transposed and converted to *128 integers
    fild      dword ptr [esp]
    mov       esi, esp                  // esi = matrixI
    mov       eax, edi
    mov       ecx, 4                    // for (i = 0; i < 4; i ++)
@@cvtLoop:                              // {
    fld       st(0)
    fmul      dword ptr [edi]
    fistp     dword ptr [esi]           //   matrixI[i, 0] = matrix[0, i] * 128
    fld       st(0)
    fmul      dword ptr [edi+1*5*4]
    fistp     dword ptr [esi+2]         //   matrixI[i, 1] = matrix[1, i] * 128
    fld       st(0)
    fmul      dword ptr [edi+2*5*4]
    fistp     dword ptr [esi+4]         //   matrixI[i, 2] = matrix[2, i] * 128
    fld       st(0)
    fmul      dword ptr [edi+3*5*4]
    fistp     dword ptr [esi+6]         //   matrixI[i, 3] = matrix[3, i] * 128
    add       esi, 8
    add       edi, 4
    loop      @@cvtLoop                 // }
    fstp      st(0)
    add       eax, 4*5*4                // eax = &matrix[4, 0]; translation row * 255
    mov       dword ptr [esi], 255
    fild      dword ptr [esi]
    mov       ecx, 4                    // for (j = 0; j < 4; j ++)
@@tLoop:
    fld       st(0)
    fmul      dword ptr [eax]
    fistp     dword ptr [esi]           //   matrixI[4, j] = matrix[4, j] * 255
    add       esi, 2
    add       eax, 4
    loop      @@tLoop
    fstp      st(0)
    mov       esi, esp                  // swap red and blue (columns 0 and 2)
    mov       ecx, 5                    // for (i = 0; i < 5; i ++)
@@swapLoop:                             //   matrixI[i, 0] <--> matrixI[i, 2]
    mov       ax, [esi].TARGBQuadW.wBlue
    xchg      ax, [esi].TARGBQuadW.wRed
    mov       [esi].TARGBQuadW.wBlue, ax
    add       esi, 8
    loop      @@swapLoop
    mov       eax, ebx
    call      _SetCopyRegs
    pxor      mm7, mm7
    pcmpeqb   mm4, mm4                  // mm4 = FF FF FF FF FF FF FF FF
    psrlw     mm4, 15                   // mm4 = 00 01 00 01 00 01 00 01
@@yLoopA:
    push      ecx
@@xLoopA:
    movd      mm0, [esi]
    punpcklbw mm0, mm7                  // mm0 = 00 A 00 R 00 G 00 B
    movq      mm1, mm0
    movq      mm2, mm0
    movq      mm3, mm0
    // +4 in every offset below accounts for the ecx pushed at @@yLoopA
    pmaddwd   mm0, [esp+16+4]           // mm0 = A*m43+R*m13 G*m23+B*m33   blue row
    pmaddwd   mm1, [esp+8+4]            // mm1 = A*m42+R*m12 G*m22+B*m32   green row
    pmaddwd   mm2, [esp+4]              // mm2 = A*m41+R*m11 G*m21+B*m31   red row
    pmaddwd   mm3, [esp+24+4]           // mm3 = A*m44+R*m14 G*m24+B*m34   alpha row
    psrad     mm0, 7                    // mm0 = A*m43+R*m13/128 G*m23+B*m33/128
    psrad     mm1, 7                    // mm1 = A*m42+R*m12/128 G*m22+B*m32/128
    psrad     mm2, 7                    // mm2 = A*m41+R*m11/128 G*m21+B*m31/128
    psrad     mm3, 7                    // mm3 = A*m44+R*m14/128 G*m24+B*m34/128
    packssdw  mm0, mm1                  // mm0 = Ag+Rg Gg+Bg Ab+Rb Gb+Bb
    packssdw  mm2, mm3                  // mm2 = Aa+Ra Ga+Ba Ar+Rr Gr+Br
    pmaddwd   mm0, mm4                  // mm0 = Ag+Rg+Gg+Bg=Gn Ab+Rb+Gb+Bb=Bn
    pmaddwd   mm2, mm4                  // mm2 = Aa+Ra+Ga+Ba=An Ar+Rr+Gr+Br=Rn
    packssdw  mm0, mm2                  // mm0 = 00 An 00 Rn 00 Gn 00 Bn
    // Saturating add: the sums above are already saturated to 16 bits, so a
    // wrapping add could flip a saturated channel to the opposite extreme
    // before the final clamp.
    paddsw    mm0, [esp+32+4]           // mm0 = An+At Rn+Rt Gn+Gt Bn+Bt   translation row
    packuswb  mm0, mm0                  // mm0 = 00 00 00 00 An Rn Gn Bn (clamped to 0..255)
    movd      [edi], mm0
    add       esi, 4
    add       edi, 4
    loop      @@xLoopA
    add       esi, eax
    add       edi, ebx
    pop       ecx
    dec       edx
    jnz       @@yLoopA
    add       esp, 5*8+4
@@end:
    emms
@@Exit:
    pop       ebx
    pop       edi
    pop       esi
end;
// In-place variant: applies Matrix to Data by passing Data as both the
// destination and the source of the two-image overload.
procedure ImageSetColorMatrix(var Data: TImageData; Matrix: TColorMatrix);
begin
ImageSetColorMatrix(Data, Data, Matrix);
end;
该过程中作了更详细的注释,其特点是处理速度较快。在我的机器上,不包括图像格式转换耗时,处理千万像素图片主对角线数据耗时不到50ms,而处理全部变换耗时350-400ms。
下面是一个测试程序代码。该测试代码界面与《GDI+ for VCL基础 -- 颜色调整矩阵ColorMatrix详解》是一样的。有兴趣的朋友可以同里面的测试代码作一下比较。
unit main2;
interface
uses
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, StdCtrls, Buttons, Grids, ExtCtrls, Gdiplus, ImageData;
type
// Test form for ImageSetColorMatrix: StringGrid1 shows and edits the matrix,
// PaintBox1 displays the source and the processed image side by side.
TForm1 = class(TForm)
Label1: TLabel;
PaintBox1: TPaintBox;
SpeedButton1: TSpeedButton;
SpeedButton2: TSpeedButton;
SpeedButton3: TSpeedButton;
SpeedButton4: TSpeedButton;
StringGrid1: TStringGrid;
BitBtn1: TBitBtn;
BitBtn3: TBitBtn;
BitBtn2: TBitBtn;
procedure FormCreate(Sender: TObject);
procedure FormDestroy(Sender: TObject);
procedure StringGrid1DrawCell(Sender: TObject; ACol, ARow: Integer;
Rect: TRect; State: TGridDrawState);
procedure StringGrid1GetEditText(Sender: TObject; ACol, ARow: Integer;
var Value: string);
procedure PaintBox1Paint(Sender: TObject);
procedure BitBtn1Click(Sender: TObject);
procedure BitBtn2Click(Sender: TObject);
procedure BitBtn3Click(Sender: TObject);
procedure SpeedButton2Click(Sender: TObject);
procedure SpeedButton3Click(Sender: TObject);
procedure SpeedButton1Click(Sender: TObject);
procedure SpeedButton4Click(Sender: TObject);
procedure StringGrid1SetEditText(Sender: TObject; ACol, ARow: Integer;
const Value: string);
private
{ Private declarations }
// GDI+ bitmaps created in FormCreate on top of the SrcData / DstData pixel
// buffers; used for drawing only.
Source: TGpBitmap;
Dest: TGpBitmap;
// Pixel buffers: SrcData is the input, DstData receives the transform result.
SrcData: TImageData;
DstData: TImageData;
// Colour matrix currently shown in the grid and applied by BitBtn1Click.
Matrix: TColorMatrix;
// Parses the leading numeric part of Str; yields 0 when there is none.
function CheckFloatStr(Str: string): Double;
// Resets Matrix to the identity matrix.
procedure InitColorMatrix;
public
{ Public declarations }
end;
var
Form1: TForm1;
implementation
{$R *.dfm}
// Applies the current Matrix to SrcData, writing the result to DstData, then
// refreshes the preview and re-synchronises the selected grid cell's text
// with the matrix value before giving the grid the focus.
procedure TForm1.BitBtn1Click(Sender: TObject);
var
  Grid: TStringGrid;
begin
  ImageSetColorMatrix(DstData, SrcData, Matrix);
  PaintBox1.Invalidate;

  Grid := StringGrid1;
  Grid.Cells[Grid.Col, Grid.Row] := FloatToStr(Matrix[Grid.Row, Grid.Col]);
  Grid.Invalidate;
  Grid.SetFocus;
end;
// Resets the matrix to identity and re-runs the transform through BitBtn1's
// click handling.
procedure TForm1.BitBtn2Click(Sender: TObject);
begin
InitColorMatrix;
BitBtn1.Click;
end;
// Closes the form.
procedure TForm1.BitBtn3Click(Sender: TObject);
begin
Close;
end;
// Parses the longest leading prefix of Str of the form
//   [+|-] digits [ '.' digits ]
// (digits may be missing on either side of the point, but not both) and
// returns its value. Anything after that prefix is ignored.
//
// Returns 0 when Str is empty or the prefix contains no digit at all
// (for example '', '-', '.', '-.'), so partially typed input never raises.
// The decimal separator is always '.', independent of the system locale;
// conversion is done with Val for that reason.
function TForm1.CheckFloatStr(Str: string): Double;
var
  Index, Len, DigitCount, ErrorPos: Integer;
  SeenPoint, Negative: Boolean;
  Number: string;
  Parsed: Double;
begin
  Result := 0;
  Len := Length(Str);
  if Len = 0 then Exit;

  // Optional sign.
  Index := 1;
  Negative := Str[1] = '-';
  if Negative or (Str[1] = '+') then
    Inc(Index);

  // Collect digits and at most one decimal point.
  SeenPoint := False;
  DigitCount := 0;
  Number := '';
  while Index <= Len do
  begin
    if Str[Index] = '.' then
    begin
      if SeenPoint then Break;
      SeenPoint := True;
    end
    else if (Str[Index] >= '0') and (Str[Index] <= '9') then
      Inc(DigitCount)
    else
      Break;
    Number := Number + Str[Index];
    Inc(Index);
  end;

  // A lone sign and/or point is not a number.
  if DigitCount = 0 then Exit;

  // Normalise '.5' -> '0.5' and '5.' -> '5.0' so the text is unambiguous.
  if Number[1] = '.' then
    Number := '0' + Number;
  if Number[Length(Number)] = '.' then
    Number := Number + '0';
  if Negative then
    Number := '-' + Number;

  Val(Number, Parsed, ErrorPos);
  if ErrorPos = 0 then
    Result := Parsed;
end;
// Loads the test picture, copies its pixels as 32-bit ARGB into the SrcData
// and DstData buffers, wraps both buffers in GDI+ bitmaps for display, and
// initialises the colour matrix to identity.
procedure TForm1.FormCreate(Sender: TObject);
var
Bmp: TGpBitmap;
Data: TBitmapData;
R: TGpRect;
begin
// Load the image file into Bmp
Bmp := TGpBitmap.Create('..\..\media\100_0349.jpg');
R := GpRect(0, 0, Bmp.Width, Bmp.Height);
// Allocate new source and destination image data in SrcData and DstData
SrcData := NewImageData(R.Width, R.Height);
DstData := NewImageData(R.Width, R.Height);
// Lock Bmp so its pixels are copied into SrcData and then into DstData.
// NOTE(review): Data is pre-filled from SrcData and then assigned the result
// of LockBits with imUserInputBuf; this appears to rely on LockBits reading
// the pre-filled Data as the user-supplied buffer description - confirm
// against the Gdiplus unit before reordering or restructuring these lines.
Data := TBitmapData(SrcData);
Data := Bmp.LockBits(R, [imRead, imWrite, imUserInputBuf], pf32bppARGB);
Bmp.UnlockBits(Data);
// Second pass: same description, but pointing at the DstData buffer
Data.Scan0 := DstData.Scan0;
Data := Bmp.LockBits(R, [imRead, imWrite, imUserInputBuf], pf32bppARGB);
Bmp.UnlockBits(Data);
Bmp.Free;
// Create the Source and Dest bitmaps on top of the SrcData and DstData buffers.
// Note: the image data structures are used for processing and the bitmaps for
// display; binding them this way avoids a lock/unlock cycle every time the
// image data is processed.
Source := TGpBitmap.Create(SrcData.Width, SrcData.Height, SrcData.Stride,
pf32bppARGB, SrcData.Scan0);
Dest := TGpBitmap.Create(DstData.Width, DstData.Height, DstData.Stride,
pf32bppARGB, DstData.Scan0);
InitColorMatrix;
end;
// Releases the display bitmaps first, then the pixel buffers they were
// created on (see FormCreate).
procedure TForm1.FormDestroy(Sender: TObject);
begin
Dest.Free;
Source.Free;
FreeImageData(DstData);
FreeImageData(SrcData);
end;
// Resets Matrix to the 5x5 identity: every entry 0, main diagonal 1.
procedure TForm1.InitColorMatrix;
var
  Diagonal: Integer;
begin
  // All-zero bytes are the value 0 for every matrix entry.
  FillChar(Matrix, SizeOf(Matrix), 0);
  for Diagonal := 0 to 4 do
    Matrix[Diagonal, Diagonal] := 1;
end;
// Draws the source bitmap at the top-left margin and the processed bitmap to
// its right, separated by one further margin.
procedure TForm1.PaintBox1Paint(Sender: TObject);
const
  Margin = 10;
var
  Painter: TGpGraphics;
begin
  Painter := TGpGraphics.Create(PaintBox1.Canvas.Handle);
  try
    Painter.DrawImage(Source, Margin, Margin);
    Painter.DrawImage(Dest, SrcData.Width + 2 * Margin, Margin);
  finally
    Painter.Free;
  end;
end;
// Preset: starting from identity, fills columns 0..2 of rows 0, 1 and 2 with
// 0.30, 0.59 and 0.11 respectively (so each colour output becomes the same
// weighted sum of the three colour inputs), then applies the matrix.
procedure TForm1.SpeedButton1Click(Sender: TObject);
var
  Column: Integer;
begin
  InitColorMatrix;
  for Column := 2 downto 0 do
  begin
    Matrix[2, Column] := 0.11;
    Matrix[1, Column] := 0.59;
    Matrix[0, Column] := 0.30;
  end;
  BitBtn1.Click;
end;
// Preset: identity matrix with 0.10 in the translation row (row 4) for the
// three colour columns, then applies the matrix.
procedure TForm1.SpeedButton2Click(Sender: TObject);
const
  Offset = 0.10;
var
  Column: Integer;
begin
  InitColorMatrix;
  Column := 0;
  while Column <= 2 do
  begin
    Matrix[4, Column] := Offset;
    Inc(Column);
  end;
  BitBtn1.Click;
end;
// Preset: identity matrix with the three colour scale factors on the main
// diagonal set to -1 (alpha left at 1), then applies the matrix.
procedure TForm1.SpeedButton3Click(Sender: TObject);
var
  Channel: Integer;
begin
  InitColorMatrix;
  for Channel := 0 to 2 do
    Matrix[Channel, Channel] := -1;
  BitBtn1.Click;
end;
// Preset: identity matrix with the alpha scale factor (entry [3, 3]) halved,
// then applies the matrix.
procedure TForm1.SpeedButton4Click(Sender: TObject);
const
  AlphaScale = 0.5;
begin
  InitColorMatrix;
  Matrix[3, 3] := AlphaScale;
  BitBtn1.Click;
end;
// Owner-draw handler for the matrix grid: paints cell (ACol, ARow) as a
// framed box containing Matrix[ARow, ACol] formatted with two decimals,
// right-aligned. State is not used.
// NOTE(review): assumes the grid's row/column indices map directly onto
// matrix indices 0..4 (no fixed rows/columns) - confirm against the form file.
procedure TForm1.StringGrid1DrawCell(Sender: TObject; ACol, ARow: Integer;
  Rect: TRect; State: TGridDrawState);
var
  Text: string;
begin
  Text := Format('%.2f', [Matrix[ARow, ACol]]);
  StringGrid1.Canvas.FillRect(Rect);
  StringGrid1.Canvas.Pen.Color := clBtnShadow;
  StringGrid1.Canvas.Rectangle(Rect);
  // Leave a 2-pixel gap between the frame and the text.
  InflateRect(Rect, -2, -2);
  // Rect is passed as a plain identifier: the '&' escape prefix is not
  // accepted by older compilers and is unnecessary here.
  DrawText(StringGrid1.Canvas.Handle, PChar(Text), Length(Text), Rect, DT_RIGHT);
end;
// Supplies the in-place editor text for cell (ACol, ARow): the matrix value
// Matrix[ARow, ACol] formatted with two decimals.
procedure TForm1.StringGrid1GetEditText(Sender: TObject; ACol, ARow: Integer;
var Value: string);
begin
Value := Format('%.2f', [Matrix[ARow, ACol]]);
end;
// Stores the edited cell text into the matrix: Value is parsed by
// CheckFloatStr and written to Matrix[ARow, ACol].
procedure TForm1.StringGrid1SetEditText(Sender: TObject; ACol, ARow: Integer;
const Value: string);
begin
Matrix[ARow, ACol] := CheckFloatStr(Value);
end;
end.
下面是运行效果图(图略)。
其后附上ImageSetColorMatrix过程采用SSE浮点指令的实现版本:
// SSE floating-point implementation of the colour-matrix transform.
//
// Register parameters on entry: eax = Dest, edx = Source, ecx = Matrix.
// eax and edx are left untouched until _SetCopyRegs is called.
//
// The matrix is scanned first: if every entry outside the main diagonal and
// outside the fifth column is zero (bit pattern 0), only the four diagonal
// factors are applied per pixel; otherwise the full transform is executed.
// Results are converted back to integers and packed with saturation.
//
// NOTE(review): _SetCopyRegs is declared elsewhere. From the way the loops use
// the registers afterwards it is assumed to return esi = source pixels,
// edi = destination pixels, ecx = width, edx = height, eax / ebx = per-row
// skip for source / destination, and to preserve ebp and the xmm registers -
// confirm against its definition.
procedure ImageSetColorMatrix(var Dest: TImageData;
const Source: TImageData; Matrix: TColorMatrix); overload;
asm
push ebp
push esi
push edi
push ebx
// ebp = 16-byte aligned address of a 128-bit slot inside the 32 bytes reserved on the stack
sub esp, 32
mov ebp, esp
add ebp, 16
and ebp, -16
// Check every matrix entry outside the main diagonal and the virtual (fifth) column;
// if any of them is non-zero, run the full colour transform
mov edi, ecx
mov esi, 4 // for (i = 4; i >= 0; i --)
@@iLoop: // {
mov ecx, 3 // for (j = 3; j >= 0; j --)
@@jLoop: // {
cmp ecx, esi // if (i == j) continue
je @@1
lea ebx, [esi+esi*4]
add ebx, ecx // index = i * 5 + j
cmp dword ptr[edi+ebx*4], 0
jne @@Transform // if (Matrix[Index]) goto @@Transform
@@1:
dec ecx
jns @@jLoop // }
dec esi
jns @@iLoop // }
// Colour scaling only: copy the diagonal factors into the aligned slot in
// blue, green, red, alpha order
mov ebx, [edi+(2*5+2)*4]
mov ecx, [edi+(1*5+1)*4]
mov [ebp], ebx
mov [ebp+4], ecx
mov ebx, [edi+(0*5+0)*4]
mov ecx, [edi+(3*5+3)*4]
mov [ebp+8], ebx
mov [ebp+12], ecx
movaps xmm1, [ebp] // xmm1 = m44 m11 m22 m33
pxor xmm7, xmm7
call _SetCopyRegs
@@yLoop_Scale:
push ecx
@@xLoop_Scale:
movd xmm0, [esi]
// Widen the four pixel bytes to four 32-bit integers, then to floats
punpcklbw xmm0, xmm7
punpcklwd xmm0, xmm7
cvtdq2ps xmm0, xmm0
mulps xmm0, xmm1 // xmm0 = [A R G B] * [m44 m11 m22 m33]
// Back to integers and pack to bytes with saturation
cvtps2dq xmm0, xmm0
packssdw xmm0, xmm7
packuswb xmm0, xmm7
movd [edi], xmm0
add esi, 4
add edi, 4
loop @@xLoop_Scale
pop ecx
add esi, eax
add edi, ebx
dec edx
jnz @@yLoop_Scale
jmp @@Exit
// Full colour transform
@@Transform:
// Load the matrix rows into SSE registers, excluding the virtual fifth column
movups xmm1, [edi+0*5*4]
movups xmm2, [edi+1*5*4]
movups xmm3, [edi+2*5*4]
movups xmm4, [edi+3*5*4]
movups xmm5, [edi+4*5*4]
// Multiply the translation row by 255
mov ebx, 255
cvtsi2ss xmm6, ebx
pshufd xmm6, xmm6, 0
mulps xmm5, xmm6
// Swap the red and blue positions in every row
pshufd xmm1, xmm1, 11000110b
pshufd xmm2, xmm2, 11000110b
pshufd xmm3, xmm3, 11000110b
pshufd xmm4, xmm4, 11000110b
pshufd xmm5, xmm5, 11000110b
// Keep the translation row in the aligned stack slot
movaps [ebp], xmm5
pxor xmm7, xmm7
call _SetCopyRegs
@@yLoop:
push ecx
@@xLoop:
movd xmm0, [esi]
// Widen the four pixel bytes to four floats
punpcklbw xmm0, xmm7
punpcklwd xmm0, xmm7
cvtdq2ps xmm0, xmm0
// Broadcast each channel and multiply it by its matrix row
pshufd xmm5, xmm0, 0
pshufd xmm6, xmm0, 01010101b
mulps xmm5, xmm3 // vb = blue * m3
mulps xmm6, xmm2 // vg = green * m2
addps xmm5, [ebp] // vb += m5
addps xmm5, xmm6 // vb += vg
pshufd xmm6, xmm0, 10101010b
pshufd xmm0, xmm0, 11111111b
mulps xmm6, xmm1 // vr = red * m1
mulps xmm0, xmm4 // va = alpha * m4
addps xmm0, xmm6 // v = va + vr
addps xmm0, xmm5 // v += vb
// Back to integers and pack to bytes with saturation
cvtps2dq xmm0, xmm0
packssdw xmm0, xmm7
packuswb xmm0, xmm7
movd [edi], xmm0
add esi, 4
add edi, 4
loop @@xLoop
pop ecx
add esi, eax
add edi, ebx
dec edx
jnz @@yLoop
@@Exit:
add esp, 32
pop ebx
pop edi
pop esi
pop ebp
end;
// In-place variant: applies Matrix to Data by passing Data as both the
// destination and the source of the two-image overload.
procedure ImageSetColorMatrix(var Data: TImageData; Matrix: TColorMatrix); overload;
begin
ImageSetColorMatrix(Data, Data, Matrix);
end;
//---------------------------------------------------------------------------