阅读提示:
《Delphi图像处理》系列以效率为侧重点,一般代码为PASCAL,核心代码采用BASM。
《C++图像处理》系列以代码清晰,可读性为主,全部使用C++代码。
尽可能保持二者内容一致,可相互对照。
本文代码必须包括文章《Delphi图像处理 -- 数据类型及公用过程》中的ImageData.pas单元。
在图像的处理过程中,经常要用到卷积模板,如图像锐化、图像平滑、高斯模糊、Hough变换等,为此,本人使用Delphi编写了图像通用卷积处理过程和高斯模糊过程,代码如下:
procedure Convolution32(var Dest: TImageData; const Source: TImageData;
Template: Pointer; Size, TemplateOffset: Integer);
var
width, height: Integer;
dstOffset, srcOffset: Integer;
asm
push esi
push edi
push ebx
push ecx
call _SetCopyRegs
mov width, ecx
mov height, edx
mov dstOffset, ebx
mov srcOffset, eax
mov eax, TemplateOffset
pop ebx // ebx = Template
pxor xmm7, xmm7
@@yLoop: // for (y = 0; y < Dst.Height; y ++)
push width // {
@@xLoop: // for (x = 0; x < Dst.Width; x ++)
push esi // {
push ebx
pxor xmm0, xmm0 // xmm0 = 0
mov edx, size // for (I = 0; I < TemplateSize; I ++)
@@iLoop: // {
mov ecx, size // for (J = 0; J <= TemplateSize; J ++)
@@jLoop: // {
movd xmm1, [esi]
punpcklbw xmm1, xmm7
punpcklwd xmm1, xmm7
cvtdq2ps xmm1, xmm1
mulps xmm1, [ebx] // xmm1 = pixels[I, J] * Template[i * j]
addps xmm0, xmm1 // xmm0 += xmm1
add esi, 4 // esi += 4
add ebx, 16 // edx += 16
loop @@jLoop // }
add esi, eax
dec edx
jnz @@iLoop // }
cvtps2dq xmm0, xmm0
packssdw xmm0, xmm7
packuswb xmm0, xmm7
movd [edi], xmm0 // *(ARGB*)edi = xmm0
pop ebx
pop esi
add esi, 4 // esi += 4
add edi, 4 // edi += 4
dec width
jnz @@xLoop // }
add esi, srcOffset // esi += srcOffset
add edi, dstOffset // edi += dstOffset
pop width
dec height
jnz @@yLoop // }
pop ebx
pop edi
pop esi
end;
procedure ConvolutionOne(var Dest: TImageData; const Source: TImageData;
Template: Pointer; Size, TemplateOffset: Integer);
var
width, height: Integer;
dstOffset, srcOffset: Integer;
asm
push esi
push edi
push ebx
push ecx
call _SetCopyRegs
mov width, ecx
mov height, edx
mov dstOffset, ebx
mov srcOffset, eax
mov eax, TemplateOffset
pop ebx // ebx = Template
pxor mm7, mm7
@@yLoop: // for (y = 0; y < Dst.Height; y ++)
push width // {
@@xLoop: // for (x = 0; x < Dst.Width; x ++)
push esi // {
push ebx
pxor mm0, mm0 // mm0 = 0
mov edx, size // for (I = 0; I < TemplateSize; I ++)
@@iLoop: // {
mov ecx, size // for (J = 0; J <= TemplateSize; J ++)
@@jLoop: // {
movd mm1, [esi]
punpcklbw mm1, mm7
pmullw mm1, [ebx] // mm1 = pixels[I, J] * Template[i * j]
paddsw mm0, mm1 // mm0 += mm1
add esi, 4 // esi += 4
add ebx, 8 // edx += 8
loop @@jLoop // }
add esi, eax
dec edx
jnz @@iLoop // }
packuswb mm0, mm7
movd [edi], mm0 // *(ARGB*)edi = mm0
pop ebx
pop esi
add esi, 4 // esi += 4
add edi, 4 // edi += 4
dec width
jnz @@xLoop // }
add esi, srcOffset // esi += srcOffset
add edi, dstOffset // edi += dstOffset
pop width
dec height
jnz @@yLoop // }
emms
pop ebx
pop edi
pop esi
end;
procedure CvtTemplate8(Template8, Template: Pointer; n: Integer);
asm
push eax
push ecx
pcmpeqw mm7, mm7
psrlq mm7, 16
@@Loop:
movd mm0, [edx]
pshufw mm0, mm0, 0
pand mm0, mm7
movq [eax], mm0
add edx, 4
add eax, 8
loop @@Loop
pop ecx
pop eax
shr ecx, 1
shl ecx, 3
add eax, ecx
add eax, 6
mov word ptr[eax], 1 // (short)Template8[n / 2][3] = 1
emms
end;
procedure CvtTemplate16(Template16, Template: Pointer; n: Integer);
var
nuclear: Single;
asm
push edx
push ecx
fldz
@@SumLoop:
fadd dword ptr[edx]
add edx, 4
loop @@SumLoop
fstp nuclear
fwait
pop ecx
pop edx
cmp nuclear, 0
je @@1
movss xmm7, nuclear
shufps xmm7, xmm7, 0
@@Loop:
movss xmm0, [edx]
shufps xmm0, xmm0, 0
divps xmm0, xmm7
movaps [eax], xmm0
add edx, 4
add eax, 16
loop @@Loop
jmp @@Exit
@@1:
push eax
push ecx
pcmpeqd xmm7, xmm7
psrldq xmm7, 4 // xmm7 >> 32
@@Loop1:
movss xmm0, [edx]
shufps xmm0, xmm0, 0
andps xmm0, xmm7
movaps [eax], xmm0
add edx, 4
add eax, 16
loop @@Loop1
pop ecx
pop eax
shr ecx, 1
shl ecx, 4
add eax, ecx
add eax, 12
fld1
fstp dword ptr[eax] // (float)Template16[n / 2][3] = 1.0
@@Exit:
end;
procedure CvtTemplate16I(Template16, Template: Pointer; n, nuclear: Integer);
asm
cmp nuclear, 1
jle @@1
movd xmm7, nuclear
pshufd xmm7, xmm7, 0
cvtdq2ps xmm7, xmm7
@@Loop:
movd xmm0, [edx]
pshufd xmm0, xmm0, 0
cvtdq2ps xmm0, xmm0
divps xmm0, xmm7
movaps [eax], xmm0
add edx, 4
add eax, 16
loop @@Loop
jmp @@Exit
@@1:
push eax
push ecx
pcmpeqd xmm7, xmm7
psrldq xmm7, 4 // xmm7 >> 32
@@Loop1:
movd xmm0, [edx]
pshufd xmm0, xmm0, 0
pand xmm0, xmm7
cvtdq2ps xmm0, xmm0
movaps [eax], xmm0
add edx, 4
add eax, 16
loop @@Loop1
pop ecx
pop eax
shr ecx, 1
shl ecx, 4
add eax, ecx
add eax, 12
fld1
fstp dword ptr[eax] // (float)Template16[n / 2][3] = 1.0
@@Exit:
end;
procedure ImageConvolutionI(var Data: TImageData; const Template: array of Integer);
var
i, n, sum: Integer;
buffer, pTemplate: Pointer;
templateSize, templateOffset: Integer;
nuclear: Integer;
isFloatOp: Boolean;
src: TImageData;
begin
if Data.AlphaFlag then
ArgbConvertPArgb(Data);
n := Length(Template);
templateSize := Trunc(Sqrt(n));
src := _GetExpandData(Data, templateSize shr 1);
templateOffset := src.Stride - (templateSize shl 2);
nuclear := 0;
for i := 0 to n - 1 do
Inc(nuclear, Template[i]);
isFloatOp := nuclear > 1;
if not isFloatOp then
begin
sum := 0;
for i := 0 to n - 1 do
begin
Inc(sum, Template[i] * 255);
isFloatOp := (sum > 32767) or (sum < -32768);
if isFloatOp then Break;
end;
end;
if not isFloatOp then
begin
buffer := GlobalAllocPtr(GHND, n * Sizeof(TMMType));
CvtTemplate8(buffer, @Template[0], n);
ConvolutionOne(Data, src, buffer, templateSize, templateOffset);
end
else
begin
buffer := GlobalAllocPtr(GHND, n * 16 + 12);
pTemplate := Pointer((Integer(buffer) + 12) and (not 15));
CvtTemplate16I(pTemplate, @Template[0], n, nuclear);
Convolution32(Data, src, PTemplate, templateSize, templateOffset);
end;
GlobalFreePtr(Buffer);
FreeImageData(src);
if Data.AlphaFlag then
PArgbConvertArgb(Data);
end;
procedure ImageConvolution(var Data: TImageData; const Template: array of Single);
var
n: Integer;
buffer, pTemplate: Pointer;
templateSize, templateOffset: Integer;
src: TImageData;
begin
if Data.AlphaFlag then
ArgbConvertPArgb(Data);
n := Length(Template);
templateSize := Trunc(Sqrt(n));
src := _GetExpandData(Data, templateSize shr 1);
templateOffset := src.Stride - (templateSize shl 2);
buffer := GlobalAllocPtr(GHND, n * 16 + 12);
pTemplate := Pointer((Integer(buffer) + 12) and (not 15));
CvtTemplate16(pTemplate, @Template[0], n);
Convolution32(Data, src, PTemplate, templateSize, templateOffset);
GlobalFreePtr(Buffer);
FreeImageData(src);
if Data.AlphaFlag then
PArgbConvertArgb(Data);
end;
因为要进行图像卷积操作,必须拷贝出源图像,本文过程在拷贝源图像时使用_GetExpandData过程附带扩展了其边框,这样在卷积图像边界像素时不必再进行坐标范围判断,从而简化了代码和提高了运行速度。
考虑不同的应用环境,本文写了浮点卷积矩阵和整数卷积矩阵2个版本。
浮点版本采用SSE浮点运算,每次可同时处理图像像素ARGB4个分量,运行速度比较快;
整数版本有2种处理方法,对于卷积核小于等于1的卷积模板采用了MMX处理,过程速度相当快,当然也要求其模板单个数据项不得超过127,整个模板数据项与255的乘积和不得超过$7fff,一般卷积核小于等于1的卷积模板都远远小于这些值(当然,代码中已经对此作了检查,并做出了相应的处理)。对于卷积核大于1的卷积模板,则转换为浮点数版本处理。
下面给出一个直接调用GdipConvolution过程进行Hough变换的例子:
procedure TForm1.Button3Click(Sender: TObject);
const
// Ray: array[0..8] of Integer = (-1, 0, 1, -1, 0, 1, -1, 0, 1);
// Ray: array[0..8] of Integer = (-1, -1, 0, -1, 0, 1, 0, 1, 1);
Ray: array[0..8] of Integer = (-1, -1, -1, 0, 0, 0, 1, 1, 1);
var
bmp: TGpBitmap;
g: TGpGraphics;
data: TImageData;
begin
bmp := TGpBitmap.Create('..\media\msn.jpg');
g := TGpGraphics.Create(Canvas.Handle);
g.DrawImage(bmp, 0, 0);
data := LockGpBitmap(bmp);
ImageConvolutionI(Data, Ray);
UnlockGpBitmap(bmp, data);
g.DrawImage(bmp, data.Width, 0);
g.Free;
bmp.Free;
end;
运行效果如下图:
左边是原始图像,后面分别为用不同的Hough矩阵的运行效果,见例子代码中的几组Ray数组。作Hough变换,一般应该采用灰度图,本例子只是演示一下,没做图片灰度处理。
《Delphi图像处理》系列使用GDI+单元下载地址和说明见文章《GDI+ for VCL基础 -- GDI+ 与 VCL》。
因水平有限,错误在所难免,欢迎指正和指导。邮箱地址:maozefa@hotmail.com
这里可访问《Delphi图像处理 -- 文章索引》。