转自:http://www.cnblogs.com/tntmonks/p/4899649.html
刚才发现一份快速高斯模糊的实现。
源地址为:http://incubator.quasimondo.com/processing/gaussian_blur_1.php
作者信息为:
Fast Gaussian Blur v1.3 by Mario Klingemann <http://incubator.quasimondo.com>
processing源码: http://incubator.quasimondo.com/processing/fastblur.pde
效果图:
转为C语言实现版本。
代码如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
|
// Fast Gaussian Blur v1.3
// by Mario Klingemann <http://incubator.quasimondo.com>
// C version updated and performance optimization by tntmonks(http://tntmonks.cnblogs.com)
// One of my first steps with Processing. I am a fan
// of blurring. Especially as you can use blurred images
// as a base for other effects. So this is something I
// might get back to in later experiments.
//
// What you see is an attempt to implement a Gaussian Blur algorithm
// which is exact but fast. I think that this one should be
// relatively fast because it uses a special trick by first
// making a horizontal blur on the original image and afterwards
// making a vertical blur on the pre-processed image. This
// is a mathematically correct thing to do and reduces the
// calculation a lot.
//
// In order to avoid the overhead of function calls I unrolled
// the whole convolution routine in one method. This may not
// look nice, but brings a huge performance boost.
//
//
// v1.1: I replaced some multiplications by additions
// and added some minor pre-calculations.
// Also added correct rounding for float->int conversion
//
// v1.2: I completely got rid of all floating point calculations
// and speeded up the whole process by using a
// precalculated multiplication table. Unfortunately
// a precalculated division table was becoming too
// huge. But maybe there is some way to even speed
// up the divisions.
//
// v1.3: Fixed a bug that caused blurs that start at y>0
// to go wrong. Thanks to Jeroen Schellekens for
// finding it!
/*
 * GaussianBlur - in-place separable blur of an interleaved 8-bit image.
 *
 * Parameters:
 *   img    - pixel buffer holding w * h pixels, comp bytes per pixel,
 *            stored row by row.
 *   x, y   - left / top corner of the region to blur.
 *   w, h   - image width / height in pixels.  They also bound the region:
 *            the blurred rectangle is [x, w) x [y, h).
 *   comp   - bytes per pixel.  The first min(comp, 3) channels are blurred;
 *            further channels (e.g. alpha in RGBA) are left untouched.
 *   radius - blur radius, clamped to [1, 248].
 *
 * The kernel has 2 * radius + 1 taps; the tap at distance d from the centre
 * weighs (radius - d)^2, so the two outermost taps weigh 0.  Taps that fall
 * outside the region are skipped and the result is renormalised by the sum
 * of the weights actually used.  With radius == 1 the image is unchanged.
 *
 * The function has no way to report errors: it returns without touching img
 * when img is NULL, comp is 0, the region is empty, the buffer size does not
 * fit in size_t, or a temporary allocation fails.
 */
void GaussianBlur(unsigned char *img, unsigned int x, unsigned int y,
                  unsigned int w, unsigned int h, unsigned int comp,
                  unsigned int radius)
{
    unsigned int *kernel;
    unsigned int (*mult)[256];
    unsigned char *plane;
    unsigned char *rowBlurred;
    size_t width, height, pixelCount;
    unsigned int channels, kernelSize;

    if (img == NULL || comp == 0 || x >= w || y >= h) {
        return;
    }

    /* At 248 the worst-case accumulator (255 * sum of weights, about 2.59e9)
     * still fits in a 32-bit unsigned int. */
    if (radius < 1) {
        radius = 1;
    } else if (radius > 248) {
        radius = 248;
    }

    /* All pixel indexing is done in size_t; refuse sizes that would wrap.
     * h >= 1 here because y < h. */
    width = w;
    height = h;
    if (width > (size_t)-1 / height) {
        return;
    }
    pixelCount = width * height;
    if (pixelCount > (size_t)-1 / comp) {
        return;
    }

    /* Never touch more bytes per pixel than the image actually has. */
    channels = comp < 3 ? comp : 3;
    kernelSize = 2 * radius + 1;

    kernel = calloc(kernelSize, sizeof *kernel);
    mult = calloc(kernelSize, sizeof *mult);
    plane = malloc(pixelCount);
    rowBlurred = malloc(pixelCount);
    if (kernel == NULL || mult == NULL || plane == NULL || rowBlurred == NULL) {
        free(kernel);
        free(mult);
        free(plane);
        free(rowBlurred);
        return;
    }

    /* Weights plus a weight * value lookup table.  kernel[0] and
     * kernel[2 * radius] keep the zero that calloc gave them. */
    for (unsigned int d = 0; d < radius; d++) {
        const unsigned int weight = (radius - d) * (radius - d);

        kernel[radius - d] = kernel[radius + d] = weight;
        for (unsigned int v = 0; v < 256; v++) {
            mult[radius - d][v] = mult[radius + d][v] = weight * v;
        }
    }

    /* Channels are independent, so handle them one at a time; this needs
     * only two single-channel scratch planes. */
    for (unsigned int c = 0; c < channels; c++) {
        /* De-interleave the region of channel c into `plane`. */
        for (size_t row = y; row < height; row++) {
            for (size_t col = x; col < width; col++) {
                const size_t n = row * width + col;

                plane[n] = img[n * comp + c];
            }
        }

        /* Horizontal pass: plane -> rowBlurred. */
        for (size_t row = y; row < height; row++) {
            const size_t rowBase = row * width;

            for (size_t col = x; col < width; col++) {
                /* Tap k samples column col + k - radius; keep only the taps
                 * whose column lies inside [x, w). */
                const size_t left = col - x;
                const size_t right = width - 1 - col;
                const size_t kFirst = left >= radius ? 0 : radius - left;
                const size_t kLast = right >= radius ? 2 * (size_t)radius
                                                     : radius + right;
                unsigned int acc = 0;
                unsigned int weightSum = 0;

                for (size_t k = kFirst; k <= kLast; k++) {
                    acc += mult[k][plane[rowBase + col + k - radius]];
                    weightSum += kernel[k];
                }
                /* The centre tap is always included, so weightSum >= 1. */
                rowBlurred[rowBase + col] = (unsigned char)(acc / weightSum);
            }
        }

        /* Vertical pass: rowBlurred -> img (channel c only). */
        for (size_t row = y; row < height; row++) {
            /* Tap k samples row row + k - radius; keep only the taps whose
             * row lies inside [y, h). */
            const size_t up = row - y;
            const size_t down = height - 1 - row;
            const size_t kFirst = up >= radius ? 0 : radius - up;
            const size_t kLast = down >= radius ? 2 * (size_t)radius
                                                : radius + down;

            for (size_t col = x; col < width; col++) {
                unsigned int acc = 0;
                unsigned int weightSum = 0;

                for (size_t k = kFirst; k <= kLast; k++) {
                    acc += mult[k][rowBlurred[(row + k - radius) * width + col]];
                    weightSum += kernel[k];
                }
                img[(row * width + col) * comp + c] =
                    (unsigned char)(acc / weightSum);
            }
        }
    }

    free(rowBlurred);
    free(plane);
    free(mult);
    free(kernel);
}
|
该代码,将二维数组进一步优化后可提升一定的速度。
在博主机子上测试一张5000x3000的图像,模糊半径为10的情况下,耗时4s.