图像梯度
# Sobel derivative of `img` with a 3x3 kernel (ksize=3) and cv2.CV_64F output depth.
# NOTE(review): dx=1 and dy=1 are both set here, which presumably computes a mixed
# x/y derivative rather than a gradient magnitude - confirm this is intended; the
# per-axis alternative is kept commented out below.
sobelxy = cv2.Sobel(img,cv2.CV_64F,1,1,ksize=3)
# sobelx=cv2.Sobel(img,cv2.CV_64F,1,0,ksize=3)  # 1,0: first-order derivative along x only
# sobely=cv2.Sobel(img,cv2.CV_64F,0,1,ksize=3)  # 0,1: first-order derivative along y only
# mag=cv2.magnitude(sobelx,sobely)  # per-pixel gradient magnitude matrix
# Note on cv2.Sobel: its second argument widens the numeric range of the output pixels.
# Gradients are directional, so Sobel produces both positive and negative values,
# which is why the wider value range is needed.
# The usual way to get the absolute value of the gradient is to wrap the call like this:
# sobelx=cv2.convertScaleAbs(cv2.Sobel(im,cv2.CV_64F,1,0,ksize=3))
# cv2.magnitude computes sqrt(x^2+y^2), so the sign does not matter there.
# laplacian = cv2.Laplacian(sub_im,cv2.CV_64F)  # second-order derivative, used to extract edge information
腐蚀,膨胀
# Morphological combinations of erosion and dilation applied to a binarized image.
# Erosion followed by dilation is an "opening": erosion removes the small specks
# outside the object, and dilating afterwards grows the main shape back, which
# removes noise outside the object.
# Dilation followed by erosion is a "closing": dilation removes the small specks
# inside the object, and eroding afterwards shrinks the main shape back, which
# removes noise inside the object.
# Structuring elements (kernels) for the dilation and erosion steps
element1 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))#(9, 1)
element2 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))#(8, 6)
# Dilate once so the contours stand out
dilation = cv2.dilate(binary, element2, iterations = 1)
# Erode once to remove fine detail
erosion = cv2.erode(dilation, element1, iterations = 1)
# Dilate again (three iterations) to make the contours more pronounced;
# the result overwrites `binary`
binary = cv2.dilate(erosion, element2,iterations = 3)
灰度转换
from skimage.color import rgb2gray
# Convert the RGB image `rgb_im` to grayscale with scikit-image's rgb2gray.
gray_im = rgb2gray(rgb_im)
# Alternatively, use a hand-written function:
def rgb2grey(rgb):
    """Convert an RGB(A) image array to grayscale.

    Relies on ``numpy`` being imported as ``np`` at module level.

    Args:
        rgb: Array-like object exposing ``shape``. A 3-D input is treated as
            (..., channels); only the first three channels are used, so an
            extra alpha channel is ignored.

    Returns:
        For a 3-D input, the per-pixel weighted sum
        ``0.299 * c0 + 0.587 * c1 + 0.114 * c2`` of the first three channels.
        Any other input is returned unchanged after printing a notice.
    """
    # Use `==`, not `is`: identity comparison against an int literal only
    # happens to work because of CPython's small-integer cache.
    if len(rgb.shape) == 3:
        return np.dot(rgb[..., :3], [0.299, 0.587, 0.114])
    # A parenthesised single-argument print runs on both Python 2 and 3.
    print('Current image is already in grayscale.')
    return rgb
也可使用PIL图像处理库或者OpenCV的python接口.
python 绘图
faster r-cnn中的画框再加文字
import matplotlib.pyplot as plt
# Visualise one detection: show the image, outline the box in red and
# attach a "<class_name> <score>" caption to it.
fig, ax = plt.subplots(figsize=(12, 12))
ax.imshow(im, aspect='equal')

# bbox[0], bbox[1] anchor the rectangle; the index differences give its
# width and height.
rect = plt.Rectangle(
    (bbox[0], bbox[1]),
    bbox[2] - bbox[0],
    bbox[3] - bbox[1],
    fill=False,
    edgecolor='red',
    linewidth=3.5,
)
ax.add_patch(rect)

# Caption drawn at (bbox[0], bbox[1] - 2) on a semi-transparent blue box.
caption = '{:s} {:.3f}'.format(class_name, score)
ax.text(
    bbox[0],
    bbox[1] - 2,
    caption,
    bbox=dict(facecolor='blue', alpha=0.5),
    fontsize=14,
    color='white',
)

title_template = ('{} detections with '
                  'p({} | box) >= {:.1f}')
ax.set_title(title_template.format(class_name, class_name, thresh),
             fontsize=14)

# Hide the axes, tighten the layout, then render and display the figure.
plt.axis('off')
plt.tight_layout()
plt.draw()
plt.show()
OpenCV、Skimage、PIL图像处理的细节差异
PIL 图像处理库
PIL(Python Imaging Library) 是Python中最基础的图像处理库
from PIL import Image
import numpy as np
# Open the sample picture; it is 400x300 (width x height).
image = Image.open('test.jpg')
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(type(image))  # out: PIL.JpegImagePlugin.JpegImageFile
print(image.size)  # out: (400,300)
print(image.mode)  # out: 'RGB'
print(image.getpixel((0, 0)))  # out: (143, 198, 201)
# resize: the target size must be passed as ONE (width, height) tuple,
# followed by the resampling filter.
image = image.resize((200, 100), Image.NEAREST)
print(image.size)  # out: (200,100)
代码解释:
注意image是PIL.Image.Image类的对象。它有很多属性,比如size是(w,h),mode是RGB;也有很多方法,比如用getpixel((x,y))获取某个位置的像素,得到三个通道的值,其中x最大可取w-1,y最大可取h-1。
比如resize方法,可以实现图片的放缩,具体参数如下
resize(self, size, resample=0)
method of PIL.Image.Image instance
Returns a resized copy of this image.
:param size: The requested size in pixels, as a 2-tuple:
(width, height).
注意size是 (w,h),和原本的(w,h)保持一致
:param resample: An optional resampling filter. This can be
one of :py:attr:`PIL.Image.NEAREST`, :py:attr:`PIL.Image.BOX`,
:py:attr:`PIL.Image.BILINEAR`, :py:attr:`PIL.Image.HAMMING`,
:py:attr:`PIL.Image.BICUBIC` or :py:attr:`PIL.Image.LANCZOS`.
If omitted, or if the image has mode "1" or "P", it is
set :py:attr:`PIL.Image.NEAREST`.
See: :ref:`concept-filters`.
注意这几种插值方法,默认NEAREST最近邻(分割常用),分类常用BILINEAR双线性,BICUBIC立方
:returns: An :py:class:`~PIL.Image.Image` object.
转换为numpy,注意h,w维度的变化:
# Convert the PIL image to a float32 ndarray; plain np.array(image) would
# give uint8 by default.
image = np.array(image, dtype=np.float32)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(image.shape)  # out: (100, 200, 3)
# The w and h of the original PIL image are swapped: the array is (h, w, c).
# An ndarray is laid out as rows x columns x channels, so the number of rows
# is the height and the number of columns is the width.
其它使用示例:
1.将图片转换为灰度图:
from PIL import Image
# Open 'empire.jpg'; the returned value is a PIL image object.
pil_im=Image.open('empire.jpg')
上述代码的返回值 pil_im 是一个 PIL 图像对象。
图像的颜色转换可以使用 convert() 方法来实现。要读取一幅图像,并将其转换成灰度图像,只需要加上 convert('L'),如下所示:
# Open the image and convert it to grayscale via convert('L').
pil_im=Image.open('empire.jpg').convert('L')
2.将numpy.ndarray中的图像数据存储到图片(ndarray可以是2维的灰度数据矩阵):
# Wrap the ndarray (it may be a 2-D grayscale matrix) as a PIL image,
# then write it to 'out.png'.
im = Image.fromarray(ndarray)
im.save('out.png')
pillow支持的存储模式:
Modes
The mode of an image defines the type and depth of a pixel in the image. The current release supports the following standard modes:
1 (1-bit pixels, black and white, stored with one pixel per byte); L (8-bit pixels, black and white); P (8-bit pixels, mapped to any other mode using a color palette); RGB (3x8-bit pixels, true color); RGBA (4x8-bit pixels, true color with transparency mask); CMYK (4x8-bit pixels, color separation); YCbCr (3x8-bit pixels, color video format); LAB (3x8-bit pixels, the Lab color space); HSV (3x8-bit pixels, Hue, Saturation, Value color space); I (32-bit signed integer pixels); F (32-bit floating point pixels). PIL also provides limited support for a few special modes, including LA (L with alpha), RGBX (true color with padding) and RGBa (true color with premultiplied alpha). However, PIL doesn't support user-defined modes; if you need to handle band combinations that are not listed above, use a sequence of Image objects.
You can read the mode of an image through the mode attribute. This is a string containing one of the above values.
3.等比例压缩、裁剪压缩、缩略(水印)图
# Crop box given as (x1, y1, x2, y2).
region = (x1, y1, x2, y2)
# Crop the image
cropImg = pil_im.crop(region)
# Save the cropped image
cropImg.save(r'positive/'+mat_file+'_'+str(col)+'.jpg')
# Resize, then save with quality=75.
# Image.LANCZOS is the filter that used to be spelled Image.ANTIALIAS; the
# old alias has been removed from current Pillow releases, so use the new name.
pil_im.resize((newWidth, newHeight), Image.LANCZOS).save(arg['dst_img'], quality=75)
'''
除了Image.LANCZOS(旧名ANTIALIAS)还有如下值:
NEAREST: use nearest neighbour
BILINEAR: linear interpolation in a 2x2 environment
BICUBIC:cubic spline interpolation in a 4x4 environment
LANCZOS(旧名ANTIALIAS):best down-sizing filter
'''
# Watermark (image watermark only)
def waterMark(**args):
    """Paste a watermark image onto one corner of an image and save the result.

    Keyword Args:
        ori_img: Source image, passed to ``Image.open``.
        dst_img: Destination the watermarked image is saved to.
        mark_img: Watermark image, passed to ``Image.open``.
        water_opt: Corner to place the watermark in: 'leftup', 'rightup',
            'leftlow' or 'rightlow'.

    Raises:
        KeyError: If one of the keywords above is missing, or ``water_opt``
            is not one of the four corner names.
    """
    arg_names = ('ori_img', 'dst_img', 'mark_img', 'water_opt')
    # Keep only the recognised keywords; anything else is ignored.
    arg = {key: args[key] for key in arg_names if key in args}

    # `Image` (capital I) is the PIL module; lowercase `image` is not the
    # module and has no `open` function.
    im = Image.open(arg['ori_img'])
    ori_w, ori_h = im.size
    mark_im = Image.open(arg['mark_img'])
    mark_w, mark_h = mark_im.size

    # Top-left paste position of the watermark for each supported corner.
    option = {
        'leftup': (0, 0),
        'rightup': (ori_w - mark_w, 0),
        'leftlow': (0, ori_h - mark_h),
        'rightlow': (ori_w - mark_w, ori_h - mark_h),
    }
    # The RGBA version of the watermark is passed as the paste mask.
    im.paste(mark_im, option[arg['water_opt']], mark_im.convert('RGBA'))
    im.save(arg['dst_img'])
Skimage
skimage即是Scikit-Image
import skimage
from skimage import io,transform
import numpy as np
# Read the image. The first argument is a file name (a URL also works); the
# second one, as_grey, defaults to False and yields a grayscale image when True.
# NOTE(review): newer scikit-image releases spell this keyword `as_gray` -
# confirm against the installed version.
image = io.imread('test.jpg', as_grey=False)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(type(image))  # out: numpy.ndarray
print(image.dtype)  # out: dtype('uint8')
print(image.shape)  # out: (300, 400, 3), i.e. (h, w, c): rows x columns x channels
# The channel order is RGB here as well.
print(image)
'''
注意此时image里都是整数uint8,范围[0-255]
array([
[ [143, 198, 201 (dim=3)],[143, 198, 201],... (w=400)],
[ [143, 198, 201],[143, 198, 201],... ],
...(h=300)
], dtype=uint8)
'''
image = io.imread('test.jpg', as_grey=True)
print(image.shape)  # out: (300, 400)
print(image)
'''
此时image范围变为[0-1]
array([[ 0.73148549, 0.73148549, 0.73148549, ..., 0.73148549,
0.73148549, 0.73148549],
[ 0.73148549, 0.73148549, 0.73148549, ..., 0.73148549,
.....]])
'''
print(image.dtype)  # out: dtype('float64')
image = io.imread('test.jpg', as_grey=False)
image = transform.resize(image, (100, 200), order=1)  # order defaults to 1 (bi-linear)
# After resize the values are floats in [0, 1] again.
print(image.dtype)  # out: dtype('float64')
print(image.shape)  # out: (100, 200, 3)
print(image)
'''
array([[[ 0.56078431, 0.77647059, 0.78823529],
[ 0.56078431, 0.77647059, 0.78823529],
[ 0.56078431, 0.77647059, 0.78823529],
..., ...]])
'''
'''
resize函数接口
resize(image, output_shape, order=1, mode='constant', cval=0, clip=True, preserve_range=False)
order : int, optional
The order of interpolation. The order has to be in the range 0-5:
- 0: Nearest-neighbor
- 1: Bi-linear (default)
- 2: Bi-quadratic
- 3: Bi-cubic
- 4: Bi-quartic
- 5: Bi-quintic
'''
print(skimage.img_as_float(image).dtype)  # out: float64
# img_as_float converts the image to double precision, i.e. float64.
OpenCV
OpenCV是个很强大的图像处理库,性能也很好。
import cv2
import numpy as np
# Read the image with OpenCV. cv2.imread returns the channels in B, G, R
# order, unlike PIL and skimage, which give R, G, B.
image = cv2.imread('test.jpg')
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(type(image))  # out: numpy.ndarray
print(image.dtype)  # out: dtype('uint8')
print(image.shape)  # out: (300, 400, 3), i.e. (h, w, c), like skimage
print(image)
'''
array([
[ [201, 198, 143 (dim=3, BGR)],[201, 198, 143],... (w=400)],
[ [201, 198, 143],[201, 198, 143],... ],
...(h=300)
], dtype=uint8)
'''
# dsize is (w, h), so (100, 200) produces an array of shape (200, 100, 3).
image = cv2.resize(image, (100, 200), interpolation=cv2.INTER_LINEAR)
print(image.dtype)  # out: dtype('uint8')
print(image.shape)  # out: (200, 100, 3)
'''
注意注意注意 和skimage不同
resize(src, dsize[, dst[, fx[, fy[, interpolation]]]])
关键字参数为dst,fx,fy,interpolation
dst为缩放后的图像
dsize为(w,h),但是image是(h,w,c)
fx,fy为图像x,y方向的缩放比例,
interpolation为缩放时的插值方式,常用的有以下四种:
cv2.INTER_AREA:使用象素关系重采样。当图像缩小时候,该方法可以避免波纹出现。当图像放大时,类似于 INTER_NEAREST 方法
cv2.INTER_CUBIC: 立方插值
cv2.INTER_LINEAR: 双线性插值
cv2.INTER_NEAREST: 最近邻插值
'''
在进行图像处理时一定要注意各个库之间的细微差异,还要注意图像放缩时插值方法的选择;而且即使是相同的插值方法,各个库的实现也不同,结果也会有些许差异。