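# Uses the saliency-map principle together with the Fourier transform to
# extract salient regions of an image, applied here to weld-seam detection.
# Source: the book "OpenCV with Python Blueprints".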
import cv2
import numpy as np
import matplotlib.pyplot as plt
import math, os, threading
class Saliency:
    '''
    Generate a saliency map from a single-channel or multi-channel image.

    Saliency map: Fourier analysis is used to model what general image
    backgrounds look like. By comparing that background model with a specific
    image frame, sub-regions that pop out of their surroundings can be
    located. Ideally, these sub-regions correspond to the image patches that
    tend to grab our immediate attention when looking at the image.

    NOTE(review): the plotting helpers convert multi-channel input with
    cv2.COLOR_BGR2GRAY, so colour input is presumably expected in OpenCV's
    BGR channel order -- confirm against callers.
    '''

    # Lower bound applied before taking log10, so that zero-valued Fourier
    # coefficients do not turn into -inf.
    _LOG_FLOOR = 1e-9

    def __init__(self, img, use_numpy_fft=True, gauss_kernel=(5, 5)):
        '''
        Store the input frame and prepare a down-sampled copy of it.

        :param img: input image as a NumPy array (2-D, or 3-D with channels
            on the last axis)
        :param use_numpy_fft: True to use NumPy's FFT routines, False to use
            OpenCV's cv2.dft / cv2.idft
        :param gauss_kernel: kernel size handed to cv2.GaussianBlur when the
            saliency map is smoothed; None skips the smoothing step
        '''
        self.use_numpy_fft = use_numpy_fft
        self.gauss_kernel = gauss_kernel
        self.frame_org = img

        # The saliency map is generated from a down-sampled version of the
        # image because the computation is relatively time-intensive.
        self.small_shape = (64, 64)
        self.frame_small = cv2.resize(img, self.small_shape[1::-1])

        # True while the saliency map still has to be computed, False once it
        # has been cached in self.saliency_map.
        self.need_saliency_map = True
        self.saliency_map = None

    @staticmethod
    def _safe_log10(values):
        '''
        Return log10 of ``values`` after clipping them to at least _LOG_FLOOR.

        :param values: array-like of non-negative numbers
        :return: NumPy array of finite base-10 logarithms
        '''
        return np.log10(np.clip(values, Saliency._LOG_FLOOR, None))

    @staticmethod
    def _radial_average(spectrum):
        '''
        Average a 2-D spectrum over rings of equal radial frequency.

        :param spectrum: 2-D array-like laid out like the output of a 2-D FFT
            (not shifted)
        :return: tuple (centers, averages) with the centre frequency of each
            ring and the mean spectrum value inside it; rings that contain no
            sample yield NaN
        '''
        spectrum = np.asarray(spectrum)
        rows, cols = spectrum.shape[:2]
        longest = max(rows, cols)

        # Ring edges: the non-negative frequencies of the longer image side.
        edges = np.fft.fftfreq(longest)[:longest // 2]

        # Radial frequency of every coefficient in the 2-D spectrum.
        radii = np.sqrt(np.fft.fftfreq(rows)[:, np.newaxis] ** 2 +
                        np.fft.fftfreq(cols) ** 2).ravel()

        counts = np.histogram(radii, bins=edges)[0]
        sums, bins = np.histogram(radii, bins=edges,
                                  weights=spectrum.ravel())

        # Normalise the accumulated values by the number of samples per ring.
        # Empty rings stay NaN instead of triggering a divide warning.
        averages = np.divide(sums, counts,
                             out=np.full(sums.shape, np.nan),
                             where=counts > 0)
        centers = (bins[:-1] + bins[1:]) / 2
        return centers, averages

    def _get_padded_gray_frame(self):
        '''
        Return the original frame as a zero-padded single-channel image.

        A frame with more than two dimensions is converted with
        cv2.COLOR_BGR2GRAY first. The result is padded at the bottom and right
        with zeros up to the sizes reported by cv2.getOptimalDFTSize.

        :return: padded single-channel image
        '''
        if len(self.frame_org.shape) > 2:
            frame = cv2.cvtColor(self.frame_org, cv2.COLOR_BGR2GRAY)
        else:
            frame = self.frame_org

        # DFT performance depends on the image size, so pad up to a size that
        # OpenCV reports as efficient.
        rows, cols = frame.shape[:2]
        nrows = cv2.getOptimalDFTSize(rows)
        ncols = cv2.getOptimalDFTSize(cols)
        return cv2.copyMakeBorder(frame,
                                  top=0, bottom=nrows - rows,
                                  left=0, right=ncols - cols,
                                  borderType=cv2.BORDER_CONSTANT, value=0)

    def _get_channel_saliency_magnitude(self, channel):
        '''
        Compute the spectral-residual saliency magnitude of one channel.

        The spectral residual approach processes each channel of an input
        image separately. The magnitude returned here is combined across
        channels by get_saliency_map.

        :param channel: single-channel 2-D image
        :return: 2-D array holding the magnitude of the inverse transform
        '''
        # 1. Calculate the magnitude and phase of the Fourier spectrum, using
        #    either NumPy's fft module or OpenCV.
        if self.use_numpy_fft:
            img_dft = np.fft.fft2(channel)
            magnitude, angle = cv2.cartToPolar(np.real(img_dft),
                                               np.imag(img_dft))
        else:
            img_dft = cv2.dft(np.float32(channel),
                              flags=cv2.DFT_COMPLEX_OUTPUT)
            magnitude, angle = cv2.cartToPolar(img_dft[:, :, 0],
                                               img_dft[:, :, 1])

        # 2. Calculate the log amplitude of the Fourier spectrum. Magnitudes
        #    are clipped from below so that log10 never sees zero.
        log_amplitude = self._safe_log10(magnitude)

        # 3. Approximate the averaged spectrum of a typical natural image by
        #    convolving with a local averaging filter.
        log_amplitude_blur = cv2.blur(log_amplitude, (3, 3))

        # 4. Calculate the spectral residual, which primarily contains the
        #    non-trivial (unexpected) parts of a scene.
        # NOTE(review): the logarithm above is base 10 while np.exp is base e,
        # so this is not an exact inverse. Kept as-is to preserve existing
        # results -- confirm whether a natural logarithm was intended.
        residual = np.exp(log_amplitude - log_amplitude_blur)

        # 5. Calculate the saliency map with the inverse Fourier transform,
        #    again via either NumPy or OpenCV.
        if self.use_numpy_fft:
            real_part, imag_part = cv2.polarToCart(residual, angle)
            img_combined = np.fft.ifft2(real_part + 1j * imag_part)
            magnitude, _ = cv2.cartToPolar(np.real(img_combined),
                                           np.imag(img_combined))
        else:
            img_dft[:, :, 0], img_dft[:, :, 1] = cv2.polarToCart(residual,
                                                                 angle)
            img_combined = cv2.idft(img_dft)
            magnitude, _ = cv2.cartToPolar(img_combined[:, :, 0],
                                           img_combined[:, :, 1])
        return magnitude

    def plot_magnitude(self):
        '''
        Compute the centred log-magnitude Fourier spectrum of the frame.

        :return: 2-D array, the shifted log10 magnitude spectrum divided by
            its maximum and multiplied by 255
        '''
        # 1./2. Single-channel frame, zero-padded to an efficient DFT size.
        frame = self._get_padded_gray_frame()

        # 3. Apply the DFT; the result is a 2-D matrix of complex numbers.
        img_dft = np.fft.fft2(frame)

        # 4. The magnitude is the absolute value of each complex coefficient.
        magnitude = np.abs(img_dft)

        # 5. Switch to a logarithmic scale: the dynamic range of the Fourier
        #    coefficients is usually too large to be displayed linearly.
        #    Clipping keeps zero coefficients from becoming -inf.
        log_magnitude = self._safe_log10(magnitude)

        # 6. Shift quadrants to centre the spectrum, which makes it easier to
        #    inspect visually.
        spectrum = np.fft.fftshift(log_magnitude)

        # 7. Return the result for plotting.
        return spectrum / np.max(spectrum) * 255

    def get_saliency_map(self):
        '''
        Convert the stored image into a saliency map.

        The map is computed on the first call and cached; later calls return
        the cached array.

        :return: saliency map at the resolution of the original frame,
            normalised so that the largest value before resizing is one
        '''
        if self.need_saliency_map:
            # The saliency map has not been calculated for this frame yet.
            small = self.frame_small
            if small.ndim == 2:
                # Single channel.
                sal = self._get_channel_saliency_magnitude(small)
            else:
                # Consider each channel independently. The channel count is
                # the size of the last axis, not the number of dimensions.
                sal = np.zeros_like(small).astype(np.float32)
                for c in range(small.shape[2]):
                    sal[:, :, c] = self._get_channel_saliency_magnitude(
                        small[:, :, c])
                # The overall saliency of a multi-channel image is the
                # average over all channels.
                sal = np.mean(sal, 2)

            # Optional blurring stage to make the result appear smoother.
            if self.gauss_kernel is not None:
                sal = cv2.GaussianBlur(sal, self.gauss_kernel,
                                       sigmaX=8, sigmaY=0)

            # Square the values to highlight regions of high saliency, then
            # normalise so that the largest value is one and scale the map
            # back up to the original resolution.
            sal = sal ** 2
            peak = np.max(sal)
            sal = np.float32(sal)
            if peak > 0:
                # Skip the division for an all-zero map to avoid 0/0.
                sal = sal / peak
            sal = cv2.resize(sal, self.frame_org.shape[1::-1])

            # Cache the result so the computation is not repeated, and lower
            # the flag.
            self.saliency_map = sal
            self.need_saliency_map = False
        return self.saliency_map

    def get_proto_objects_map(self, use_otsu=True):
        '''
        Convert the saliency map into a binary mask of proto-objects.

        :param use_otsu: True to threshold with Otsu's method, False to
            threshold at the mean saliency value
        :return: 8-bit mask produced by cv2.threshold with THRESH_BINARY
        '''
        saliency = self.get_saliency_map()
        saliency_u8 = np.uint8(saliency * 255)
        if use_otsu:
            img_objs = cv2.threshold(saliency_u8, 0, 255,
                                     cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
        else:
            thresh = np.mean(saliency) * 255
            img_objs = cv2.threshold(saliency_u8, thresh, 255,
                                     cv2.THRESH_BINARY)[1]
        return img_objs

    def plot_power_spectrum(self):
        '''
        Plot the radially averaged log power spectrum of the frame.

        The plot is shown with matplotlib (plt.show()).

        :return: None
        '''
        # 1./2. Single-channel frame, zero-padded to an efficient DFT size.
        frame = self._get_padded_gray_frame()

        # 3. Apply the DFT and get the power spectrum, using either NumPy's
        #    or OpenCV's Fourier tools depending on use_numpy_fft.
        if self.use_numpy_fft:
            img_dft = np.fft.fft2(frame)
            power = np.abs(img_dft) ** 2
        else:
            img_dft = cv2.dft(np.float32(frame),
                              flags=cv2.DFT_COMPLEX_OUTPUT)
            power = img_dft[:, :, 0] ** 2 + img_dft[:, :, 1] ** 2
        # Clipping keeps zero-power coefficients from becoming -inf.
        spectrum = self._safe_log10(power)

        # 4. Perform radial averaging: we want the spectrum as a function of
        #    frequency, independent of orientation, so values are averaged
        #    over rings from some frequency r to r + dr.
        centers, averaged = self._radial_average(spectrum)

        # 5. Plot the result.
        plt.plot(centers, averaged)
        plt.xlabel('frequency')
        plt.ylabel('log-spectrum')
        plt.show()
if __name__ == '__main__':
    import sys

    # Demo: show the power spectrum of an image, then display the image
    # masked by its proto-object map. The image path may be given as the
    # first command-line argument; otherwise the default below is used.
    default_path = r'D:\Jay.Lee\Study\imgs\weldcircle.png'
    image_path = sys.argv[1] if len(sys.argv) > 1 else default_path

    img = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread reports an unreadable file by returning None instead of
        # raising, so fail here with a clear message.
        raise SystemExit('Could not read image: {}'.format(image_path))

    saliency = Saliency(img)
    saliency.plot_power_spectrum()

    # Close gaps in the mean-thresholded proto-object mask with a large
    # elliptical structuring element before applying it to the image.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (55, 55))
    mask = cv2.morphologyEx(saliency.get_proto_objects_map(use_otsu=False),
                            cv2.MORPH_CLOSE,
                            kernel)
    cv2.imshow('saliency map', cv2.bitwise_and(img, img, mask=mask))
    cv2.waitKey()
    cv2.destroyAllWindows()
# The result is shown below: