1.原理
老师上课PPT。
2.结构组织
F:.
│ main.py
│
├─data
│ ├─ex1
│ │ DJI_0013.JPG
│ │ DSC00438.JPG
│
├─operators
│ │ Forstner.py
│ │ robertsgradient.py
│
└─utils
display.py
gray_processing.py
readimage.py
stretch.py
__init__.py
3.代码部分
原本Forstner.py
中进行预选点的确定的速度是在太慢,使用Numba
进行了加速。原本感觉半个小时才能预选出来, 加了numba
之后只需15秒。但由于在一开始设计的时候没有考虑到numba
,导致没能够整体使用numba
。这是非常遗憾的情况,因为一旦图像非常大之后,程序运行就会比较慢。
这段程序也没有考虑适用情况,无论是读取还是写出的程序,都没考虑投影信息和地理参考信息,从而只能进行普通相片的特征提取。
最后,就是说程序也不一定对啦,有可能错的,但是确实提取了特征点。Forster.py
中进行预选的阈值self._t_for_pre_s
非常关键,是决定速度和提取质量的决定性要素。
网上有的资料,包括各种博客,都对求去w的阈值避而不谈,只求取了q的阈值。只有github上有几个计算了w的。本文计算了w的阈值。
3.1 rasterio.py
# -*- coding: utf-8 -*-
# @author: Dasheng Fan
# @email: fandasheng1999@163.com
"""
This code is used for image opening and writing.
And GDAL and numpy are used.
"""
import numpy as np
from osgeo import gdal
def readimage(path):
"""Read a image into a numpy array.
Args:
path: The path of image.
Return:
A numpy array of the image.
"""
dataset = gdal.Open(path, gdal.GA_ReadOnly)
if dataset == None:
raise Exception("File name error.")
else:
datatype = np.float
height = dataset.RasterYSize
width = dataset.RasterXSize
channels = dataset.RasterCount
# The projection and geo-transform are not considerd
image = np.zeros((height, width, channels), dtype = datatype)
for channel in range(channels):
band = dataset.GetRasterBand(channel + 1)
image[:, :, channel] = band.ReadAsArray()
return image
def write(image, path, dtype = 'png'):
dtype = gdal.Float32
height = image.shape[0]
width = image.shape[1]
channels = image.shape[2]
driver = gdal.GetDriverByName(dtype)
ds_to_save = driver.Create(path, width, height, channels, dtype)
# This is no projection and geo-transoform, we don't set them!
print("saving image......")
for band in range(channels):
ds_to_save.GetRasterBand(band + 1).WriteArray(image[:, :, band])
ds_to_save.FlushCache()
print("saved.")
del image
del ds_to_save
3.2 dispaly.py
# -*- coding: utf-8 -*-
# @author: Dasheng Fan
# @email: fandasheng1999@163.com
"""
This code is for the image displaying.
"""
import numpy as np
import numba as nb
from matplotlib import pyplot as plt
from stretch import stretch
def display(image):
"""
This function is used for show a image
Args:
image: The image your want to display
"""
image = image.astype(np.float16)
image = stretch(image, max_value=1)
if len(image.shape) == 3:
plt.imshow(image)
if len(image.shape) == 2:
plt.imshow(image, cmap='gray')
plt.show()
def doubledisplay(image1, image2):
"""
This function is used for show two images for comparation.
"""
image1 = stretch(image1.astype(np.float32), 1)
image2 = stretch(image2.astype(np.float32), 1)
plt.figure()
i = 1
for image in [image1, image2]:
plt.subplot(1, 2, i)
if len(image.shape) == 3:
plt.imshow(image)
elif len(image.shape) == 2:
plt.imshow(image, cmap = 'gray')
i+=1
plt.show()
def mask_process(mask):
"""
This function is used to change the flag points
of the mask to a '+' smybol for better perception.
Args:
mask:a binary numpy array as a mask.
Return:
a processed mask with '+' in every '1' points.
"""
height = mask.shape[0]
width = mask.shape[1]
extend_rol_and_col = 10
new_mask = np.zeros((height, width), dtype = np.float32)
@nb.jit(nopython = True)
def change(mask, new_mask, height, width):
for row in range(extend_rol_and_col, height-extend_rol_and_col):
for col in range(extend_rol_and_col, width-extend_rol_and_col):
if mask[row, col] == 1:
for i in range(extend_rol_and_col*-1, extend_rol_and_col+1):
new_mask[row+i, col] = 1
new_mask[row, col+i] = 1
change(mask, new_mask, height, width)
return new_mask
3.3 gray_processing.py
# -*- coding: utf-8 -*-
# @author: Dasheng Fan
# @email: fandasheng1999@163.com
"""
Gray processing.
"""
import numpy as np
from matplotlib import pyplot as plt
import sys
import os
def gray_processing(image):
"""
For gray processing.
Args:
image: A 3 channels image.
Reture:
A gray image.
Reference:
"https://baike.baidu.com/item/%E7%81%B0%E5%BA%A6%E5%8C%96/3206969?fr=aladdin"
"""
if len(image.shape) != 3:
raise Exception("The channel is wrong.")
u = np.power(image[:, :, 0], 2.2) + np.power(1.5*image[:, :, 1], 2.2) + \
np.power(0.6*image[:, :, 2], 2.2)
d = 1 + np.power(1.5, 2.2) + np.power(0.6, 2.2)
gray = np.power(u/d, 1/2.2)
return gray
if __name__ == '__main__':
pass
3.4 strech.py
# -*- coding: utf-8 -*-
# @author: Dasheng Fan
# @email: fandasheng1999@163.com
"""
Strech a image into a range (from 0 to some number).
"""
import numpy as np
def stretch(image, max_value = 1):
if len(image.shape) == 2:
image = image.astype(np.float)
max_val = np.max(image)
min_val = np.min(image)
diff = max_val - min_val
if diff == 0:
raise Exception("The range of the image is 0.")
image = image * max_value/diff
elif len(image.shape) == 3:
image = image.astype(np.float)
for band in range(len(image.shape)):
max_val = np.max(image[:, :, band])
min_val = np.min(image[:, :, band])
diff = max_val - min_val
if diff == 0:
raise Exception("The range of the image is 0.")
image[:, :, band] = image[:, :, band]*max_value/diff
else:
raise Exception("The dimention of the image is wrong!")
return image
3.5 Forstner.py
# -*- coding: utf-8 -*-
# @author: Dasheng Fan
# @email: fandasheng1999@163.com
"""
This code is used for build a Forstner operater for extrating
corners of a image.
"""
import numpy as np
np.set_printoptions(threshold=np.inf)
import sys
import os
import numba as nb
from matplotlib import pyplot as plt
from utils.gray_processing import gray_processing
from utils.display import display
class Forstner(object):
"""
Forstner.
Forstner is a operator for corner extracting.The paper
is:"A fast operator for detection and precise location
of distinct points, corners and circular features"
"""
def __init__(self, kernel_size = 5,
threshold_for_q = 0.5,
f = 0.5,
c=5,
maximum_kernel_size = 7,
threshold_for_pre_select = 20):
"""
Args:
kernel_size: the window size to calculate the covariance matrix
threshold_for_q: The threshold for culculate intrests.
f: Threshold parameter.
maximum_kernel_size: The window size for choosing the maximum.
"""
self._threshold_q = threshold_for_q
self._f = f
self._kernel_size = kernel_size
self._c = c
self._maximum_kernel_size = maximum_kernel_size
self._t_for_pre_s = threshold_for_pre_select
# @nb.jit()
def detect(self, image):
if len(image.shape) != 2:
image = gray_processing(image)
height = image.shape[0]
width = image.shape[1]
cut_size = np.floor(self._kernel_size/2).astype(np.int)
# for every pixel
w_matrix = np.zeros((height, width), dtype = np.float32)
q_matrix = np.zeros((height, width), dtype = np.float32)
w_list = []
flags = np.zeros((height, width), dtype = np.float32)
# pre_select points
print("start pre_select.")
@nb.jit(nopython = True)
def pre_select(image, height, width, cut_size, flags, threshold):
for row in range(cut_size, height-cut_size):
for col in range(cut_size, width - cut_size):
diff = np.zeros(4, np.float32)
diff[0] = image[row, col] - image[row, col+1]
diff[1] = image[row, col] - image[row+1, col]
diff[2] = image[row, col] - image[row, col-1]
diff[3] = image[row, col] - image[row-1, col]
diff = np.abs(diff)
if np.median(diff) >= threshold:
flags[row, col] = 1
return flags
flags = pre_select(image, height, width, cut_size, flags, self._t_for_pre_s)
print("pre_select over.")
print("pre-selected points:", len(np.where(flags == 1)[0]))
print("start:", end ="")
for row in range(cut_size, height - cut_size):
if row % 200 == 0:
print(":", end = "")
for col in range(cut_size, width - cut_size):
if flags[row, col] == 1:
# N for a window in (kernel_size, kernel_size)
sum_gu_2 = 0
sum_gv_2 = 0
sum_gu_gv = 0
N = np.zeros((2,2), dtype = np.float)
for var_x in range(-1*cut_size, cut_size):
for var_y in range(-1*cut_size, cut_size):
sum_gu_2 += (image[row+var_y+1, col+var_x+1] - image[row+var_y, col+var_x])**2
sum_gv_2 += (image[row+var_y+1, col+var_x] - image[row+var_y, col+var_x+1])**2
sum_gu_gv += (image[row+var_y+1, col+var_x+1] - image[row+var_y, col+var_x])*\
(image[row+var_y+1, col+var_x] - image[row+var_y, col+var_x+1])
N[0, 0] = sum_gu_2
N[1, 1] = sum_gv_2
N[0, 1] = sum_gu_gv
N[1, 0] = sum_gu_gv
q = 4*np.linalg.det(N)/((np.trace(N))**2)
w = np.linalg.det(N)/np.trace(N)
w_matrix[row, col] = w
q_matrix[row, col] = q
w_list.append(w)
# find the median and mean of all weight
w = np.array(w, dtype = np.float16)
w_mean = np.mean(w_list)
w_median = np.median(w_list)
# here we didn't need to start from cut_size
# because if w or q is 0 (as we initialized),
# then it will less than the threshold.
for row in range(height):
if row % 200 == 0:
print(":", end = "")
for col in range(width):
if flags[row, col] != 0:
if w_matrix[row, col] < self._f*w_median or \
w_matrix[row, col] < self._c*w_mean or \
q_matrix[row, col] < self._threshold_q:
flags[row, col] = 0
cut_size_max = np.floor(self._maximum_kernel_size/2).astype(np.int8)
for row in range(cut_size_max, height - cut_size_max):
if row % 200 == 0:
print(":", end = "")
for col in range(cut_size_max, width-cut_size_max):
if flags[row, col] != 0:
window = \
np.zeros((self._maximum_kernel_size, self._maximum_kernel_size),\
dtype = np.float)
window[:, :] = \
w_matrix[row-cut_size_max:row+cut_size_max+1, col-cut_size_max:col+cut_size_max+1]
if w_matrix[row, col] != np.max(window):
flags[row, col] = 0
print("end.")
print("corner points:", len(np.where(flags==1)[0]))
return flags
def __call__(self, image):
self.detect(image)
def main():
forstner = Forstner()
if __name__ == "__main__":
main()
3.6 main.py
# -*- coding: utf-8 -*-
# @author: Dasheng Fan
# @email: fandasheng1999@163.com
"""
This code is the main program.
"""
import os
import sys
import numpy as np
from matplotlib import pyplot as plt
sys.path.append("./utils")
sys.path.append("./operators")
from rasterio import readimage
from stretch import stretch
from display import display, doubledisplay, mask_process
from Forstner import Forstner
def main():
imagepath = "data\ex1\DJI_0013.JPG"
#imagepath = "C:\\Users\\Dash\\Desktop\misc\\yingpan.jpg"
img = readimage(imagepath)
forstner = Forstner()
mask = np.zeros((img.shape[0], img.shape[1]), np.float32)
mask = forstner.detect(img)
mask_for_show = mask_process(mask)
doubledisplay(mask_for_show, img)
if __name__ == '__main__':
main()