# 起因

from __future__ import print_function

import cv2
import math
import numpy as np
from numpy.linalg import norm
import time

H = 3008
W = 4112

class Validator(object):
def __init__(self):
pass

def is_valid(self, x, y):
return False

class RadiusValidator(Validator):
def __init__(self, center, R, width):
super(RadiusValidator, self).__init__()

self.R = R
self.center = center # A two element NumPy array. [x, y].
self.width = width

if ( self.width <= 0 ):
raise Exception("self.width wrong. self.width = {}".format(self.width))

# Overide parent's function.
def is_valid(self, x, y):
x = x - self.center[0]
y = y - self.center[1]

r = math.sqrt( x * x + y * y )

if ( r >= self.R - self.width and r <= self.R + self.width ):
return True
else:
return False

class PolarLineSegmentValidator(Validator):
def __init__(self, center, theta, length, width):
super(PolarLineSegmentValidator, self).__init__()

self.center = center # A two element NumPy array. [x, y].
self.theta  = theta
self.length = length
self.width  = width

self.endP    = np.zeros((2,), dtype=np.float32)
self.endP[0] = self.length * math.cos( self.theta )
self.endP[1] = self.length * math.sin( self.theta )

self.dir = self.endP / self.length

def is_valid(self, x, y):
# Transform the coordinate to the local frame.
v = np.zeros((2, ), dtype=np.float32)

v[0] = x - self.center[0]
v[1] = y - self.center[1]

# Projection.
proj = self.dir.dot( v )

if ( proj < 0 ):
return False
elif ( proj > self.length ):
return False

# Othogonal vector.
oth = v - proj*self.dir

# Distance.
d = norm( oth )

if ( d <= self.width ):
return True
else:
return False

if __name__ == "__main__":
print("List the indices for AEAG brightness evaluation.")

# Create a blank image with all zeros.
img = np.zeros((H, W), dtype=np.uint8)

imgCenter = np.array([ W/2, H/2 ], dtype=np.float32)

# Create the validators.
vList = []
vList.append( RadiusValidator( imgCenter, min( W/2, H/2 ) * 0.75, 20 ) )
vList.append( PolarLineSegmentValidator( imgCenter,  math.pi/2, min( W/2, H/2 ) * 0.75, 10 ) )
vList.append( PolarLineSegmentValidator( imgCenter,  math.pi/4, min( W/2, H/2 ) * 0.75, 10 ) )
vList.append( PolarLineSegmentValidator( imgCenter,          0, min( W/2, H/2 ) * 0.75, 10 ) )
vList.append( PolarLineSegmentValidator( imgCenter, -math.pi/4, min( W/2, H/2 ) * 0.75, 10 ) )
vList.append( PolarLineSegmentValidator( imgCenter, -math.pi/2, min( W/2, H/2 ) * 0.75, 10 ) )

# Record the starting time.
start = time.time()

for i in range(H):
for j in range(W):
flag = False
for v in vList:
flag = flag or v.is_valid( j, i )

if ( True == flag ):
img[i, j] = 255

if ( 0 == i % 100 ):
print("%d," % (i), end=None)

# Record the ending time.
end = time.time()

print(end - start)

# Save the image.
cv2.imwrite("ValidPixels.png", img)


# numba + CUDA

numba可以在没有CUDA支持时使用CPU进行加速，而这里我只感兴趣CUDA的部分。

numba要用conda配置，也是醉了。还好用了conda environment。

## numba天生支持NumPy，但是CUDA部分仅提供非常有限的支持

• kernel函数内部对大部分NumPy的函数不支持
CUDA kernel函数内不允许任何可能产生memory allocation的NumPy函数，这使得绝大部分NumPy函数都不能使用了，尤其是创建array的函数，例如zeros(), array()，和linspace()等。对array进行运算的函数也不支持。但是我们仍然可以使用NumPy的标量函数，例如sin()。kernel函数内部可以使用NumPy对象的shape属性。NumPy的indexing和slicing操作似乎仍然可以使用。
• kernel函数内部对list貌似不支持，对tuple支持。貌似可以创建list，但是索引和更新时会失败。一个在stackoverflow上的讨论。dictionary和generator就不要想了。
• kernel函数内部貌似不支持对自定义类的使用。
• device函数需要使用@cuda.jit(device=True)进行修饰，可以有返回值。我不知道为什么官方和上述github教程对device函数没有专门的章节，也许是我没有找到。

## CUDA部分代码


from __future__ import print_function

import cv2
import math
import matplotlib.pyplot as plt
import numpy as np
from numba import cuda
import time

@cuda.jit(device=True)
def d_radius_validate(cx, cy, R, width, x, y):
x = x - cx
y = y - cy

r = math.sqrt( x * x + y * y )

if ( r >= R - width and r <= R + width ):
return 255
else:
return 0

@cuda.jit(device=True)
def d_polar_line_segment_validate(cx, cy, theta, length, width, x, y):
n0 = length * math.cos(theta) / length
n1 = length * math.sin(theta) / length

v0 = x - cx
v1 = y - cy

proj = n0 * v0 + n1 * v1

if ( proj < 0 ):
return 0
elif ( proj > length ):
return 0

oth0 = v0 - proj*n0
oth1 = v1 - proj*n1

d = math.sqrt( oth0 * oth0 + oth1 * oth1 )

if ( d <= width ):
return 255
else:
return 0

@cuda.jit
def k_validate(imgOut):
tx = cuda.blockIdx.x*cuda.blockDim.x + cuda.threadIdx.x
ty = cuda.blockIdx.y*cuda.blockDim.y + cuda.threadIdx.y

xStride = cuda.blockDim.x * cuda.gridDim.x
yStride = cuda.blockDim.y * cuda.gridDim.y

cx, cy = 2056, 1504
R = 1504*0.75
halfWidth = 10.0

for y in range( ty, imgOut.shape[0], yStride ):
for x in range( tx, imgOut.shape[1], xStride ):
flag = d_radius_validate( cx, cy, R, halfWidth, 1.0*x, 1.0*y )
if ( 0 != flag ):
imgOut[y, x] = flag

flag = d_polar_line_segment_validate( cx, cy, -math.pi/2, R, halfWidth, 1.0*x, 1.0*y )
if ( 0 != flag ):
imgOut[y, x] = flag

flag = d_polar_line_segment_validate( cx, cy, -math.pi/4, R, halfWidth, 1.0*x, 1.0*y )
if ( 0 != flag ):
imgOut[y, x] = flag

flag = d_polar_line_segment_validate( cx-halfWidth, cy, 0.0, R + halfWidth, halfWidth, 1.0*x, 1.0*y )
if ( 0 != flag ):
imgOut[y, x] = flag

flag = d_polar_line_segment_validate( cx, cy, math.pi/4, R, halfWidth, 1.0*x, 1.0*y )
if ( 0 != flag ):
imgOut[y, x] = flag

flag = d_polar_line_segment_validate( cx, cy, math.pi/2, R, halfWidth, 1.0*x, 1.0*y )
if ( 0 != flag ):
imgOut[y, x] = flag

if __name__ == "__main__":
# Create an image.
img = np.zeros((3008, 4112), dtype=np.int32)

# Record the starting time.
start = time.time()

dImg = cuda.to_device(img)

cuda.synchronize()
k_validate[[100, 100, 1], [16, 16, 1]](dImg)
cuda.synchronize()

img = dImg.copy_to_host()

# Record the ending time.
end = time.time()

print(end - start)

# Save the image.
cv2.imwrite("ValidPixels_numba.png", img)

print("Done.")



©️2019 CSDN 皮肤主题: 大白 设计师: CSDN官方博客