求overlaps
判定一个点是否在另一个矩形中
__device__ inline bool inrect(float pt_x, float pt_y, float * pts) {
  // Returns true when the query point (pt_x, pt_y) lies inside, or exactly on
  // the border of, the quadrilateral described by pts.
  //
  // pts holds corner coordinates as x/y pairs. Corner A is (pts[0], pts[1]);
  // B = (pts[2], pts[3]) and D = (pts[6], pts[7]) are used as the two corners
  // adjacent to A. pts[4] and pts[5] are never read, so the test is only
  // meaningful when AB and AD are perpendicular (i.e. a rectangle).
  const double ab_x = pts[2] - pts[0];
  const double ab_y = pts[3] - pts[1];
  const double ad_x = pts[6] - pts[0];
  const double ad_y = pts[7] - pts[1];
  const double ap_x = pt_x - pts[0];
  const double ap_y = pt_y - pts[1];

  // The projection of AP onto AB has to land within [0, |AB|^2].
  const double ab_len_sq = ab_x * ab_x + ab_y * ab_y;
  const double ab_dot_ap = ab_x * ap_x + ab_y * ap_y;
  if (!((ab_len_sq - ab_dot_ap >= 0) && (ab_dot_ap >= 0))) {
    return false;
  }

  // Same requirement along the other edge, AD.
  const double ad_len_sq = ad_x * ad_x + ad_y * ad_y;
  const double ad_dot_ap = ad_x * ap_x + ad_y * ap_y;
  return (ad_len_sq - ad_dot_ap >= 0) && (ad_dot_ap >= 0);
}
求线段交点
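线段 a 由端点 $\mathbf{a}_1$ 和 $\mathbf{a}_2$ 表示, 线段 b 由端点 $\mathbf{b}_1$ 和 $\mathbf{b}_2$ 表示. 为了利用向量的叉乘关系, 将这四个端点都看成向量(下面用粗体表示向量). 根据向量运算, 两条线段的方向向量为
$\mathbf{a} = \mathbf{a}_2 - \mathbf{a}_1$,
$\mathbf{b} = \mathbf{b}_2 - \mathbf{b}_1$.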
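将线段表示为参数方程: 线段 a 上的点为 $\mathbf{a}_1 + t\,\mathbf{a}$, 线段 b 上的点为 $\mathbf{b}_1 + u\,\mathbf{b}$,
其中参数 $t, u \in [0, 1]$. 两条线段相交时具有如下关系: $\mathbf{a}_1 + t\,\mathbf{a} = \mathbf{b}_1 + u\,\mathbf{b}$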
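将上式两边同时叉乘 $\mathbf{b}$, 得到:
$(\mathbf{a}_1 + t\,\mathbf{a}) \times \mathbf{b} = (\mathbf{b}_1 + u\,\mathbf{b}) \times \mathbf{b}$. 由于 $\mathbf{b} \times \mathbf{b} = 0$, 可得 $\mathbf{a}_1 \times \mathbf{b} + t\,(\mathbf{a} \times \mathbf{b}) = \mathbf{b}_1 \times \mathbf{b}$. 当 $\mathbf{a} \times \mathbf{b} \neq 0$ (即两条线段不平行) 时, 解出参数 $t$:
$t = \dfrac{(\mathbf{b}_1 - \mathbf{a}_1) \times \mathbf{b}}{\mathbf{a} \times \mathbf{b}}$. 代入线段 a 的参数方程中, 即可得到线段交点坐标: $\mathbf{a}_1 + t\,\mathbf{a}$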
将上式中的中间变量用原始的线段端点表示, 即可得到根据线段端点表示的交点.
改进
源代码是先求出所有位于另一个矩形中的顶点(内点, 包括落在另一个矩形边上的点), 然后再求两个矩形各条边之间的交点(重合的边不算), 这样会造成最后得到的相交区域的顶点集合顺序是乱的。
改进思路是: 先固定一个矩形的一条边, 然后按逆时针顺序循环比较另一个矩形的各条边; 如果这两条边的起始顶点是内点, 则马上将其加入相交区域的顶点集合, 然后再加入这两条边的交点。
RROIalign
把ROI分成 pooled_height x pooled_width 个部分, 在每个区域的中心点(坐标为小数)周围取四个实际的像素点(对 x、y 分别向下、向上取整), 并利用双线性插值算出中心点对应的值。
// Forward pass of rotated ROI-align pooling.
//
// Each loop index produces one pooled output element. The flat index is
// decomposed as (n, c, ph, pw) with pw varying fastest, i.e. top_data is laid
// out as [roi][channel][pooled_height][pooled_width].
//
// Parameters:
//   nthreads       number of output elements to produce (loop bound).
//   bottom_data    input feature maps, indexed as
//                  ((batch * channels + c) * height + y) * width + x.
//   spatial_scale  factor applied to the ROI centre and size to map them onto
//                  the feature map.
//   height, width  spatial size of one feature-map plane.
//   channels       number of feature-map channels.
//   pooled_height, pooled_width  size of the pooled output grid per ROI.
//   bottom_rois    6 values per ROI: [batch_index, cx, cy, h, w, angle], with
//                  the angle given in degrees (it is converted to radians).
//   top_data       pooled output; the interpolated value is *added* to
//                  top_data[index].
//   con_idx_x/y    the sampling-point x / y coordinate of each bin is *added*
//                  to con_idx_x[index] / con_idx_y[index].
//
// Preconditions: because results are accumulated with atomicAdd, top_data,
// con_idx_x and con_idx_y must be zero-initialised by the caller to obtain the
// plain pooled values. atomicAdd on double requires SM60+.
// Launch layout is whatever CUDA_1D_KERNEL_LOOP expects (defined elsewhere;
// presumably a 1-D launch covering nthreads — confirm against the macro).
template <typename T>
__global__ void RROIAlignForward(
    const int nthreads,
    const T* bottom_data,
    const T spatial_scale,
    int height,
    int width,
    int channels,
    const int pooled_height,
    const int pooled_width,
    const T* bottom_rois,
    T* top_data,
    float* con_idx_x,
    float* con_idx_y)
{
  CUDA_1D_KERNEL_LOOP(index, nthreads)
  {
    // (n, c, ph, pw) is an element in the pooled output.
    int n = index;
    const int pw = n % pooled_width;
    n /= pooled_width;
    const int ph = n % pooled_height;
    n /= pooled_height;
    const int c = n % channels;
    n /= channels;

    // Each ROI record is 6 values: batch index, centre x/y, height, width,
    // angle in degrees.
    const T* offset_bottom_rois = bottom_rois + n * 6;
    const int roi_batch_ind = static_cast<int>(offset_bottom_rois[0]);
    T cx = offset_bottom_rois[1];
    T cy = offset_bottom_rois[2];
    T h = offset_bottom_rois[3];
    T w = offset_bottom_rois[4];
    T angle = offset_bottom_rois[5]/180.0*3.1415926535;  // degrees -> radians

    // Build the 2x3 affine matrix M that maps pooled-grid coordinates
    // (pw, ph) to feature-map coordinates: shift the grid so it is centred
    // on the origin (dx, dy), scale by the bin size (Sx, Sy), rotate by
    // `angle`, then translate to the scaled ROI centre (Dx, Dy).
    T dx = -pooled_width/2.0;
    T dy = -pooled_height/2.0;
    T Sx = w*spatial_scale/pooled_width;
    T Sy = h*spatial_scale/pooled_height;
    T Alpha = cos(angle);
    T Beta = sin(angle);
    T Dx = cx*spatial_scale;
    T Dy = cy*spatial_scale;

    T M[2][3];
    M[0][0] = Alpha*Sx;
    M[0][1] = Beta*Sy;
    M[0][2] = Alpha*Sx*dx+Beta*Sy*dy+Dx;
    M[1][0] = -Beta*Sx;
    M[1][1] = Alpha*Sy;
    M[1][2] = -Beta*Sx*dx+Alpha*Sy*dy+Dy;

    // Feature-map coordinates (x, y pairs) of the four corners of bin
    // (pw, ph): (pw, ph), (pw, ph+1), (pw+1, ph), (pw+1, ph+1).
    T P[8];
    P[0] = M[0][0]*pw+M[0][1]*ph+M[0][2];
    P[1] = M[1][0]*pw+M[1][1]*ph+M[1][2];
    P[2] = M[0][0]*pw+M[0][1]*(ph+1)+M[0][2];
    P[3] = M[1][0]*pw+M[1][1]*(ph+1)+M[1][2];
    P[4] = M[0][0]*(pw+1)+M[0][1]*ph+M[0][2];
    P[5] = M[1][0]*(pw+1)+M[1][1]*ph+M[1][2];
    P[6] = M[0][0]*(pw+1)+M[0][1]*(ph+1)+M[0][2];
    P[7] = M[1][0]*(pw+1)+M[1][1]*(ph+1)+M[1][2];

    // Axis-aligned bounding box of the rotated bin, rounded to integers and
    // clamped on one side each to the feature-map extent.
    T leftMost = (max(round(min(min(P[0],P[2]),min(P[4],P[6]))),0.0));
    T rightMost= (min(round(max(max(P[0],P[2]),max(P[4],P[6]))),width-1.0));
    T topMost= (max(round(min(min(P[1],P[3]),min(P[5],P[7]))),0.0));
    T bottomMost= (min(round(max(max(P[1],P[3]),max(P[5],P[7]))),height-1.0));

    // Start of the (batch, channel) plane. The offset is formed in 64-bit so
    // that large batch * channels * height * width products cannot overflow
    // a 32-bit int.
    const T* offset_bottom_data = bottom_data +
        (static_cast<long long>(roi_batch_ind) * channels + c) * height * width;

    // Sampling point: centre of the clamped bounding box (no +0.5 shift).
    float bin_cx = (leftMost + rightMost) / 2.0;
    float bin_cy = (topMost + bottomMost) / 2.0;

    // The four integer neighbours of the sampling point.
    const int bin_l = (int)floor(bin_cx);
    const int bin_r = (int)ceil(bin_cx);
    const int bin_t = (int)floor(bin_cy);
    const int bin_b = (int)ceil(bin_cy);

    // Row 0 and column 0 are valid pixels, so the lower bound is inclusive
    // (a strict `> 0` test would silently read them as zero). Neighbours
    // outside the plane contribute zero.
    // NOTE(review): any matching backward kernel should use the same bounds.
    const bool l_ok = bin_l >= 0 && bin_l < width;
    const bool r_ok = bin_r >= 0 && bin_r < width;
    const bool t_ok = bin_t >= 0 && bin_t < height;
    const bool b_ok = bin_b >= 0 && bin_b < height;

    T lt_value = 0.0;
    if (t_ok && l_ok)
      lt_value = offset_bottom_data[bin_t * width + bin_l];
    T rt_value = 0.0;
    if (t_ok && r_ok)
      rt_value = offset_bottom_data[bin_t * width + bin_r];
    T lb_value = 0.0;
    if (b_ok && l_ok)
      lb_value = offset_bottom_data[bin_b * width + bin_l];
    T rb_value = 0.0;
    if (b_ok && r_ok)
      rb_value = offset_bottom_data[bin_b * width + bin_r];

    // Bilinear weights from the fractional part of the sampling point.
    T rx = bin_cx - floor(bin_cx);
    T ry = bin_cy - floor(bin_cy);

    T wlt = (1.0 - rx) * (1.0 - ry);
    T wrt = rx * (1.0 - ry);
    T wrb = rx * ry;
    T wlb = (1.0 - rx) * ry;

    T inter_val = 0.0;
    inter_val += lt_value * wlt;
    inter_val += rt_value * wrt;
    inter_val += rb_value * wrb;
    inter_val += lb_value * wlb;

    // NOTE(review): results are accumulated, not stored. If the loop macro
    // visits every index exactly once (presumably it does — confirm), plain
    // stores would be sufficient and would not require pre-zeroed buffers;
    // accumulation is kept here because callers may depend on it.
    atomicAdd(top_data + index, static_cast<T>(inter_val));
    atomicAdd(con_idx_x + index, static_cast<float>(bin_cx));
    atomicAdd(con_idx_y + index, static_cast<float>(bin_cy));
  }
}