GitHub (CUDA):https://github.com/yiwuyao3863/locality_aware_nms_east
EAST 文本检测中使用的 Locality-aware NMS 通过合并同类型的倾斜框(即 IoU 高于一定阈值的框),能够提高长文本框检测的有效性;但 CPU 版本的 Locality-aware NMS 执行速度较慢。CUDA 版本的 Locality-aware NMS 实现如下:
#include "rotate_gpu_nms.hpp"
#include <vector>
#include <iostream>
#include <cmath>
// Evaluates `condition` (an expression yielding cudaError_t) exactly once and,
// if the result is not cudaSuccess, prints the source location and the CUDA
// error string to std::cout. Execution continues afterwards: the macro only
// reports, it neither aborts nor returns.
//
// The temporary uses a reserved-looking name on purpose. With a temporary
// called `error`, a call such as CUDA_CHECK(error) would expand to
// `cudaError_t error = error;` and read an uninitialised value.
#define CUDA_CHECK(condition)                                               \
  /* do/while(0) scopes the temporary and makes the macro one statement */  \
  do {                                                                      \
    cudaError_t cuda_check_status_ = (condition);                           \
    if (cuda_check_status_ != cudaSuccess) {                                \
      std::cout << __FILE__ << ":" << __LINE__ << ": "                      \
                << cudaGetErrorString(cuda_check_status_) << std::endl;     \
    }                                                                       \
  } while (0)
// Compile-time thread count, fixed at 1024.
// NOTE(review): presumably the block size used when launching the kernels
// defined later in this file - confirm against the launch sites.
const int threadsPerBlock = 1024;
// Returns x multiplied by itself.
__device__ inline float sqr_d(float x) {
  const float squared = x * x;
  return squared;
}
// Signed area of the triangle (a, b, c): half the 2-D cross product of
// (a - c) and (b - c).
//
// a, b, c each point at two consecutive floats, read as the two coordinates
// of a point (elements [0] and [1]). The sign of the result flips when two
// vertices are swapped, so take the absolute value if a magnitude is needed.
__device__ inline float trangle_area(float * a, float * b, float * c) {
  // Scale by 0.5f rather than dividing by the double literal 2.0, which
  // promoted the whole expression to double precision. Halving is exact in
  // float, so the result is unchanged.
  return ((a[0] - c[0]) * (b[1] - c[1]) - (a[1] - c[1]) * (b[0] - c[0])) * 0.5f;
}
// Sums the absolute areas of the triangles (P0, P[t+1], P[t+2]) for
// t = 0 .. num_of_inter - 3, where point Pk is the float pair
// int_pts[2k], int_pts[2k + 1].
//
// Returns 0 when num_of_inter is less than 3, because the loop body never
// runs. int_pts is only read.
__device__ inline float area(float * int_pts, int num_of_inter) {
  float total = 0.0f;
  const int triangle_count = num_of_inter - 2;
  int t = 0;
  while (t < triangle_count) {
    // Every triangle shares the first point; the other two slide forward
    // by one point (two floats) per iteration.
    float * first = int_pts;
    float * second = int_pts + 2 * t + 2;
    float * third = int_pts + 2 * t + 4;
    total += fabs(trangle_area(first, second, third));
    ++t;
  }
  return total;
}
// Signed area of the triangle (a, b, c), computed as half the 2-D cross
// product of (a - c) and (b - c). Read-only counterpart of trangle_area.
//
// a, b, c each point at two consecutive floats, read as the two coordinates
// of a point (elements [0] and [1]). Swapping two vertices negates the result.
__device__ inline float trangle_area_rect(const float * a, const float * b, const float * c) {
  // Scale by 0.5f rather than dividing by the double literal 2.0, which
  // promoted the whole expression to double precision. Halving is exact in
  // float, so the result is unchanged.
  return ((a[0] - c[0]) * (b[1] - c[1]) - (a[1] - c[1]) * (b[0] - c[0])) * 0.5f;
}
// Read-only variant of the triangle-sum area: adds up
// |trangle_area_rect(P0, P[k-1], P[k])| for k = 2 .. num_of_inter - 1, where
// point Pk is the float pair int_pts[2k], int_pts[2k + 1].
//
// With fewer than 3 points no triangle is visited and the result is 0.
__device__ inline float area_rect(const float * int_pts, int num_of_inter) {
  float accumulated = 0.0f;
  int tri = 0;
  const int last = num_of_inter - 2;
  for (; tri < last; ++tri) {
    const int offset = 2 * tri;  // float offset of the sliding vertex pair
    const float piece =
        fabs(trangle_area_rect(int_pts, int_pts + offset + 2, int_pts + offset + 4));
    accumulated += piece;
  }
  return accumulated;
}
__device__ inline void reorder_pts(float * int_pts, int num_of_inter) {
if(num_of_inter > 0) {
float center[2];
center[0] = 0.0;
center[1] = 0.0;
for(int i = 0;i < num_of_inter;i++) {
center[0] += int_pts[2 * i];
center[1] += int_pts[2 * i + 1];
}
center[0] /= num_of_inter;
center[1] /= num_of_inter;
float vs[16];
float v[2];
float d;
for(int i = 0;i < num_of_inter;i++) {
v[0] = int_pts[2 * i]-c