k Nearest Neighbor Search by CUDA

最新推荐文章于 2023-03-29 10:28:09 发布

hongqiang200

最新推荐文章于 2023-03-29 10:28:09 发布

阅读量1.5k

点赞数

分类专栏： cuda 文章标签： cuda search float 桌面环境 algorithm library

cuda 专栏收录该内容

27 篇文章 0 订阅

订阅专栏

kNN（k Nearest Neighbor）是常用的群集算法（Cluster Algorithm）用于空间搜索。目前最快的kNN方法莫过于KDTree的版本，不过基本上都是CPU的比如ANN C++ Library。对于GPU来说，实现加速结构比较复杂，因为没有栈所以无法递归，而且执行资源有限，不能像CPU一样舒舒服服的顺序执行。

为了方便起见我直接用komrade粘合C++与CUDA，下面是测试程序的代码。

#include < komrade / device_vector.h >
#include < komrade / transform.h >
#include < komrade / range.h >
#include < komrade / copy.h >
#include < komrade / fill.h >
#include < komrade / sort.h >
#include < komrade / replace.h >
#include < komrade / functional.h >
#include < iostream >
#include < iterator >
#include < math.h >
#include < memory >
#include < boost / timer.hpp >

using namespace std;

#include < ANN / ANN.h >
#pragma comment(lib,"ANN.lib")

struct dist
{
    dist( float qx, float qy, float qz) : x(qx), y(qy), z(qz)
    {
    }
     float x,y,z;

    __host__ __device__ float operator ()( const float3 & p) const
    {
         float a = p.x - x;
         float b = p.y - y;
         float c = p.z - z;
         return a * a + b * b + c * c;
    }
};

int main( int argc, char * argv[])
{
     if ( argc < 2 )
         return - 1 ;

     int N = 0 ;
    sscanf(argv[ 1 ], " %d " , & N);

    boost::timer Stopwatch;
    ANNpointArray ANNPh = annAllocPts(N, 3 );
    komrade::host_vector < float3 > Ph(N);
    komrade::device_vector < int > Idx(N);
    komrade::device_vector < float > Dist(N);


     for ( int i = 0 ; i < N; ++ i)
    {
         float x = ( float )rand() / ( float )RAND_MAX;;
         float y = ( float )rand() / ( float )RAND_MAX;;
         float z = ( float )rand() / ( float )RAND_MAX;;

        Ph[i].x = x;
        Ph[i].y = y;
        Ph[i].z = z;

        ANNPh[i][ 0 ] = x;
        ANNPh[i][ 1 ] = y;
        ANNPh[i][ 2 ] = z;

         if ( N < 32 )
            printf( " <%f %f %f>\n " ,x,y,z);
        Idx[i] = i;
    }
    cout << fixed << " Generate Data Used " << Stopwatch.elapsed() << endl;
    Stopwatch.restart();

     float QueryPos[ 3 ];
    QueryPos[ 0 ] = ( float )rand() / ( float )RAND_MAX * 0.1f + 0.5f ;
    QueryPos[ 1 ] = ( float )rand() / ( float )RAND_MAX * 0.1f + 0.5f ;
    QueryPos[ 2 ] = ( float )rand() / ( float )RAND_MAX * 0.1f + 0.5f ;
     if ( N < 32 )
        printf( " [%f %f %f]\n " ,QueryPos[ 0 ],QueryPos[ 1 ],QueryPos[ 2 ]);

    komrade::device_vector < float3 > Pd = Ph;
    cout << fixed << " Copy Data Used " << Stopwatch.elapsed() << endl;
    Stopwatch.restart();

    komrade::transform(Pd.begin(), Pd.end(), Dist.begin(), dist(QueryPos[ 0 ], QueryPos[ 1 ], QueryPos[ 2 ]));
    komrade::sort_by_key(Dist.begin(), Dist.end(), Idx.begin());
    cout << fixed << " Sort Data by CUDA Used " << Stopwatch.elapsed() << endl;
    Stopwatch.restart();

    ANNkd_tree ANNTree(ANNPh,N, 3 );
    ANNidx ANNIdx[ 1 ];
    ANNdist ANNDst[ 1 ];
    ANNTree.annkSearch(QueryPos, 1 ,ANNIdx,ANNDst);
    cout << fixed << " Sort Data by ANN Used " << Stopwatch.elapsed() << endl;


     if ( N < 32 )
    {
        copy(Dist.begin(), Dist.end(), ostream_iterator < float > (cout, " " ));
        cout << endl;
        copy(Idx.begin(), Idx.end(), ostream_iterator < int > (cout, " " ));
    }


     return 0 ;
}

下面是性能分析对比，可以看到CUDA版本在大量点的情况下还是非常有优势的，不过在纯RenderFarm就用不上了，恐怕只能在装备有G80以上芯片的桌面环境或者工作站上执行。测试环境如下，

Intel E5200@2.9G
Apacer 2Gx2
9800GT 512M
CUDA 2.2, komrade 0.9

	10	100	1000	10000	100000	1000000
CUDA	0.003	0.003	0.00300	0.007	0.012	0.038
ANN	0.000	0.000	0.00100	0.008	0.131	2.490

from: http://www.cnblogs.com/Jedimaster/archive/2009/06/07/1497985.html

hongqiang200

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
k Nearest Neighbor Search by CUDA

kNN（k Nearest Neighbor）是常用的群集算法（Cluster Algorithm）用于空间搜索。目前最快的kNN方法莫过于KDTree的版本，不过基本上都是CPU的比如ANN C++ Library。对于GPU来说，实现加速结构比较复杂，因为没有栈所以无法递归，而且执行资源有限，不能像CPU一样舒舒服服的顺序执行。为了方便起见我直接用komrade粘合C++与CUDA，下面是
复制链接

扫一扫