关于CUDA6.5中newdelete的解读

最新推荐文章于 2023-01-07 06:20:18 发布

zhbxlm

最新推荐文章于 2023-01-07 06:20:18 发布

阅读量1.3k

点赞数

分类专栏： CUDA

本文链接：https://blog.csdn.net/zhbxlm/article/details/45342619

版权

CUDA 专栏收录该内容

5 篇文章 0 订阅

订阅专栏

最近在检查自己项目中存在的问题，涉及到了new 和 delete 的问题，于是查找CUDA6.5中的simple，中间有关于newdelete的例子，我本想随意参考一下吧，没想到，这次参考倒是让我研究了整整一天多的时间

这里贴出部分代码：

// Abstract device-side container interface.
//
// Declares the two operations every concrete container must provide:
//   push(e) - store a copy of e in the container
//   pop(e)  - try to remove an element into e; the bool result reports
//             whether an element was actually produced
// All members are __device__ only, so objects of derived types have to be
// created and used from device code.
template <typename T>
class Container
{
public:
    // Nothing to initialise: the interface carries no state of its own.
    __device__ Container() {}

    // Virtual so that destroying through a Container<T>* reaches the
    // derived class destructor.
    __device__ virtual ~Container() {}

    // Insert one element (pure virtual).
    __device__ virtual void push(T e) = 0;

    // Remove one element into `e` (pure virtual).
    __device__ virtual bool pop(T &e) = 0;
};

// Fixed-capacity LIFO stack for device code, implementing Container<T>.
//
// Storage is either allocated with device-side new[] (first constructor) or
// supplied by the caller and initialised in place with placement new (second
// constructor). m_top is the index of the most recently pushed element and is
// -1 when the stack is empty.
//
// Concurrency: several threads may call push() at the same time, or pop() at
// the same time, but pushes and pops must be separated by a barrier (the
// slot index is claimed atomically, the element copy itself is not).
//
// NOTE(review): the class is still implicitly copyable; copying a Vector that
// owns its storage would lead to a double delete[] - confirm no caller copies.
template<class T>
class Vector : public Container<T> {

public:
   // Constructor, data is allocated on the heap.
   // A negative max_size is treated as 0.
   // NOTE: This must be called from only one thread
   __device__
   Vector(int max_size)
       : m_size(max_size > 0 ? max_size : 0), m_data(0), m_top(-1), m_owns_data(true) {
       m_data = new T[m_size];
       // Keeps the original's assumption that a failed device allocation
       // yields a null pointer; in that case the stack has no capacity.
       if( !m_data ) m_size = 0;
   }

   // Constructor, data uses preallocated buffer via placement new.
   // The buffer must have room for max_size elements of T and stays owned
   // by the caller: the destructor never frees it.
   __device__
   Vector(int max_size, T* preallocated_buffer)
       : m_size(max_size > 0 ? max_size : 0), m_data(0), m_top(-1), m_owns_data(false) {
       if( preallocated_buffer ) {
           m_data = new (preallocated_buffer) T[m_size];
       } else {
           // No storage at all: push/pop become no-ops.
           m_size = 0;
       }
   }

   // Destructor. Heap storage is freed; caller-provided storage is left
   // alone and only the elements constructed in it are destroyed.
   // NOTE: This must be called from only one thread
   __device__
   ~Vector() {
       if( m_data ) {
           if( m_owns_data ) {
               delete [] m_data;
           } else {
               // delete[] on memory that did not come from new[] is
               // undefined behaviour, so destroy the elements explicitly.
               for( int i = m_size - 1; i >= 0; --i ) {
                   m_data[i].~T();
               }
           }
       }
       m_data = 0;
   }

   // Push a copy of e. If the stack is already full (or has no storage)
   // the element is silently dropped instead of being written out of bounds.
   __device__
   virtual
   void push(T e) {
        if( m_data ) {
            // Atomically claim the next slot
            int idx = atomicAdd(&(this->m_top), 1) + 1;
            if( idx < m_size ) {
                m_data[idx] = e;
            } else {
                // No room: give the claimed slot back so m_top stays valid
                atomicAdd(&(this->m_top), -1);
            }
        }
   }

   // Pop the top element into e. Returns true on success; returns false and
   // leaves e untouched when the stack is empty.
   __device__
   virtual
   bool pop(T &e) {
       if( m_data && m_top >= 0 ) {
           // Atomically decrement the top idx
           int idx = atomicAdd( &(this->m_top), -1 );
           if( idx >= 0 ) {
               e = m_data[idx];
               return true;
           }
           // Another thread took the last element between the check and
           // the decrement: undo it so m_top never drifts below -1.
           atomicAdd( &(this->m_top), 1 );
       }
       return false;

   }

private:
   int m_size;        // capacity in elements (0 when there is no storage)
   T* m_data;         // element storage, or 0 when unavailable

   int m_top;         // index of the top element, -1 when empty
   bool m_owns_data;  // true when m_data came from new[] and must be delete[]d
};

// Kernel demonstrating placement new in shared memory.
//
// One Vector<int> object and its 1024-element backing array are both placed
// in shared memory. Every even-numbered thread pushes threadIdx.x / 2, then
// every thread tries to pop one value and writes it (or -1 when the pop
// fails) to d_result[threadIdx.x].
//
// d_result is indexed by threadIdx.x only, so it must hold at least
// blockDim.x ints.
// NOTE(review): presumably launched with a single block - confirm at the call
// site, since every block would write the same d_result entries.
__global__
void placementNew(int *d_result)
{
    // Raw bytes that will hold the Vector<int> object itself (8-byte aligned).
    __shared__ unsigned char __align__(8) vec_storage[sizeof(Vector<int>)];
    // Element storage handed to the Vector as its preallocated buffer.
    __shared__ int __align__(8) elem_storage[1024];
    // Pointer to the constructed object, visible to the whole block.
    __shared__ Vector<int> *vec;

    const unsigned int tid = threadIdx.x;

    // Only thread 0 constructs the object: placement new builds the Vector
    // inside vec_storage, and the Vector in turn placement-constructs its
    // elements inside elem_storage.
    if (tid == 0)
    {
        vec = new(vec_storage) Vector<int>(1024, elem_storage);
    }

    // Make the constructed object and the pointer visible to all threads.
    __syncthreads();

    // Even threads each push half of their thread index.
    const bool is_even_thread = (tid % 2u) == 0;
    if (is_even_thread)
    {
        vec->push(tid / 2u);
    }

    // Need to sync as the vector implementation does not support concurrent push/pop operations.
    __syncthreads();

    // Every thread attempts a pop; threads that find the stack empty report -1.
    int popped;
    d_result[tid] = vec->pop(popped) ? popped : -1;

    // Note: deleting objects placed in shared memory is not necessary (lifetime of shared memory is that of the block)
}

其中的注释是个人理解加上的。

按照代码顺序将这次将所有收获总结一下：

1. __align__(8)问题：虽然我以前了解字节对齐的原因和具体用法，但在我看来32位系统4字节对齐就可以了，这里出现8字节对齐让我有些困惑。在网上看到有位仁兄提到，定义结构体或联合体时对齐的规定为8：http://blog.csdn.net/bruce0532/article/details/4719540 。更直接的原因是：在64位编译下指针占8字节，而Vector<int>内部含有指针成员（虚表指针和m_data），所以用来存放该对象的s_buffer必须按8字节对齐。

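2. sizeof(Vector<int>)问题：这里我本来没有怎么注意，后来随意看了一下大小，结果却出乎我的预料。我开始的计算是：Vector的属性为4+4+4，加上虚函数表指针4+4，加上继承来的Container虚表4+4，共计28，可结果是32；当时我的理解是Vector中还保留了一个指向Container表的指针4，共计刚好32字节。（补充更正：更准确的解释是，单继承下对象里只有一个虚表指针，64位编译时指针占8字节，实际布局为虚表指针8 + m_size 4 + 填充4 + m_data指针8 + m_top 4 + 尾部填充4 = 32字节。）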

3. placement new问题：其实详细阅读程序时的困惑主要就来自这个问题。s_vector = new(s_buffer) Vector<int>(1024, s_data); 这句话困惑了我好久，以前没有接触过这种new的使用方式，通过网上查阅加上单步调试才得到如注释所述的结论。可能是我习惯了“new出来的对象就在堆里”这个结论吧；而placement new并不分配内存，只是在指定的地址上构造对象，这里就是在共享内存（shared memory）中已声明好的s_buffer上构造Vector对象。

4. 这里因为new而又详细阅读了关于传值与传引用的问题，这里就不再细说

zhbxlm

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
关于CUDA6.5中newdelete的解读

最近在检查自己项目中存在的问题，涉及到了new 和 delete 的问题，于是查找CUDA6.5中的simple，中间有关于newdelete的例子，我本想随意参考一下吧，没想到，这次参考倒是让我研究了整整一天多的时间这里贴出部分代码：templateclass Vector : public Container {public: // Con
复制链接

扫一扫