#ifndef SYNCEDMEM_HPP_
#define SYNCEDMEM_HPP_
#include <cstdlib>
#include "caffe/common.hpp"
namespace caffe
{
// Allocate host memory for `size` bytes. In GPU mode the buffer is
// page-locked (pinned) via cudaMallocHost so host<->device transfers can use
// the fast DMA path; otherwise plain malloc() is used. *use_cuda records
// which allocator produced the buffer so CaffeFreeHost can pick the matching
// deallocator later.
inline void CaffeMallocHost(void** ptr, size_t size, bool* use_cuda)
{
#ifndef CPU_ONLY
  const bool gpu_mode = (Caffe::mode() == Caffe::GPU);
  if (gpu_mode)
  {
    CUDA_CHECK(cudaMallocHost(ptr, size));
    *use_cuda = true;
    return;
  }
#endif
  *use_cuda = false;
  *ptr = malloc(size);
  CHECK(*ptr) << "host allocation of size " << size << " failed";
}
// Release host memory obtained through CaffeMallocHost(). `use_cuda` must be
// the flag produced at allocation time: pinned buffers must go back through
// cudaFreeHost(), everything else through free().
inline void CaffeFreeHost(void* ptr, bool use_cuda)
{
#ifndef CPU_ONLY
  if (use_cuda)
  {
    CUDA_CHECK(cudaFreeHost(ptr));
  }
  else
  {
    free(ptr);
  }
#else
  free(ptr);
#endif
}
// Manages a block of memory that may live on the host (CPU), the device
// (GPU), or both. Allocation is lazy and synchronization happens on demand;
// head_ tracks which side currently holds the authoritative copy.
class SyncedMemory
{
public:
  // Default-construct an empty (size 0) buffer; nothing is allocated yet.
  SyncedMemory()
      : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(0), head_(UNINITIALIZED),
        own_cpu_data_(false), cpu_malloc_use_cuda_(false), own_gpu_data_(false),
        gpu_device_(-1) {}
  // Record the requested byte count; the actual allocation is deferred until
  // the first cpu_data()/gpu_data() access.
  explicit SyncedMemory(size_t size)
      : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(size), head_(UNINITIALIZED),
        own_cpu_data_(false), cpu_malloc_use_cuda_(false), own_gpu_data_(false),
        gpu_device_(-1) {}
  // Declaration only — the destructor is defined in syncedmem.cpp. The
  // previous inline empty definition `~SyncedMemory(){};` was a redefinition
  // of the out-of-line destructor (a compile/ODR error), and an empty body
  // would also have leaked both buffers.
  ~SyncedMemory();
public:
  // cpu_data()/gpu_data() return const pointers: callers must not modify the
  // underlying memory through them. mutable_cpu_data()/mutable_gpu_data()
  // return writable pointers and mark the corresponding side as holding the
  // newest data.
  const void* cpu_data();
  void set_cpu_data(void* data);
  const void* gpu_data();
  void set_gpu_data(void* data);
  void* mutable_cpu_data();
  void* mutable_gpu_data();
  // Synchronization state of the two copies.
  enum SyncedHead {UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU, SYNCED};
  SyncedHead head() {return head_;}
  size_t size() {return size_;}
#ifndef CPU_ONLY
  // Asynchronously push the CPU buffer to the GPU on the given stream.
  void async_gpu_push(const cudaStream_t& stream);
#endif
private:
  SyncedHead head_;           // which copy is currently authoritative
  void to_cpu();              // ensure a valid, up-to-date CPU copy
  void to_gpu();              // ensure a valid, up-to-date GPU copy
  void* cpu_ptr_;             // host buffer (pinned when allocated in GPU mode)
  void* gpu_ptr_;             // device buffer
  size_t size_;               // buffer size in bytes
  bool own_cpu_data_;         // true if we allocated cpu_ptr_ ourselves
  bool cpu_malloc_use_cuda_;  // true if cpu_ptr_ came from cudaMallocHost
  bool own_gpu_data_;         // true if we allocated gpu_ptr_ ourselves
  int gpu_device_;            // device on which gpu_ptr_ was allocated (-1 = none)
  DISABLE_COPY_AND_ASSIGN(SyncedMemory);
};
};
#endif /* SYNCEDMEM_HPP_ */
---------------------------------------------
 syncedmem.cpp — implementation of the SyncedMemory class declared above
---------------------------------------------
#include "common.hpp"
#include "syncedmem.hpp"
#include "util/math_functions.hpp"
namespace caffe
{
// Destructor: release whichever buffers this object owns. Buffers adopted
// through set_cpu_data()/set_gpu_data() belong to the caller and are not freed.
SyncedMemory::~SyncedMemory()
{
// Free the host copy only if we allocated it ourselves.
if(cpu_ptr_ && own_cpu_data_)
{
CaffeFreeHost(cpu_ptr_, cpu_malloc_use_cuda_);
}
#ifndef CPU_ONLY
if(gpu_ptr_ && own_gpu_data_)
{
// cudaFree must target the device that performed the allocation, so switch
// to gpu_device_ first and restore the caller's current device afterwards.
int initial_device;
cudaGetDevice(&initial_device);
if (gpu_device_ != -1)
{
CUDA_CHECK(cudaSetDevice(gpu_device_));
}
CUDA_CHECK(cudaFree(gpu_ptr_));
// NOTE(review): the get/restore-device calls are not CUDA_CHECK'd —
// presumably best-effort to avoid aborting inside a destructor; confirm.
cudaSetDevice(initial_device);
}
#endif
}
// Ensure cpu_ptr_ holds a valid, up-to-date host copy of the data,
// allocating and/or copying from the GPU as dictated by head_.
inline void SyncedMemory::to_cpu()
{
switch (head_)
{
case UNINITIALIZED:
// First access ever: allocate the host buffer and zero-fill it.
CaffeMallocHost(&cpu_ptr_, size_, &cpu_malloc_use_cuda_);
caffe_memset(size_, 0, cpu_ptr_);
head_ = HEAD_AT_CPU;
own_cpu_data_ = true;
break;
case HEAD_AT_GPU:
#ifndef CPU_ONLY//defined in Makefile.config
// GPU copy is newer: lazily allocate a host buffer, then copy device->host.
if (cpu_ptr_ == NULL)
{
CaffeMallocHost(&cpu_ptr_, size_, &cpu_malloc_use_cuda_);
own_cpu_data_ = true;
}
caffe_gpu_memcpy(size_, gpu_ptr_, cpu_ptr_);
head_ = SYNCED;
#else
NO_GPU;
#endif
break;
case HEAD_AT_CPU:
case SYNCED:
// Host copy already valid: nothing to do.
break;
}
}
inline void SyncedMemory::to_gpu()
{
#ifndef CPU_ONLY
switch (head_)
{
case UNINITIALIZED:
head_ = HEAD_AT_GPU;
own_gpu_data_ = true;
break;
case HEAD_AT_CPU:
if(gpu_ptr_ == NULL)
{
CUDA_CHECK(cudaGetDevice(&gpu_device_));
CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
own_gpu_data_ = true;
}
caffe_gpu_memcpy(size_, cpu_ptr_, gpu_ptr_);
head_ = SYNCED;
break;
case HEAD_AT_GPU:
case SYNCED:
break;
}
#else
NO_GPU
#endif
}
// Read-only host pointer; triggers a GPU->CPU sync and/or allocation first
// if the host copy is stale or missing.
const void* SyncedMemory::cpu_data()
{
  to_cpu();
  return static_cast<const void*>(cpu_ptr_);
}
// Adopt a caller-owned host buffer as the CPU copy. Any buffer this object
// allocated itself is released first; ownership of `data` remains with the
// caller (own_cpu_data_ is cleared so the destructor will not free it).
void SyncedMemory::set_cpu_data(void* data)
{
  CHECK(data);
  if (own_cpu_data_)
  {
    CaffeFreeHost(cpu_ptr_, cpu_malloc_use_cuda_);
  }
  head_ = HEAD_AT_CPU;
  cpu_ptr_ = data;
  own_cpu_data_ = false;
}
// Read-only device pointer; triggers a CPU->GPU sync and/or allocation first
// if the device copy is stale or missing. Fatal in CPU_ONLY builds.
const void* SyncedMemory::gpu_data()
{
#ifndef CPU_ONLY
  to_gpu();
  return static_cast<const void*>(gpu_ptr_);
#else
  NO_GPU;
  return NULL;
#endif
}
// Adopt a caller-owned device buffer as the GPU copy. Any device buffer this
// object allocated itself is released first; ownership of `data` remains with
// the caller (own_gpu_data_ is cleared so the destructor will not free it).
void SyncedMemory::set_gpu_data(void* data)
{
#ifndef CPU_ONLY
CHECK(data);
if (own_gpu_data_)
{
// cudaFree must target the device that allocated the buffer, so switch to
// gpu_device_ first and restore the caller's current device afterwards.
int initial_device;
cudaGetDevice(&initial_device);
if (gpu_device_ != -1)
{
CUDA_CHECK(cudaSetDevice(gpu_device_));
}
CUDA_CHECK(cudaFree(gpu_ptr_));
// NOTE(review): get/restore-device calls unchecked — presumably deliberate
// best-effort, matching the destructor; confirm against upstream Caffe.
cudaSetDevice(initial_device);
}
gpu_ptr_ = data;
head_ = HEAD_AT_GPU;
own_gpu_data_ = false;
#else
NO_GPU;
#endif
}
// Writable host pointer. head_ is forced to HEAD_AT_CPU *after* to_cpu()
// (which may have left it SYNCED), so that a subsequent gpu_data() call will
// re-copy the possibly-modified host data to the device.
void* SyncedMemory::mutable_cpu_data()
{
to_cpu();
head_ = HEAD_AT_CPU;
return cpu_ptr_;
}
// Writable device pointer. head_ is forced to HEAD_AT_GPU *after* to_gpu()
// (which may have left it SYNCED), so that a subsequent cpu_data() call will
// re-copy the possibly-modified device data back to the host.
void* SyncedMemory::mutable_gpu_data()
{
#ifndef CPU_ONLY
to_gpu();
head_ = HEAD_AT_GPU;
return gpu_ptr_;
#else
NO_GPU;
return NULL;
#endif
}
#ifndef CPU_ONLY
// Asynchronously copy the host buffer to the device on `stream`. Only valid
// when the CPU holds the newest data (CHECK below). The copy may still be in
// flight when this returns, so the caller must synchronize the stream before
// reading gpu_ptr_.
void SyncedMemory::async_gpu_push(const cudaStream_t& stream)
{
CHECK(head_ == HEAD_AT_CPU);
// Lazily allocate the device buffer on the current device.
if (gpu_ptr_ == NULL)
{
CUDA_CHECK(cudaGetDevice(&gpu_device_));
CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
own_gpu_data_ = true;
}
const cudaMemcpyKind put = cudaMemcpyHostToDevice;
CUDA_CHECK(cudaMemcpyAsync(gpu_ptr_, cpu_ptr_, size_, put, stream));
// Marked SYNCED optimistically; true once the stream completes.
head_ = SYNCED;
}
#endif
};
=== Self-written test program below, analyzing constructor/destructor behavior ===
#include <iostream>
#include <climits>
#include <cstdlib>
#include <boost/shared_ptr.hpp>
using namespace std;
using boost::shared_ptr;
// Toy stand-in for Caffe's host allocator: plain malloc() with a logged
// failure check. On failure *ptr is left NULL and a message is printed.
inline void CaffeMallocoHost(void** ptr, size_t size)
{
  *ptr = malloc(size);
  // Bug fix: the original tested `ptr` (the out-parameter itself, which is
  // never NULL here) instead of `*ptr` (the actual allocation result), so a
  // failed malloc was never reported.
  if (*ptr == NULL)
  {
    std::cout << "malloc error in fuction CaffeMallocoHost !" << std::endl;
  }
}
// Toy stand-in for Caffe's host deallocator: traces the call, and frees the
// buffer (with a second trace line) only when the pointer is non-NULL.
inline void CaffeFreeHost(void* ptr)
{
  cout<<">>>>>>>>>>>>>CaffeFreeHost";
  if (ptr == NULL)
  {
    return;
  }
  free(ptr);
  cout<<">>>>>>>>>>>now free cpu_ptr_ "<<endl;
}
// Minimal mock of SyncedMemory used to observe when the constructor and
// destructor run (each prints a trace tagged with num_).
class synced
{
public:
  // Bug fix: the member-initializer list is reordered to match declaration
  // order (members are always initialized in declaration order, and the old
  // mismatch triggered -Wreorder); cpu_malloc_use_cuda_ now uses a proper
  // bool literal instead of 0.
  synced(size_t size, int num)
      : cpu_ptr_(NULL), own_cpu_data_(false), gpu_ptr_(NULL),
        own_gpu_data_(false), cpu_malloc_use_cuda_(false), size_(size),
        num_(num)
  {
    cout << "constructor " << num_ << " called !\n";
  }
  // Frees the host buffer (CaffeFreeHost tolerates NULL) and traces the call.
  ~synced()
  {
    CaffeFreeHost(cpu_ptr_);
    cout << "destructor " << num_ << " called!\n";
  }
  // Lazily allocate the host buffer. Bug fix: the original re-allocated on
  // every call, leaking the previous buffer whenever cpu_data() was called
  // more than once.
  void to_cpu()
  {
    if (cpu_ptr_ == NULL)
    {
      CaffeMallocoHost(&cpu_ptr_, size_);
    }
  }
  // Writable pointer to the (lazily allocated) host buffer.
  void* cpu_data()
  {
    to_cpu();
    return cpu_ptr_;
  }
private:
  void* cpu_ptr_;             // host buffer, owned by this object
  bool own_cpu_data_;         // unused in the mock; mirrors SyncedMemory
  void* gpu_ptr_;             // unused in the mock; mirrors SyncedMemory
  bool own_gpu_data_;         // unused in the mock; mirrors SyncedMemory
  bool cpu_malloc_use_cuda_;  // unused in the mock; mirrors SyncedMemory
  size_t size_;               // requested buffer size in bytes
  int num_;                   // instance tag printed in the traces
};
// Driver: allocate a 10-int buffer through the mock class, write its last
// element, and let shared_ptr destroy the object at scope exit (printing the
// constructor/destructor traces).
int main()
{
  shared_ptr<synced> data(new synced(10 * sizeof(int), 1));
  int* buffer = static_cast<int*>(data->cpu_data());
  buffer[9] = 10;
  return 0;
}
Open question: how could Caffe's memory management rewrite CaffeMallocHost() / CaffeFreeHost()
using new/delete instead of malloc/free? The main difficulty is that C++ cannot allocate or
delete through a void*: `new` requires a concrete type, and applying `delete` to a void* is
undefined behavior (no type to destroy). A common workaround is to allocate `new char[size]`
and free with `delete[] static_cast<char*>(ptr)`.