Caffe 内存管理
Caffe::Blob
Blob是一个多维的数组,可以位于内存,也可以位于显存
class Blob {
protected:
shared_ptr<SyncedMemory> data_; // 正向传播数据
shared_ptr<SyncedMemory> diff_; // 反向传播误差vector<int> shape_;
vector<int> shape_;
}
SyncedMemory
/*
State machine for where the freshest copy of the data lives:
UNINITIALIZED: not initialized; no resources allocated yet
HEAD_AT_CPU:   the CPU holds the most recent data
HEAD_AT_GPU:   the GPU holds the most recent data
SYNCED:        both CPU and GPU hold the most recent data
*/
class SyncedMemory {
enum SyncedHead { UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU, SYNCED };
private:
void to_cpu(); // make the host copy current (allocate / copy device->host as needed)
void to_gpu(); // make the device copy current (allocate / copy host->device as needed)
void* cpu_ptr_; // host-memory data pointer
void* gpu_ptr_; // device-memory data pointer
SyncedHead head_; // current state of this SyncedMemory
}
// Returns a read-only pointer to the host copy of the data, first making
// sure that copy is current (allocation and/or device->host copy).
const void* SyncedMemory::cpu_data() {
to_cpu(); // allocate and/or sync so the CPU copy is up to date
// Named cast instead of a C-style cast: greppable and intent-revealing.
return static_cast<const void*>(cpu_ptr_);
}
// Transition the state machine so the CPU holds current data.
inline void SyncedMemory::to_cpu() {
switch (head_) {
case UNINITIALIZED:
// First touch: allocate host memory and zero-fill it.
CaffeMallocHost(&cpu_ptr_, size_, &cpu_malloc_use_cuda_);
caffe_memset(size_, 0, cpu_ptr_);
head_ = HEAD_AT_CPU;
own_cpu_data_ = true;
break;
case HEAD_AT_GPU:
#ifndef CPU_ONLY
// GPU has the freshest data: lazily allocate the host buffer if it
// does not exist yet, then copy device -> host.
if (cpu_ptr_ == NULL) {
CaffeMallocHost(&cpu_ptr_, size_, &cpu_malloc_use_cuda_);
own_cpu_data_ = true;
}
caffe_gpu_memcpy(size_, gpu_ptr_, cpu_ptr_);
head_ = SYNCED; // both sides now hold the latest data
#else
NO_GPU; // built without CUDA: reaching this state is an error
#endif
break;
case HEAD_AT_CPU:
case SYNCED:
// CPU copy is already current: nothing to do.
break;
}
}
Caffe brew
A Brew is a mode of operation that determines the target architecture (CPU or GPU) on which layers execute.
define brew
class Caffe {
enum Brew { CPU, GPU }; // target architecture for layer execution
Brew mode_; // current brew (global mode of operation)
}
set_mode()
不推荐中途修改mode
// The setters for the variables
// Sets the mode. It is recommended that you don't change the mode halfway
// into the program since that may cause allocation of pinned memory being
// freed in a non-pinned way, which may cause problems - I haven't verified
// it personally but better to note it here in the header file.
inline static void set_mode(Brew mode) { Get().mode_ = mode;
/*
对于 CUDA 编程框架来说,主机端内存分为两种:
* 可分页内存(pageable memory), malloc()
* 页锁内存(pinned memory), cudaHostAlloc()
pinned memory主要属性是主机的操作系统不会对该块内存进行分页和换出,
确保该内存始终驻留在物理内存中,从而提高CPU和GPU之间的数据传输效率。
*/
caffe::layer
Caffe的Layer执行顺序是用户定义prototxt中的Layer顺序
// Dispatch the forward pass to the CPU or GPU implementation based on the
// global brew. (Simplified excerpt; parameter types and the loss
// accumulation from the top blobs are omitted.)
Dtype Layer::Forward(bottom, top) {
switch (Caffe::mode()) {
case Caffe::CPU:
Forward_cpu(bottom, top);
break;
case Caffe::GPU:
Forward_gpu(bottom, top);
break;
}
// Restored the function's closing brace, which the original excerpt lost.
}
Caffe 源码修改
caffe/src/caffe/proto
message LayerParameter { optional string ctx = 250; }
caffe/include/caffe/layer.hpp
string ctx;
inline void set_ctx(string x) {
ctx = x;
}
inline string get_ctx() {
return ctx;
}
caffe/src/caffe/net.cpp
// Runs the forward pass for layers [start, end], switching the global
// Caffe mode per layer according to each layer's ctx ("cpu"/"gpu").
// Returns the accumulated loss over the executed layers.
template <typename Dtype>
Dtype Net<Dtype>::ForwardFromTo(int start, int end) {
CHECK_GE(start, 0);
CHECK_LT(end, layers_.size());
Dtype loss = 0;
for (int i = start; i <= end; ++i) {
// add code start
LOG(INFO) << "Forwarding " << layer_names_[i];
// Fetch the context once (get_ctx() returns by value) and reuse it for
// both the log line and the comparison below.
const string ctx = layers_[i]->get_ctx();
LOG(INFO) << "ctx = " << ctx;
if (ctx == "cpu") {
Caffe::set_mode(Caffe::CPU);
} else if (ctx == "gpu") {
Caffe::set_mode(Caffe::GPU);
}
// add code end
Dtype layer_loss = layers_[i]->Forward(bottom_vecs_[i], top_vecs_[i]);
loss += layer_loss;
if (debug_info_) { ForwardDebugInfo(i); }
}
return loss;
}
// Backward 同理(在 BackwardFromTo 中做相同修改)
caffe/examples/mnist/lenet_solver.prototxt
solver_mode: CPU
caffe/examples/mnist/lenet_train_test.prototxt
layer {
ctx : "gpu"
...
}
运行
./build/tools/caffe train -solver=./examples/mnist/lenet_solver.prototxt
- 一开始声明mode是cpu,在prototxt修改layer的ctx,改成gpu,如果发现gpu在使用,证明修改成功
nvidia-smi
命令可以看到 ./build/tools/caffe 正在使用 GPU,Type 为 C(Compute,计算)而不是 G(Graphics,图形显示)
- 看运行时间,GPU 比 CPU 运行快多了