Caffe 内存管理
Caffe::Blob
Blob是一个多维的数组,可以位于内存,也可以位于显存
class Blob {
protected:
shared_ptr<SyncedMemory> data_; // 正向传播数据
shared_ptr<SyncedMemory> diff_; // 反向传播误差vector<int> shape_;
vector<int> shape_;
}
SyncedMemory
/*
State machine for where the freshest copy of the data lives:
UNINITIALIZED: not initialized; no resources allocated yet
HEAD_AT_CPU:   the CPU holds the most recent data
HEAD_AT_GPU:   the GPU holds the most recent data
SYNCED:        both CPU and GPU hold the most recent data
*/
class SyncedMemory {
enum SyncedHead { UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU, SYNCED };
private:
void to_cpu(); // make the host copy current (allocate / copy device->host as needed)
void to_gpu(); // make the device copy current (allocate / copy host->device as needed)
void* cpu_ptr_; // host-memory data pointer
void* gpu_ptr_; // device-memory data pointer
SyncedHead head_; // current state of this SyncedMemory
}
// Returns a read-only pointer to the host copy of the data, first making
// sure that copy is current (allocation and/or device->host copy).
const void* SyncedMemory::cpu_data() {
to_cpu(); // allocate and/or sync so the CPU copy is up to date
// Named cast instead of a C-style cast: greppable and intent-revealing.
return static_cast<const void*>(cpu_ptr_);
}
// Transition the state machine so the CPU holds current data.
inline void SyncedMemory::to_cpu() {
switch (head_) {
case UNINITIALIZED:
// First touch: allocate host memory and zero-fill it.
CaffeMallocHost(&cpu_ptr_, size_, &cpu_malloc_use_cuda_);
caffe_memset(size_, 0, cpu_ptr_);
head_ = HEAD_AT_CPU;
own_cpu_data_ = true;
break;
case HEAD_AT_GPU:
#ifndef CPU_ONLY
// GPU has the freshest data: lazily allocate the host buffer if it
// does not exist yet, then copy device -> host.
if (cpu_ptr_ == NULL) {
CaffeMallocHost(&cpu_ptr_, size_, &cpu_malloc_use_cuda_);
own_cpu_data_ = true;
}
caffe_gpu_memcpy(size_, gpu_ptr_, cpu_ptr_);
head_ = SYNCED; // both sides now hold the latest data
#else
NO_GPU; // built without CUDA: reaching this state is an error
#endif
break;
case HEAD_AT_CPU:
case SYNCED:
// CPU copy is already current: nothing to do.
break;
}
}
Caffe brew
A Brew is a mode of operation that determines the target architecture (CPU or GPU) on which layers execute.
define brew
class Caffe {
enum Brew { CPU, GPU }; // target architecture for layer execution
Brew mode_; // current brew (global mode of operation)
}
set_mode()
不推荐中途修改mode
// The setters for the variables
// Sets the mode. It is recommended that you don't change the mode halfway
// into the program since that may cause allocation of pinned memory being
// freed in a non-pinned way, which may cause problems - I haven't verified
// it personally but better to note it here in the header file.
inline static void set_mode(Brew mode) { Get().mode_ = mode;
/*
对于 CUDA 编程框架来说,主机端内存分为两种:
* 可分页内存(pageable memory), malloc()
* 页锁内存(pinned memory), cudaHostAlloc()
pinned memory主要属性是主机的操作系统不会对该块内存进行分页和换出,
确保该内存始终驻留在物理内存中,从而提高CPU和GPU之间的数据传输效率。
*/
caffe::layer
Caffe的Layer执行顺序是用户定义prototxt中的Layer顺序
// Dispatch the forward pass to the CPU or GPU implementation based on the
// global brew. (Simplified excerpt; parameter types and the loss
// accumulation from the top blobs are omitted.)
Dtype Layer::Forward(bottom, top) {
switch (Caffe::mode()) {
case Caffe::CPU:
Forward_cpu(bottom, top);
break;
case Caffe::GPU:
Forward_gpu(bottom, top);
break;
}
// Restored the function's closing brace, which the original excerpt lost.
}
Caffe 源码修改
caffe/src/caffe/proto
message LayerParameter { optional string ctx = 250; }
caffe/include/caffe/layer.hpp
string ctx;
inline void set_ctx(string x) {
ctx = x;
}
inline string get_ctx() {
return ctx;
}
caffe/src/caffe/net.cpp
// Runs the forward pass for layers [start, end], switching the global
// Caffe mode per layer according to each layer's ctx ("cpu"/"gpu").
// Returns the accumulated loss over the executed layers.
template <typename Dtype>
Dtype Net<Dtype>::ForwardFromTo(int start, int end) {
CHECK_GE(start, 0);
CHECK_LT(end, layers_.size());
Dtype loss = 0;
for (int i = start; i <= end; ++i) {
// add code start
LOG(INFO) << "Forwarding " << layer_names_[i];
// Fetch the context once (get_ctx() returns by value) and reuse it for
// both the log line and the comparison below.
const string ctx = layers_[i]->get_ctx();
LOG(INFO) << "ctx = " << ctx;
if (ctx == "cpu") {
Caffe::set_mode(Caffe::CPU);
} else if (ctx == "gpu") {
Caffe::set_mode(Caffe::GPU);
}
// add code end
Dtype layer_loss = layers_[i]->Forward(bottom_vecs_[i], top_vecs_[i]);
loss += layer_loss;
if (debug_info_) { ForwardDebugInfo(i); }
}
return loss;
}
// Backward 同理(在 BackwardFromTo 中做相同修改)
caffe/examples/mnist/lenet_solver.prototxt
solver_mode: CPU
caffe/examples/mnist/lenet_train_test.prototxt
layer {
ctx : "gpu"
...
}
运行
./build/tools/caffe train -solver=./examples/mnist/lenet_solver.prototxt
- 一开始声明mode是cpu,在prototxt修改layer的ctx,改成gpu,如果发现gpu在使用,证明修改成功
nvidia-smi
命令可以看到 ./build/tools/caffe 正在使用 GPU,Type 为 C(Compute,计算)而不是 G(Graphics,图形显示)
- 看运行时间,GPU 比 CPU 运行快多了