caffe 设备切换

Caffe 内存管理

Caffe::Blob

Blob是一个多维的数组,可以位于内存,也可以位于显存

class Blob {
protected:
	shared_ptr<SyncedMemory> data_; // 正向传播数据
	shared_ptr<SyncedMemory> diff_; // 反向传播误差vector<int> shape_;
	vector<int> shape_;
}

SyncedMemory

/*
SyncedMemory state machine:
UNINITIALIZED: not initialized — no resources allocated yet
HEAD_AT_CPU:   the CPU copy holds the most recent data
HEAD_AT_GPU:   the GPU copy holds the most recent data
SYNCED:        the CPU and GPU copies are both up to date
*/
class SyncedMemory {
 enum SyncedHead { UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU, SYNCED };
private:
	void to_cpu();    // ensure an up-to-date CPU copy exists
	void to_gpu();    // ensure an up-to-date GPU copy exists
	void* cpu_ptr_;   // host-side data pointer
	void* gpu_ptr_;   // device-side data pointer
	SyncedHead head_; // current synchronization state of this SyncedMemory
};
// Return a read-only host pointer to the data, first synchronizing
// from the GPU (allocating and/or copying) if the GPU copy is newer.
const void* SyncedMemory::cpu_data() {
	to_cpu();  // allocates space and copies across devices as needed
	return cpu_ptr_;  // void* converts to const void* implicitly; no cast needed
}
// Drive the state machine so the CPU copy becomes valid:
//   UNINITIALIZED        -> allocate zero-filled host memory, HEAD_AT_CPU
//   HEAD_AT_GPU          -> copy device data back to host, SYNCED
//   HEAD_AT_CPU / SYNCED -> nothing to do
inline void SyncedMemory::to_cpu() {
  switch (head_) {
  case UNINITIALIZED:
    // First touch: allocate host memory (pinned when CUDA is in use,
    // per cpu_malloc_use_cuda_) and zero it out.
    CaffeMallocHost(&cpu_ptr_, size_, &cpu_malloc_use_cuda_);
    caffe_memset(size_, 0, cpu_ptr_);
    head_ = HEAD_AT_CPU;
    own_cpu_data_ = true;  // we allocated it, so we are responsible for freeing it
    break;
  case HEAD_AT_GPU:
#ifndef CPU_ONLY
    // Lazily allocate the host buffer if absent, then pull the
    // freshest data down from the device.
    if (cpu_ptr_ == NULL) {
      CaffeMallocHost(&cpu_ptr_, size_, &cpu_malloc_use_cuda_);
      own_cpu_data_ = true;
    }
    caffe_gpu_memcpy(size_, gpu_ptr_, cpu_ptr_);
    head_ = SYNCED;  // both copies now agree
#else
    NO_GPU;  // GPU path is compiled out in CPU-only builds
#endif
    break;
  case HEAD_AT_CPU:
  case SYNCED:
    // CPU copy is already current; nothing to do.
    break;
  }
}

Caffe brew

A "Brew" is Caffe's term for the mode of operation that determines the target architecture (CPU or GPU).

define brew

// A Brew is the mode of operation that selects the target
// architecture (CPU or GPU) for subsequent computation.
class Caffe {
	enum Brew { CPU, GPU };  // available execution targets
	Brew mode_;              // currently selected mode
};

（原文此处有插图，转载时已丢失 / figure from the original post omitted）

set_mode()

不推荐中途修改mode

// The setters for the variables
// Sets the mode. It is recommended that you don't change the mode halfway
// into the program since that may cause allocation of pinned memory being
// freed in a non-pinned way, which may cause problems - I haven't verified
// it personally but better to note it here in the header file.
inline static void set_mode(Brew mode) { Get().mode_ = mode; }
/*
In the CUDA programming model, host memory comes in two flavors:
 * pageable memory, allocated with malloc()
 * pinned (page-locked) memory, allocated with cudaHostAlloc()
The operating system never pages out or swaps pinned memory, so it
always stays resident in physical RAM, which improves the efficiency
of data transfers between CPU and GPU.
*/

caffe::layer

Caffe的Layer执行顺序是用户定义prototxt中的Layer顺序

// Dispatch the forward pass to the CPU or GPU implementation based on
// the current global Caffe mode (pseudocode excerpt; parameter types
// and the loss computation are elided).
Dtype Layer::Forward(bottom, top) {
  switch (Caffe::mode()) {
  case Caffe::CPU:
    Forward_cpu(bottom, top);
    break;
  case Caffe::GPU:
    Forward_gpu(bottom, top);
    break;
  }
}

Caffe 源码修改

caffe/src/caffe/proto

message LayerParameter { optional string ctx = 250; }

caffe/include/caffe/layer.hpp

string ctx;  // per-layer execution context: "cpu" or "gpu" (empty = use global mode)
// Set the per-layer execution context ("cpu" or "gpu").
inline void set_ctx(string x) {
  ctx = std::move(x);  // sink parameter: take by value, move into place (avoids a copy)
}
// Return the per-layer execution context string.
inline string get_ctx() {
  return ctx;
}

caffe/src/caffe/net.cpp


// Run the forward pass over layers [start, end] (inclusive) and return
// the accumulated loss. The block marked "add code" below is the
// modification: before each layer runs, the global Caffe mode is
// switched according to the layer's ctx string ("cpu" or "gpu") taken
// from the prototxt; any other ctx value leaves the mode unchanged.
template <typename Dtype>
Dtype Net<Dtype>::ForwardFromTo(int start, int end) {
  CHECK_GE(start, 0);
  CHECK_LT(end, layers_.size());
  Dtype loss = 0;
  for (int i = start; i <= end; ++i) {
    // add code start
    LOG(INFO) << "Forwarding " << layer_names_[i];
    LOG(INFO) << "ctx = " << layers_[i]->get_ctx();
    string ctx = layers_[i]->get_ctx();
    if (ctx == "cpu") {
        Caffe::set_mode(Caffe::CPU);
    } else if (ctx == "gpu") {
        Caffe::set_mode(Caffe::GPU);
    }
    // add code end (unrecognized or empty ctx keeps the current mode)
    Dtype layer_loss = layers_[i]->Forward(bottom_vecs_[i], top_vecs_[i]);
    loss += layer_loss;
    if (debug_info_) { ForwardDebugInfo(i); }
  }
  return loss;
}
// backword 同理

caffe/examples/mnist/lenet_solver.prototxt

solver_mode: CPU

caffe/examples/mnist/lenet_train_test.prototxt

layer {
  ctx : "gpu"
  ...
}

运行

  • ./build/tools/caffe train -solver=./examples/mnist/lenet_solver.prototxt
  • 一开始声明mode是cpu,在prototxt修改layer的ctx,改成gpu,如果发现gpu在使用,证明修改成功
    • nvidia-smi命令可以看到./build/tools/caffe正在使用gpu,type为C,表示计算而不是G(显示)
    • 看运行时间,gpu比cpu运行快多了

参考

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值