MXAPIPredictor结构体在c_predict_api.cc中定义:
// Predictor interface: bundles the executor together with the arrays, output
// shapes, symbol and context it works with.
struct MXAPIPredictor
{
std::vector<NDArray> out_arrays; // output arrays
std::vector<NDArray> arg_arrays; // argument arrays
std::vector<NDArray> aux_arrays; // auxiliary arrays
std::vector<TShape> out_shapes; // output shapes, one TShape per entry
std::vector<uint32_t> out_shapes_buffer; // flat uint32_t buffer holding output shape data
std::unordered_map<std::string, size_t> key2arg; // argument key -> size_t (presumably an index into arg_arrays; confirm against the .cc)
std::unique_ptr<Executor> exec; // executor, exclusively owned by this predictor
nnvm::Symbol sym; // symbol
Context ctx; // device context
};
包含输入数据arg_arrays、aux_arrays,输出数据及其形状out_arrays、out_shapes、out_shapes_buffer,参数名到参数的映射key2arg,执行器exec,NNVM的符号(Symbol)sym,以及设备上下文ctx。其中执行器的Executor类为:
/*!
 * \brief Abstract executor interface.
 *
 * Concrete executors are obtained through the static factories Bind() and
 * SimpleBind(); the pure virtual members expose forward/backward execution
 * and access to the arrays held by the executor.
 */
class Executor
{
 public:
  /*! \brief virtual destructor so implementations can be destroyed through this interface */
  virtual ~Executor() {}

  /*!
   * \brief Perform a forward operation so that the result can be retrieved.
   * \param is_train flag passed through to the implementation
   */
  virtual void Forward(bool is_train) = 0;

  /*!
   * \brief Issue the operation specified by step.
   * \param is_train flag passed through to the implementation
   * \param step which step to issue
   * \param step_left output pointer (presumably receives the number of steps
   *        remaining; confirm against the implementation)
   */
  virtual void PartialForward(bool is_train, int step, int *step_left) = 0;

  /*!
   * \brief Perform a backward operation; the NDArrays specified by
   *        grad_in_args_store will be updated accordingly.
   * \param head_grads gradients supplied for the heads
   * \param is_train flag passed through to the implementation (defaults to true)
   */
  virtual void Backward(const std::vector<NDArray> &head_grads, bool is_train = true) = 0;

  /*! \brief Print the execution plan info to the output stream (no-op by default). */
  virtual void Print(std::ostream &os) const {}

  /*! \brief Get the array of outputs in the executor. */
  virtual const std::vector<NDArray> &outputs() const = 0;

  /*! \brief Get the input argument map in the executor. */
  virtual const std::unordered_map<std::string, NDArray>& in_arg_map() const = 0;

  /*! \brief Get the input argument gradient map. */
  virtual const std::unordered_map<std::string, NDArray>& arg_grad_map() const = 0;

  /*! \brief Get the aux state map. */
  virtual const std::unordered_map<std::string, NDArray>& aux_state_map() const = 0;

  /*!
   * \brief Return a new executor with the same symbol and shared memory,
   *        but different input/output shapes.
   */
  virtual Executor* Reshape(const bool partial_shaping,
                            const bool allow_up_sizing,
                            const Context& default_ctx,
                            const std::map<std::string, Context>& ctx_map,
                            const std::unordered_map<std::string, TShape>&
                              provided_arg_shapes,
                            std::vector<NDArray>* in_args,
                            std::vector<NDArray>* arg_grads,
                            std::vector<NDArray>* aux_states) = 0;

  /*!
   * \brief Create an operator by binding a symbol with context and arguments.
   *
   * If the user does not want to compute the gradient of the i-th argument,
   * grad_req_type[i] can be kNullOp.
   *
   * \param shared_exec optional executor to share with; defaults to nullptr
   *        (spelled nullptr rather than NULL to match SimpleBind)
   */
  static Executor *Bind(nnvm::Symbol symbol,
                        const Context& default_ctx,
                        const std::map<std::string, Context>& group2ctx,
                        const std::vector<NDArray> &in_args,
                        const std::vector<NDArray> &arg_grad_store,
                        const std::vector<OpReqType> &grad_req_type,
                        const std::vector<NDArray> &aux_states,
                        Executor* shared_exec = nullptr);

  /*!
   * \brief Bind variant that only needs some of the necessary arrays;
   *        the other arrays can be inferred automatically.
   *
   * in_args, arg_grads and aux_states are output vectors supplied by the caller.
   */
  static Executor* SimpleBind(nnvm::Symbol symbol,
                              const Context& default_ctx,
                              const std::map<std::string, Context>& group2ctx,
                              const std::vector<Context>& in_arg_ctxes,
                              const std::vector<Context>& arg_grad_ctxes,
                              const std::vector<Context>& aux_state_ctxes,
                              const std::unordered_map<std::string, TShape>& arg_shape_map,
                              const std::unordered_map<std::string, int>& arg_dtype_map,
                              const std::unordered_map<std::string, int>& arg_stype_map,
                              const std::vector<OpReqType>& grad_req_types,
                              const std::unordered_set<std::string>& param_names,
                              std::vector<NDArray>* in_args,
                              std::vector<NDArray>* arg_grads,
                              std::vector<NDArray>* aux_states,
                              std::unordered_map<std::string, NDArray>*
                                shared_data_arrays = nullptr,
                              Executor* shared_exec = nullptr);

  /*! \brief User-defined monitor callback type. */
  typedef std::function<void(const char*, void*)> MonitorCallback;

  /*! \brief Install a callback to notify the completion of operation (no-op by default). */
  virtual void SetMonitorCallback(const MonitorCallback& callback) {}
}; // class Executor
sym是nnvm的Symbol类,定义为:
class NNVM_DLL Symbol {
public:
enum ListAttrOption {kRecursive = 0, kShallow = 1};//option passed to ListAttr
enum ListInputOption {kAll = 0, kReadOnlyArgs = 1, kAuxiliaryStates = 2};//option passed to ListInputNames
std::vector<NodeEntry> outputs; //output entries contained in the symbol
Symbol Copy() const;//A deep copy of this symbol
void Print(std::ostream &os) const; // Print the symbol info to output stream.
Symbol operator[] (size_t index) const;//Get the index-th element from the returned tuple.
std::vector<NodePtr> ListInputs(ListInputOption option) const;// List the input variable nodes
std::vector<std::string> ListInputNames(ListInputOption option) //List the input namesconst;
std::vector<std::string> ListOutputNames() const;//List the names of outputs for this symbol.
//Compose the symbol with arguments, this changes the current symbol.
void Compose(const array_view<const Symbol*>& args,
const std::unordered_map<std::string, const Symbol*>& kwargs,
const std::string& name);
//equivalent to Copy then Compose.
Symbol operator () (const array_view<const Symbol*>& args,
const std::unordered_map<std::string, const Symbol*>& kwargs,
const std::string& name) const;
void AddControlDeps(const Symbol& src); //Add control flow dependencies to the operators in symbols.
Symbol GetInternals() const; //A new symbol whose output contains all the outputs of the symbols including input variables and intermediate outputs.
Symbol GetChildren() const; //Get the direct inputs of the head node(s) of this symbol.
void SetAttrs(const std::vector<std::pair<std::string, std::string> >& attrs); //Set additional attributes to current node.
bool GetAttr(const std::string& key, std::string* out) const;//Get attributes from the symbol.
std::unordered_map<std::string, std::string> ListAttrs(ListAttrOption option) const;//Get attribute dictionary from the symbol.
std::vector<std::tuple<std::string, std::string, std::string> > ListAttrsRecursive() const;//Get attribute dictionary from the symbol and all children.
static Symbol CreateFunctor(const Op* op, std::unordered_map<std::string, std::string> attrs); //Create symbolic functor(AtomicSymbol) by given operator and attributes.
static Symbol CreateFunctor(const NodeAttrs& attrs); //Create symbolic functor(AtomicSymbol) by given node attributes.
static Symbol CreateVariable(const std::string& name); //Create symbol node representing variable.
static Symbol CreateGroup(const std::vector<Symbol>& symbols);//Create equivalence of symbol by grouping the symbols together.
};
ctx是base.h中定义的Context结构体:
/*! \brief Context information: device type plus device id. */
struct Context
{
  /*! \brief Type of device */
  enum DeviceType {
    kCPU = cpu::kDevMask,
    kGPU = gpu::kDevMask,
    kCPUPinned = 3,
    kCPUShared = 5
  };

  /*! \brief the device type we run the op on */
  DeviceType dev_type;
  /*! \brief device id we are going to run it on */
  int32_t dev_id;

  /*! \brief default constructor: CPU, device id 0 */
  Context() : dev_type(kCPU), dev_id(0) {}

  /*!
   * \brief Get the corresponding device mask.
   * \return kCPU for the pinned and shared CPU types, otherwise dev_type itself
   */
  inline DeviceType dev_mask() const
  {
    const bool cpu_backed = (dev_type == kCPUPinned) || (dev_type == kCPUShared);
    return cpu_backed ? kCPU : dev_type;
  }

  /*! \brief Returns dev_id for kGPU and kCPUPinned, 0 otherwise */
  inline int real_dev_id() const
  {
    const bool carries_device_index = (dev_type == kCPUPinned) || (dev_type == kGPU);
    return carries_device_index ? dev_id : 0;
  }

  /*! \brief ordering, used to enable Context as std::map key (defined elsewhere) */
  inline bool operator<(const Context &b) const;

  /*! \brief check if current context equals another one */
  inline bool operator==(const Context &b) const
  {
    if (dev_type != b.dev_type) return false;
    return dev_id == b.dev_id;
  }

  /*! \brief check if current context does not equal another one */
  inline bool operator!=(const Context &b) const
  {
    return !operator==(b);
  }

  /*! \brief save the content into binary stream: dev_type first, then dev_id */
  inline void Save(dmlc::Stream *strm) const
  {
    strm->Write(&dev_type, sizeof(dev_type));
    strm->Write(&dev_id, sizeof(dev_id));
  }

  /*!
   * \brief load the content from binary stream
   * \return false as soon as either field cannot be read in full, true otherwise
   */
  inline bool Load(dmlc::Stream *strm)
  {
    // Short-circuit keeps the original order: dev_id is only read when
    // dev_type was read completely.
    return strm->Read(&dev_type, sizeof(dev_type)) == sizeof(dev_type) &&
           strm->Read(&dev_id, sizeof(int32_t)) == sizeof(int32_t);
  }

  /*! \brief the maximal device type */
  static const int32_t kMaxDevType = 6;
  /*! \brief the maximal device index */
  static const int32_t kMaxDevID = 16;

  /*! \brief Create a new context */
  inline static Context Create(DeviceType dev_type, int32_t dev_id = -1);
  /*! \brief return CPU Context */
  inline static Context CPU(int32_t dev_id = 0);
  /*! \brief Create a GPU context; -1 for current GPU */
  inline static Context GPU(int32_t dev_id = -1);
  /*! \brief Create a pinned CPU context */
  inline static Context CPUPinned(int32_t dev_id = -1);
  /*! \brief Create a CPU shared memory context */
  inline static Context CPUShared(int32_t dev_id = 0);
  /*! \brief Create a context from string of the format [cpu|gpu|cpu_pinned] */
  inline static Context FromString(const std::string& str);
  /*! \brief Get the number of GPUs available */
  inline static int32_t GetGPUCount();
  /*! \brief get the free and total available memory on a GPU */
  inline static void GetGPUMemoryInformation(int dev, uint64_t *free, uint64_t *total);
};
out_shapes是通过vector存储的TShape数据,TShape是nnvm中定义的表示形状(shape)的类;为了可视化,通过vector<uint32_t>类型的out_shapes_buffer来保存实际的形状数据。TShape类的定义为:
/*!
 * \brief A shape: a Tuple<dim_t> extended with shape-specific helpers
 *        (element counts, flattening, conversion to/from mshadow::Shape).
 */
class TShape : public Tuple<dim_t>
{
 public:
  /*! \brief default constructor */
  TShape() = default;

  /*! \brief construct a shape with ndim dimensions, all set to 1 */
  inline TShape(uint32_t ndim)
  {
    this->SetDim(ndim);
    std::fill_n(begin(), ndim, 1);
  }

  /*! \brief construct by copying the elements of a Tuple<dim_t> */
  inline TShape(const Tuple<dim_t>& s)
  {
    this->assign(s.begin(), s.end());
  }

  /*! \brief constructor from initializer list */
  inline TShape(std::initializer_list<dim_t> init)
  {
    this->assign(init.begin(), init.end());
  }

  /*! \brief move constructor: takes over the tuple's content via swap */
  inline TShape(Tuple<dim_t>&& s)
  {
    this->swap(s);
  }

  /*! \brief construct the Tuple from content of an iterator range */
  template<typename RandomAccessIterator>
  inline TShape(RandomAccessIterator begin, RandomAccessIterator end)
  {
    this->assign(begin, end);
  }

  /*! \brief assignment from a Tuple<dim_t> (copies the elements) */
  inline TShape& operator=(const Tuple<dim_t>& src)
  {
    this->assign(src.begin(), src.end());
    return *this;
  }

  /*! \brief move assignment from a Tuple<dim_t> */
  inline TShape& operator=(Tuple<dim_t>&& src)
  {
    TShape(std::move(src)).swap(*this);
    return *this;
  }

  /*! \return total number of elements in the shape (product of all dimensions) */
  inline size_t Size() const
  {
    dim_t total = 1;
    for (const dim_t *cursor = begin(); cursor != end(); ++cursor)
    {
      total *= *cursor;
    }
    return total;
  }

  /*! \return product of the dimensions in [dimstart, dimend) */
  inline size_t ProdShape(int dimstart, int dimend) const
  {
    const dim_t *extents = this->data();
    dim_t product = 1;
    int axis = dimstart;
    while (axis < dimend)
    {
      product *= extents[axis];
      ++axis;
    }
    return product;
  }

  /*! \return the begin data pointer to content of the tuple (read-only) */
  inline const dim_t *data() const
  {
    return begin();
  }

  /*! \return the begin data pointer to content of the tuple */
  inline dim_t *data()
  {
    return begin();
  }

#ifdef MSHADOW_XINLINE
  /*! \brief construct from an mshadow::Shape by copying its dim entries */
  template<int dim>
  inline TShape(const mshadow::Shape<dim> &s)
  {
    this->assign(s.shape_, s.shape_ + dim);
  }

  /*! \brief construct from an rvalue mshadow::Shape (entries are copied) */
  template<int dim>
  inline TShape(mshadow::Shape<dim> &&s)
  {
    this->assign(s.shape_, s.shape_ + dim);
  }

  /*! \brief assignment from an mshadow::Shape */
  template<int dim>
  inline TShape &operator=(const mshadow::Shape<dim> &shape)
  {
    this->assign(shape.shape_, shape.shape_ + dim);
    return *this;
  }

  /*!
   * \brief get the shape as an mshadow::Shape of the given dim
   *        (checked: dim must equal ndim())
   */
  template<int dim>
  inline mshadow::Shape<dim> get() const
  {
    CHECK_EQ(dim, static_cast<int>(ndim()))
        << "dimension do not match target dimension " << dim << " vs " << ndim();
    mshadow::Shape<dim> converted;
    const dim_t *extents = this->data();
    for (int axis = 0; axis < dim; ++axis)
    {
      converted[axis] = extents[axis];
    }
    return converted;
  }

  /*!
   * \brief flatten the higher dimensions into the first one
   * \return a 2D shape (product of all but the last dimension, last dimension);
   *         (0, 0) when the shape has no dimensions
   */
  inline mshadow::Shape<2> FlatTo2D(void) const
  {
    mshadow::Shape<2> flat;
    if (ndim() == 0)
    {
      return mshadow::Shape2(0, 0);
    }
    const dim_t *extents = this->data();
    dim_t leading = 1;
    // Multiply every dimension except the last one.
    for (size_t axis = 0; axis + 1 < ndim(); ++axis)
    {
      leading *= extents[axis];
    }
    flat.shape_[0] = leading;
    flat.shape_[1] = extents[ndim() - 1];
    return flat;
  }

  /*!
   * \brief flatten the shape into three parts:
   *        [0, axis_begin), [axis_begin, axis_end], (axis_end, ndim)
   * \return the 3D shape; (0, 0, 0) when the shape has no dimensions
   */
  inline mshadow::Shape<3> FlatTo3D(size_t axis_begin, size_t axis_end) const
  {
    CHECK(axis_end >= axis_begin);
    mshadow::Shape<3> parts;
    if (ndim() == 0)
    {
      return mshadow::Shape3(0, 0, 0);
    }
    const dim_t *extents = this->data();
    parts.shape_[0] = 1;
    parts.shape_[1] = 1;
    parts.shape_[2] = 1;
    // One cursor walks the three consecutive ranges in order.
    size_t axis = 0;
    for (; axis < axis_begin; ++axis)
    {
      parts.shape_[0] *= extents[axis];
    }
    for (; axis <= axis_end; ++axis)
    {
      parts.shape_[1] *= extents[axis];
    }
    for (; axis < ndim(); ++axis)
    {
      parts.shape_[2] *= extents[axis];
    }
    return parts;
  }

  /*! \brief flatten the axes before and after the specified axis, giving a 3D shape */
  inline mshadow::Shape<3> FlatTo3D(size_t axis) const
  {
    return FlatTo3D(axis, axis);
  }

  /*! \return whether two shapes are equal (same ndim and same entries) */
  inline bool operator==(const TShape &s) const
  {
    return ndim() == s.ndim() && std::equal(begin(), end(), s.begin());
  }

  /*! \return whether two shapes differ */
  inline bool operator!=(const TShape &s) const
  {
    return !(*this == s);
  }

  /*! \return whether this shape equals the given mshadow::Shape */
  template<int dim>
  inline bool operator==(const mshadow::Shape<dim> &s) const
  {
    if (ndim_ != dim)
    {
      return false;
    }
    // Same storage selection as the original: stack buffer when dim fits in it.
    const dim_t *extents = dim <= kStackCache ? data_stack_ : data_heap_;
    for (size_t axis = 0; axis < dim; ++axis)
    {
      if (extents[axis] != s.shape_[axis])
      {
        return false;
      }
    }
    return true;
  }

  /*! \return whether this shape differs from the given mshadow::Shape */
  template<int dim>
  inline bool operator!=(const mshadow::Shape<dim> &s) const
  {
    return !(*this == s);
  }
#endif
};
out_arrays、arg_arrays、aux_arrays都是vector存储的NDArray数据,NDArray的定义为:
// NDArray: a handle made of a shared pointer to a Chunk (the backing storage)
// plus per-handle metadata (shape, dtype, byte offset, storage type, autograd
// entry). Copies of an NDArray share the same Chunk through ptr_.
class NDArray {
public:
NDArray() {}//default constructor; ptr_ stays null, so is_none() is true
//constructs a new dynamic NDArray with default storage
NDArray(const TShape &shape, Context ctx, bool delay_alloc = false, int dtype = mshadow::default_type_flag)
: ptr_(std::make_shared<Chunk>(shape, ctx, delay_alloc, dtype)), shape_(shape), dtype_(dtype), storage_type_(kDefaultStorage), entry_({nullptr, 0, 0})
{}
//constructor for NDArray with storage type (defined out of line)
NDArray(const NDArrayStorageType stype, const TShape &shape, Context ctx,
bool delay_alloc = true, int dtype = mshadow::default_type_flag,
std::vector<int> aux_types = {}, std::vector<TShape> aux_shapes = {},
TShape storage_shape = TShape(mshadow::Shape1(0)));
//constructs a static NDArray that shares data with a TBlob; the caller must make sure the memory region stays available throughout the life of the NDArray
NDArray(const TBlob &data, int dev_id)
: ptr_(std::make_shared<Chunk>(data, dev_id)), shape_(data.shape_),
dtype_(data.type_flag_), storage_type_(kDefaultStorage),
entry_({nullptr, 0, 0})
{}
//constructs a static NDArray that shares data with a TBlob and carries a custom deleter
NDArray(const TBlob &data, int dev_id, const std::function<void()>& deleter)
: ptr_(new Chunk(data, dev_id),
[deleter](Chunk *p)
{
deleter(); // call custom deleter
delete p; // delete Chunk object
}),
shape_(data.shape_),
dtype_(data.type_flag_), storage_type_(kDefaultStorage),
entry_({nullptr, 0, 0})
{}
//create ndarray from shared memory
NDArray(int shared_pid, int shared_id, const TShape& shape, int dtype)
: ptr_(std::make_shared<Chunk>(shared_pid, shared_id, shape, dtype)), shape_(shape),
dtype_(dtype), storage_type_(kDefaultStorage), entry_({nullptr, 0, 0})
{}
//constructs a static NDArray of non-default storage that shares data with TBlobs
NDArray(const NDArrayStorageType stype, const TShape &shape,
const TBlob &data, const std::vector<TBlob> &aux_data, int dev_id)
: ptr_(std::make_shared<Chunk>(stype, data, aux_data, dev_id)), shape_(shape),
dtype_(data.type_flag_), storage_type_(stype), entry_({nullptr, 0, 0})
{}
//Indicates whether the array was created by reshape or slice
inline bool IsView() const
{
// View only works on the default storage
if (storage_type() != kDefaultStorage)
return false;
// If the array reuses memory, its shape may be different from the storage
// shape. However, we shouldn't consider it as a view.
if (reuse_)
return false;
return byte_offset_ > 0 || shape() != ptr_->storage_shape;
}
// Check whether the two arrays are the same array: same chunk, shape, byte offset and dtype
inline bool IsSame(const NDArray& other) const
{
return ptr_ == other.ptr_ &&
shape_ == other.shape_ &&
byte_offset_ == other.byte_offset_ &&
dtype_ == other.dtype_;
}
inline const TShape& shape() const {return shape_;}//return the shape of current NDArray.
//the shape of the underlying chunk which stores the NDArray data/value; only valid for non-default storage
inline const TShape &storage_shape() const
{
CHECK(ptr_ != nullptr);
CHECK_NE(storage_type(), kDefaultStorage)
<< "storage_shape() is not intended for kDefaultStorage.";
return ptr_->storage_shape;
}
inline const TShape& aux_shape(size_t index) const {CHECK_NE(storage_type(), kDefaultStorage)<< "...";return ptr_->aux_shapes[index];}//get the shape of aux_data(index)
const std::vector<TShape>& aux_shapes() const {CHECK_NE(storage_type(), kDefaultStorage)<< "...";return ptr_->aux_shapes;}//return the shapes of all aux data
const std::vector<int>& aux_types() const {CHECK_NE(storage_type(), kDefaultStorage)<< "...";return ptr_->aux_types;}//return the dtypes of all aux data
inline void set_aux_shape(size_t index, const TShape& shape) const {CHECK_NE(storage_type(), kDefaultStorage)<< "...";ptr_->set_aux_shape(index, shape);}//reset the exact size when the shape is known
//return the data TBlob; for default storage the memory is allocated first if allocation was delayed
inline const TBlob& data() const
{
if (storage_type() == kDefaultStorage) CheckAndAlloc();
SetTBlob();
return tblob_;
}
NDArray grad() const;// return the gradient ndarray.
// return the i-th aux TBlob; only row-sparse and CSR storage types are accepted
inline TBlob aux_data(size_t i) const
{
auto stype = storage_type();
TBlob res;
auto shape = aux_shape(i);
auto type = aux_type(i);
MSHADOW_TYPE_SWITCH(type, DType,
{
auto dptr = static_cast<DType*>(ptr_->aux_handles[i].dptr);
CHECK(stype == kRowSparseStorage || stype == kCSRStorage)
<< "Unexpected storage type: " << stype;
res = TBlob(dptr, shape, ptr_->aux_handles[i].ctx.dev_mask(), type);
});
return res;
}
inline Context ctx() const{CHECK(!is_none());return ptr_->shandle.ctx;}// return the context of NDArray
inline int dtype() const {return dtype_;}// return the data type of NDArray
inline int aux_type(size_t i) const {CHECK(!is_none());return ptr_->aux_types[i];}// return the type of the i-th aux data
inline NDArrayStorageType storage_type() const {return storage_type_;}// return the storage type of NDArray
inline bool is_none() const {return ptr_.get() == nullptr;}//return whether this ndarray is not initialized
bool fresh_out_grad() const; //return updated grad state in entry_
void set_fresh_out_grad(bool state) const; //setter counterpart of fresh_out_grad() (presumably updates the grad state in entry_; confirm against the definition)
// whether a sparse ndarray's aux_data and storage are initialized; must not be called on default storage
inline bool storage_initialized() const
{
if (is_none()) return false;
auto stype = storage_type();
CHECK_NE(stype, kDefaultStorage) << "...";
if (stype == kRowSparseStorage)
{
CHECK_EQ(aux_shape(rowsparse::kIdx)[0], storage_shape()[0])
<< "inconsistent storage shape " << storage_shape()
<< " vs. aux shape " << aux_shape(rowsparse::kIdx);
return aux_shape(rowsparse::kIdx).Size() != 0;
}
else if (stype == kCSRStorage)
{
CHECK_EQ(aux_shape(csr::kIdx)[0], storage_shape()[0])
<< "inconsistent storage shape " << storage_shape()
<< " vs. aux shape " << aux_shape(csr::kIdx);
return aux_shape(csr::kIdx).Size() != 0;
}
else {LOG(FATAL) << "Unknown storage type";}
return true;
}
// get storage handle (default storage only); allocates first if allocation was delayed
inline Storage::Handle storage_handle() const
{
CHECK(!is_none());
CHECK_EQ(storage_type(), kDefaultStorage);
CheckAndAlloc();
return ptr_->shandle;
}
// Wait on the array's engine variable; per the original note, pending writes
// have finished afterwards and reads can be performed. No-op for an empty array.
inline void WaitToRead() const
{
if (is_none()) return;
Engine::Get()->WaitForVar(ptr_->var);
}
// Push an empty operation that lists the array's variable as mutated, then
// wait on it; per the original note, pending reads/writes have finished
// afterwards and writes can be performed. No-op for an empty array.
inline void WaitToWrite() const
{
if (is_none()) return;
Engine::Get()->PushAsync([](RunContext, Engine::CallbackOnComplete on_complete)
{on_complete();}, Context{}, {}, {ptr_->var});
Engine::Get()->WaitForVar(ptr_->var);
}
inline Engine::VarHandle var() const {return ptr_->var;}//return the associated variable of the ndarray.
inline size_t byte_offset() const {return byte_offset_;}//return byte offset in chunk of the ndarray
inline size_t version() const {return var()->version();}//return var version of the NDArray
void Save(dmlc::Stream *strm) const;//save the content into binary stream
bool LegacyLoad(dmlc::Stream *strm, const uint32_t magic);//load ndarrays saved before sparse ndarrays were supported
bool Load(dmlc::Stream *strm);//load the content from binary stream
NDArray &operator=(real_t scalar);//set all the elements in ndarray to be scalar
NDArray &operator+=(const NDArray &src);//elementwise add to current space; this mutates the current NDArray
NDArray &operator+=(const real_t &src);//elementwise add to current space
NDArray &operator-=(const NDArray &src);//elementwise subtract from current ndarray
NDArray &operator-=(const real_t &src);//elementwise subtract from current ndarray
NDArray &operator*=(const NDArray &src);//elementwise multiplication to current ndarray
NDArray &operator*=(const real_t &src);//elementwise multiplication to current ndarray
NDArray &operator/=(const NDArray &src);//elementwise division from current ndarray
NDArray &operator/=(const real_t &src);//elementwise division from current ndarray
NDArray Copy(Context ctx) const;// return a new copy of this NDArray
void SyncCopyFromCPU(const void *data, size_t size) const;//Do a synchronous copy from a contiguous CPU memory region; will call WaitToWrite
void SyncCopyFromNDArray(const NDArray &src, int i = -1, int j = -1);//Copy from src.data()/aux_data(i) to this->data()/aux_data(j)
void SyncCopyToCPU(void *data, size_t size) const;//Do a synchronous copy to a contiguous CPU memory region; will call WaitToRead
void SyncCheckFormat(const bool full_check) const;//check whether the NDArray format is valid
NDArray Slice(index_t begin, index_t end) const;//Slice a NDArray
NDArray SliceWithRecord(index_t begin, index_t end);//Slice a NDArray. Supports recording with autograd
NDArray At(index_t idx) const;//Index a NDArray
NDArray AtWithRecord(index_t idx);//Index a NDArray (presumably with autograd recording, by analogy with SliceWithRecord; confirm)
NDArray aux_ndarray(size_t i) const;//Generate a deep copy of aux_data(i)
NDArray data_ndarray() const;//Generate a deep copy of data()
//Create a NDArray that shares memory with the current one; the new array must not need more memory than the current storage handle provides
inline NDArray AsArray(const TShape &shape, int dtype) const
{
CHECK_EQ(storage_type(), kDefaultStorage) << "...";
CHECK_GE(ptr_->shandle.size, shape.Size() * mshadow::mshadow_sizeof(dtype)) << "...";
// We can't reuse memory in a view.
CHECK(!IsView());
NDArray ret = *this;
ret.shape_ = shape;
ret.dtype_ = dtype;
ret.reuse_ = true;
return ret;
}
DLManagedTensor* ToDLPack() const; //Create a reference view of NDArray
static NDArray FromDLPack(const DLManagedTensor* tensor); //create a NDArray using the memory allocated by an external deep learning framework that is DLPack compatible.
//Update ndarray chunk storage handles using existing ndarray storage handles
inline void SparseUpdateChunk(const NDArray &arr) const
{
CHECK(shape_ == arr.shape_) << "ndarray shape is different from the target";
CHECK(dtype_ == arr.dtype_) << "ndarray dtype is different from the target";
auto stype = arr.storage_type();
CHECK(stype == kCSRStorage || stype == kRowSparseStorage) << "...";
// swap shandles between src and dst
Storage::Handle shandle_dst = arr.ptr_->shandle;
arr.ptr_->shandle = ptr_->shandle;
ptr_->shandle = shandle_dst;
// storage shape, storage type and ctx are copied from arr (not swapped)
ptr_->storage_shape = arr.ptr_->storage_shape;
ptr_->storage_type = arr.ptr_->storage_type;
ptr_->ctx = arr.ptr_->ctx;
// swap aux_handles between src and dst
size_t aux_idx = 0;
CHECK(ptr_->aux_handles.size() == arr.ptr_->aux_handles.size()) << "...";
for (auto &aux_handle : arr.ptr_->aux_handles)
{
Storage::Handle aux_dst = ptr_->aux_handles[aux_idx];
ptr_->aux_handles[aux_idx] = aux_handle;
aux_handle = aux_dst;
aux_idx++;
}
// aux types and shapes are copied from arr (not swapped)
ptr_->aux_types = arr.ptr_->aux_types;
ptr_->aux_shapes = arr.ptr_->aux_shapes;
}
NDArray Reshape(const TShape &shape) const;//Get a reshaped NDArray
NDArray ReshapeWithRecord(const TShape &shape);//Get a reshaped NDArray. Supports autograd recording
//Return a copy of this NDArray handle without autograd history (entry_ is reset)
NDArray Detach() const
{
NDArray ret(*this);
ret.entry_ = nnvm::NodeEntry{nullptr, 0, 0};
return ret;
}
nnvm::Symbol get_autograd_symbol() const;
//Allocate the space if allocation was delayed (default storage only)
inline void CheckAndAlloc() const
{
CHECK_EQ(storage_type(), kDefaultStorage);
ptr_->CheckAndAlloc();
}
//Set the shape, and allocate the space if the allocation has been delayed or the requested size is bigger than the available one.
void ReshapeAndAlloc(const TShape& shape)
{
CHECK_EQ(storage_type(), kDefaultStorage);
CHECK(!is_none());
shape_ = shape;
ptr_->CheckAndAlloc(shape.Size() * mshadow::mshadow_sizeof(dtype_));
}
//Alloc memory for non-default storage
inline void CheckAndAlloc(const std::vector<TShape> &aux_shapes) const
{
CHECK_NE(storage_type(), kDefaultStorage) << "...";
ptr_->CheckAndAlloc(shape_, aux_shapes, dtype_);
}
//Alloc data memory for non-default storage, given the storage shape
inline void CheckAndAllocData(const TShape &storage_shape) const
{
CHECK_NE(storage_type(), kDefaultStorage) << "...";
ptr_->CheckAndAllocData(storage_shape, dtype_);
}
//Alloc memory for the i-th aux data of non-default storage
inline void CheckAndAllocAuxData(size_t i, const TShape &aux_shape) const
{
CHECK_NE(storage_type(), kDefaultStorage) << "...";
ptr_->CheckAndAllocAuxData(i, aux_shape);
}
#if MXNET_USE_MKLDNN == 1
explicit NDArray(const mkldnn::memory *mkldnn_mem, bool static_data = true);//Create NDArray from mkldnn memory.
bool IsMKLDNNData() const {return ptr_->IsMKLDNN();}//Test if the data is stored in one of the special MKLDNN formats
bool IsDefaultData() const {return ptr_->IsDefault();}//Test if the data is stored in one of the default MXNet formats.
/* there is a shared pointer that hold the memory either in NDArray or in MKLDNN stream.
As long as we call these functions inside an operator, the return memory is always valid. */
const mkldnn::memory *GetMKLDNNData() const;//returns mkldnn::memory with the default primitive_desc.
const mkldnn::memory *GetMKLDNNData(const mkldnn::memory::primitive_desc &desc) const;//returns mkldnn::memory with the given primitive_desc
const mkldnn::memory *GetMKLDNNDataReorder(const mkldnn::memory::primitive_desc &desc) const;//returns mkldnn::memory with the same physical layout as the given primitive_desc.
void CopyFrom(const mkldnn::memory &mem);//copies data from mkldnn memory.
mkldnn::memory *CreateMKLDNNData(const mkldnn::memory::primitive_desc &desc);//allocates memory for array and creates mkldnn memory with the specified format.
//changes the layout of this NDArray, but it happens after all accesses to the array are complete.
void Reorder2DefaultAsync();
void MKLDNNDataReorderAsync(const mkldnn::memory::primitive_desc &desc);
NDArray Reorder2Default() const;//creates a new NDArray with the reordered data.
void InvalidateMKLDNNData();
NDArray MKLDNNDataReshape(const TShape &shape) const;//reshape an array, only valid inside the current invocation of this operator.
void UpdateMKLDNNMemDesc();//Fix mkldnn memory descriptor mismatch from NDArray.
#endif
//Save a list of ndarrays into the stream.
static void Save(dmlc::Stream* fo,
const std::vector<NDArray>& data,
const std::vector<std::string>& names);
//Load a list of ndarrays from the stream.
static void Load(dmlc::Stream* fi,
std::vector<NDArray>* data,
std::vector<std::string>* keys);
private:
friend class Imperative;
struct Chunk //the real data chunk that backs NDArray
{
Storage::Handle shandle;//storage handle from storage engine; for non-default storage, stores the actual values in the NDArray
std::vector<Storage::Handle> aux_handles;//store the aux data (such as indices) if it's needed by non-default storage.
#if MXNET_USE_MKLDNN == 1
std::shared_ptr<MKLDNNMemory> mkl_mem_;//is created when data is stored in MKLDNN format.
#endif
Engine::VarHandle var;//variable from engine
bool static_data;//constructed from static data; true means the data does not come from Storage and does not need to be freed
bool delay_alloc;//whether data allocation is delayed
NDArrayStorageType storage_type = kDefaultStorage;// the type of the storage.
std::vector<int> aux_types;//type of aux
Context ctx;// context of data
TShape storage_shape;// The shape of the chunk data.
std::vector<TShape> aux_shapes;// The shape of aux data.
Chunk() : static_data(true), delay_alloc(false) {}//default constructor
//construct a new chunk; delay_alloc starts as true so that CheckAndAlloc() below performs the allocation when delay_alloc_ is false
Chunk(TShape shape, Context ctx_, bool delay_alloc_, int dtype)
: static_data(false), delay_alloc(true), ctx(ctx_)
{
auto size = shape.Size();
storage_shape = shape;
var = Engine::Get()->NewVariable();
shandle.size = size * mshadow::mshadow_sizeof(dtype);
shandle.ctx = ctx_;
if (!delay_alloc_) this->CheckAndAlloc();
}
//construct a chunk that wraps the memory of an existing TBlob (static data, nothing allocated here)
Chunk(const TBlob &data, int dev_id)
: static_data(true), delay_alloc(false)
{
CHECK(storage_type == kDefaultStorage);
var = Engine::Get()->NewVariable();
if (data.dev_mask() == cpu::kDevMask) {ctx = Context::CPU();}
else {CHECK_EQ(data.dev_mask(), gpu::kDevMask);ctx = Context::GPU(dev_id);}
// init shandle
shandle.ctx = ctx;
shandle.dptr = data.dptr_;
shandle.size = data.shape_.Size() * mshadow::mshadow_sizeof(data.type_flag_);
storage_shape = data.shape_;
}
//construct a chunk in the CPU shared-memory context, identified by shared_pid/shared_id
Chunk(int shared_pid, int shared_id, const TShape& shape, int dtype)
: static_data(false), delay_alloc(false)
{
var = Engine::Get()->NewVariable();
ctx = Context::CPUShared(0);
shandle.size = shape.Size() * mshadow::mshadow_sizeof(dtype);
shandle.ctx = ctx;
shandle.shared_pid = shared_pid;
shandle.shared_id = shared_id;
Storage::Get()->Alloc(&shandle);
storage_shape = shape;
}
// Constructor for a non-default storage chunk
Chunk(NDArrayStorageType storage_type_, const TShape &storage_shape_, Context ctx_, bool delay_alloc_, int dtype, const std::vector<int> &aux_types_, const std::vector<TShape> &aux_shapes_)
: static_data(false), delay_alloc(delay_alloc_), storage_type(storage_type_), aux_types(aux_types_), ctx(ctx_), storage_shape(storage_shape_), aux_shapes(aux_shapes_)
{
shandle.ctx = ctx;
var = Engine::Get()->NewVariable();
// aux_handles always reflect the correct number of aux data
for (size_t i = 0; i < aux_shapes.size(); i++)
{
CheckAndAllocAuxData(i, aux_shapes[i]);
// this line is needed in case when aux_shapes[i].Size() = 0
// aux_handles[i] will not be updated and take only default value.
aux_handles[i].ctx = ctx;
}
if (!delay_alloc) {CheckAndAllocData(storage_shape, dtype);}
}
// Constructor for a non-default storage chunk that wraps existing TBlobs (static data, nothing allocated here)
Chunk(const NDArrayStorageType storage_type_, const TBlob &data, const std::vector<TBlob> &aux_data, int dev_id)
: static_data(true), delay_alloc(false), storage_type(storage_type_)
{
using namespace mshadow;
CHECK_NE(storage_type, kDefaultStorage);
// init var
var = Engine::Get()->NewVariable();
// init ctx
if (data.dev_mask() == cpu::kDevMask) {ctx = Context::CPU();}
else {CHECK_EQ(data.dev_mask(), gpu::kDevMask);ctx = Context::GPU(dev_id);}
// init shandle
shandle.ctx = ctx;
shandle.dptr = data.dptr_;
shandle.size = data.shape_.Size() * mshadow_sizeof(data.type_flag_);
storage_shape = data.shape_;
// init aux handles
for (const auto &aux : aux_data)
{
Storage::Handle aux_handle;
aux_handle.ctx = ctx;
aux_handle.dptr = aux.dptr_;
aux_handle.size = aux.shape_.Size() * mshadow_sizeof(aux.type_flag_);
aux_handles.push_back(aux_handle);
aux_types.emplace_back(aux.type_flag_);
aux_shapes.emplace_back(aux.shape_);
}
}
//set the shape for the i-th aux data, and update the storage shape if necessary
inline void set_aux_shape(const size_t i, const TShape& shape)
{
aux_shapes[i] = shape;
if (storage_shape.ndim() > 0)
{
if (storage_type == kRowSparseStorage && i == rowsparse::kIdx) {storage_shape[0] = shape[0];}
else if (storage_type == kCSRStorage && i == csr::kIdx) {storage_shape[0] = shape[0];}
}
}
// check if delay alloc is on, do alloc if not yet done
inline void CheckAndAlloc(void)
{
if (delay_alloc)
{
shandle = Storage::Get()->Alloc(shandle.size, shandle.ctx);
#if MXNET_USE_MKLDNN == 1
mkl_mem_ = nullptr;
#endif
delay_alloc = false;
}
}
// Check and alloc memory for a dense ndarray, size is the number of bytes
void CheckAndAlloc(uint64_t dbytes)
{
CHECK_EQ(kDefaultStorage, storage_type) << "...";
// never request less than the size already recorded in the handle
dbytes = std::max(dbytes, static_cast<uint64_t>(shandle.size));
if (delay_alloc)
{
shandle = Storage::Get()->Alloc(dbytes, shandle.ctx);
#if MXNET_USE_MKLDNN == 1
mkl_mem_ = nullptr;
#endif
delay_alloc = false;
}
else if (shandle.size < dbytes)
{
// free storage if necessary and alloc again
if (shandle.size > 0) Storage::Get()->Free(shandle);
// init storage
shandle = Storage::Get()->Alloc(dbytes, shandle.ctx);
#if MXNET_USE_MKLDNN == 1
mkl_mem_ = nullptr;
#endif
}
}
// Alloc aux data and data for sparse storage (row sparse or CSR); any other storage type is fatal
inline void CheckAndAlloc(const TShape &shape, const std::vector<TShape> &aux_shapes, int dtype)
{
// calculate size, perform allocation
if (kRowSparseStorage == storage_type) {
// For row sparse, aux_shape indicates the number of rows to allocate
auto aux_shape = aux_shapes[rowsparse::kIdx];
CheckAndAllocAuxData(rowsparse::kIdx, aux_shape);
TShape storage_shape(shape);
storage_shape[0] = aux_shape[0];
CheckAndAllocData(storage_shape, dtype);
} else if (kCSRStorage == storage_type) {
CheckAndAllocAuxData(csr::kIndPtr, aux_shapes[csr::kIndPtr]);
CheckAndAllocAuxData(csr::kIdx, aux_shapes[csr::kIdx]);
CheckAndAllocData(aux_shapes[csr::kIdx], dtype);
} else {
LOG(FATAL) << "Storage type " << storage_type << " not implemented for CheckAndAlloc";
}
}
// Create the storage handle for data based on shape and dtype, assuming ctx is set; the storage shape is also updated. If data is already allocated, try to reuse the storage; otherwise free the current one and allocate new storage.
void CheckAndAllocData(const TShape &shape, int dtype);
#if MXNET_USE_MKLDNN == 1
// Have MKL memory reference to the data in the default storage
// or create memory for MKLDNN.
void SetMKLMem(const TShape &shape, int dtype);
// If the data is stored in MKLDNN layout, we reorder data in mkl_mem_ and
// save the result in shandle.
void Reorder2Default();
// Reorder data to a specified layout.
void MKLDNNDataReorder(const mkldnn::memory::primitive_desc &desc);
bool IsMKLDNN() const;
bool IsDefault() const;
#endif
// Create the storage handle for aux data based on shape. Assumes ctx, aux shapes and aux types are set; the aux shape is also updated. If aux data is already allocated, try to reuse the storage; otherwise free the current one and allocate new storage.
inline void CheckAndAllocAuxData(size_t i, const TShape &shape)
{
CHECK_EQ(shape.ndim(), 1) << "...";
CHECK_NE(storage_type, kUndefinedStorage) << "...";
CHECK_NE(storage_type, kDefaultStorage) << "...";
if (aux_handles.size() <= i) {aux_handles.resize(i + 1);}
size_t aux_bytes = shape.Size() * mshadow::mshadow_sizeof(aux_types[i]);
if (aux_handles[i].size < aux_bytes)
{
// free storage if necessary and alloc again
if (aux_handles[i].size > 0) Storage::Get()->Free(aux_handles[i]);
// init aux storage
aux_handles[i] = Storage::Get()->Alloc(aux_bytes, ctx);
}
set_aux_shape(i, shape);// init shape
}
~Chunk();//destructor
}; // struct Chunk
void SetTBlob() const;// called by data() before tblob_ is returned
std::shared_ptr<Chunk> ptr_{nullptr};//internal data of NDArray
TShape shape_;//shape of current NDArray
size_t byte_offset_ = 0;//byte offset in chunk
int dtype_ = -1;//type of data
bool reuse_ = false;//whether the NDArray uses memory of another NDArray.
NDArrayStorageType storage_type_ = kUndefinedStorage;//storage type of data
nnvm::NodeEntry entry_;//node entry for autograd
mutable TBlob tblob_;//internal TBlob
}; // class NDArray