Caffe2 - C++ API
A deep learning, cross-platform ML framework
mkl_memory.h
#ifndef CAFFE2_UTILS_MKL_MKL_MEMORY_H_
#define CAFFE2_UTILS_MKL_MKL_MEMORY_H_

#include <limits>
#include <memory>
#include <mutex>
#include <string>
#include <vector>

#include "caffe2/core/flags.h" // for CAFFE2_DECLARE_bool
#include "caffe2/core/tensor.h" // for TIndex
#include "caffe2/mkl/utils/mkl_dnn_cppwrapper.h"

// A global boolean variable that controls the behavior when we call View() on
// an MKLMemory: if it is set to true, then the View() function will actually
// change the underlying storage. If it is set to false, an implicit copy is
// triggered but the original storage is not affected.
CAFFE2_DECLARE_bool(caffe2_mkl_implicit_layout_change);

namespace caffe2 {
namespace mkl {

template <typename T>
class PrimitiveWrapper {
 public:
  PrimitiveWrapper() {}
  // Creates a primitive wrapper from an existing primitive. The wrapper
  // takes over ownership.
  explicit PrimitiveWrapper(dnnPrimitive_t primitive) : primitive_(primitive) {}

  template <typename Creator, typename FirstArg, typename... Args>
  PrimitiveWrapper(Creator creator, FirstArg&& arg, Args&&... args) {
    creator(&primitive_, arg, args...);
  }

  ~PrimitiveWrapper() {
    if (primitive_) {
      MKLDNN_CHECK(dnnDelete<T>(primitive_));
    }
  }

  template <typename Creator, typename... Args>
  void Reset(Creator creator, Args&&... args) {
    if (primitive_) {
      MKLDNN_SAFE_CALL(dnnDelete<T>(primitive_));
    }
    creator(&primitive_, args...);
  }

  operator dnnPrimitive_t() const {
    return primitive_;
  }

 private:
  dnnPrimitive_t primitive_ = 0;
  DISABLE_COPY_AND_ASSIGN(PrimitiveWrapper);
};
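
// Usage sketch (illustrative only, not part of the original header): the
// wrapper is a small RAII guard around one dnnPrimitive_t. The layouts and
// buffers named below are assumed to exist elsewhere.
//
//   // Build a conversion primitive in place: dnnConversionCreate<float>
//   // fills the wrapped handle, and ~PrimitiveWrapper runs dnnDelete.
//   PrimitiveWrapper<float> convert(
//       dnnConversionCreate<float>, src_layout, dst_layout);
//   // The implicit operator dnnPrimitive_t() lets the wrapper be passed
//   // wherever a raw primitive handle is expected.
//   MKLDNN_SAFE_CALL(
//       dnnConversionExecute<float>(convert, src_buffer, dst_buffer));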

template <typename T>
class LayoutWrapper {
 public:
  LayoutWrapper() {}
  // Create a user layout from a TensorCPU with the given shapes.
  explicit LayoutWrapper(const TensorCPU& tensor) {
    Reset(tensor);
  }

  // Create an internal layout from the primitive and type.
  LayoutWrapper(const dnnPrimitive_t primitive, const dnnResourceType_t type) {
    Reset(primitive, type);
  }

  // Create a user layout from the given dimension, size and strides.
  LayoutWrapper(
      const size_t dimension,
      const size_t size[],
      const size_t strides[]) {
    Reset(dimension, size, strides);
  }

  // Destructs the layout wrapper.
  ~LayoutWrapper() {
    if (layout_)
      MKLDNN_CHECK(dnnLayoutDelete<T>(layout_));
  }

  // Create a user layout from a TensorCPU with the given shapes.
  void Reset(const TensorCPU& tensor) {
    if (layout_)
      MKLDNN_CHECK(dnnLayoutDelete<T>(layout_));
    CAFFE_ENFORCE(tensor.size(), "Cannot reset with an empty tensor.");
    const size_t dimension = tensor.ndim();
    vector<size_t> size(dimension);
    vector<size_t> strides(dimension);
    for (size_t i = 0; i < dimension; ++i) {
      size[i] = tensor.dim(dimension - i - 1);
      strides[i] = (i == 0) ? 1 : strides[i - 1] * size[i - 1];
    }
    MKLDNN_SAFE_CALL(
        dnnLayoutCreate<T>(&layout_, dimension, size.data(), strides.data()));
  }

  // Create an internal layout from the primitive and type.
  void Reset(const dnnPrimitive_t primitive, const dnnResourceType_t type) {
    CAFFE_ENFORCE(primitive, "Cannot reset with an unknown primitive.");
    CAFFE_ENFORCE(
        type != dnnResourceNumber,
        "Cannot reset with an unknown resource number.");
    if (layout_) {
      MKLDNN_CHECK(dnnLayoutDelete<T>(layout_));
    }
    MKLDNN_SAFE_CALL(
        dnnLayoutCreateFromPrimitive<T>(&layout_, primitive, type));
  }

  // Create a user layout from the given dimension, size and strides.
  void
  Reset(const size_t dimension, const size_t size[], const size_t strides[]) {
    if (layout_)
      MKLDNN_CHECK(dnnLayoutDelete<T>(layout_));
    MKLDNN_SAFE_CALL(dnnLayoutCreate<T>(&layout_, dimension, size, strides));
  }

  operator dnnLayout_t() const {
    return layout_;
  }

 private:
  dnnLayout_t layout_ = 0;
  DISABLE_COPY_AND_ASSIGN(LayoutWrapper);
};
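
// Usage sketch (illustrative only): the two common ways to build a layout.
// MKL expects sizes and strides in minor-to-major order, which is the
// reverse of Caffe2's dims, as the Reset(tensor) overload above shows;
// "primitive" below is an assumed, already-created dnnPrimitive_t.
//
//   // A 2 x 3 C-contiguous float matrix, described minor-to-major:
//   const size_t size[2] = {3, 2};
//   const size_t strides[2] = {1, 3};
//   LayoutWrapper<float> user_layout(2, size, strides);
//
//   // Or ask an existing primitive which layout it wants for a resource:
//   LayoutWrapper<float> internal_layout(primitive, dnnResourceSrc);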

/**
 * @brief A wrapper around an opaque MKL internal resource that has certain
 * layouts and conversion primitives set up.
 */
template <typename T>
class MKLMemory {
 public:
  // Initializes an empty MKLMemory.
  MKLMemory() {}
  // Initialize an MKLMemory with the given size, strides, dnn
  // primitive and type.
  MKLMemory(
      const size_t dimension,
      const size_t size[],
      const size_t strides[],
      const dnnPrimitive_t primitive = nullptr,
      const dnnResourceType_t type = dnnResourceNumber,
      bool share_mem_if_possible = false) {
    Reset(dimension, size, strides, primitive, type, share_mem_if_possible);
  }

  // Initialize an MKLMemory, with the given dimension assuming a C-contiguous
  // storage.
  template <typename IndexType>
  explicit MKLMemory(
      const vector<IndexType>& dims,
      const dnnPrimitive_t primitive = nullptr,
      const dnnResourceType_t type = dnnResourceNumber,
      bool share_mem_if_possible = false) {
    Reset(dims, primitive, type, share_mem_if_possible);
  }

  // Initialize an MKLMemory with the given size, strides, dnn
  // primitive and type.
  void Reset(
      const size_t dimension,
      const size_t size[],
      const size_t strides[],
      const dnnPrimitive_t primitive = nullptr,
      const dnnResourceType_t type = dnnResourceNumber,
      bool share_mem_if_possible = false) {
    buffer_.reset();
    dims_.resize(dimension);
    size_ = 1;
    for (size_t i = 0; i < dimension; ++i) {
      dims_[i] = size[dimension - 1 - i];
      size_ *= dims_[i];
    }
    user_layout_.Reset(dimension, size, strides);
    if (primitive) {
      layout_.Reset(primitive, type);
    } else {
      layout_.Reset(dimension, size, strides);
    }
    convert_in_.Reset(dnnConversionCreate<T>, user_layout_, layout_);
    convert_out_.Reset(dnnConversionCreate<T>, layout_, user_layout_);
    share_mem_if_possible_ = share_mem_if_possible;
    layout_is_user_layout_ = dnnLayoutCompare<T>(layout_, user_layout_);
    VLOG(2) << "layout is user layout? " << layout_is_user_layout_;
    if (!share_mem_if_possible_) {
      // If we are not going to share memory, we will simply allocate
      // memory upfront.
      buffer();
    }
  }

  // Initialize an MKLMemory, with the given dimension assuming a C-contiguous
  // storage.
  template <typename IndexType>
  void Reset(
      const vector<IndexType>& dims,
      const dnnPrimitive_t primitive = nullptr,
      const dnnResourceType_t type = dnnResourceNumber,
      bool share_mem_if_possible = false) {
    buffer_.reset();
    dims_.resize(dims.size());
    size_ = 1;
    for (size_t i = 0; i < dims.size(); ++i) {
      dims_[i] = dims[i];
      size_ *= dims_[i];
    }
    size_t dimension = dims.size();
    vector<size_t> size(dimension);
    vector<size_t> strides(dimension);
    for (size_t i = 0; i < dimension; ++i) {
      size[i] = dims[dimension - i - 1];
      strides[i] = (i == 0) ? 1 : strides[i - 1] * size[i - 1];
    }
    user_layout_.Reset(dims.size(), size.data(), strides.data());
    if (primitive) {
      layout_.Reset(primitive, type);
    } else {
      layout_.Reset(dimension, size.data(), strides.data());
    }
    convert_in_.Reset(dnnConversionCreate<T>, user_layout_, layout_);
    convert_out_.Reset(dnnConversionCreate<T>, layout_, user_layout_);
    share_mem_if_possible_ = share_mem_if_possible;
    layout_is_user_layout_ = dnnLayoutCompare<T>(layout_, user_layout_);
    VLOG(2) << "layout is user layout? " << layout_is_user_layout_;
    if (!share_mem_if_possible_) {
      // If we are not going to share memory, we will simply allocate
      // memory upfront.
      buffer();
    }
  }
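
  // Usage sketch (illustrative only): constructing from Caffe2-style dims.
  // With no primitive, the internal layout equals the user layout and a
  // plain C-contiguous buffer is allocated up front; with a primitive (the
  // "conv" handle below is assumed), the internal layout is whatever that
  // primitive prefers for the given resource type.
  //
  //   MKLMemory<float> plain(vector<TIndex>{16, 3, 32, 32});
  //   MKLMemory<float> output(plain.dims(), conv, dnnResourceDst);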

  /**
   * Resizes the tensor without touching underlying storage.
   * The total number of elements must stay the same, and the current
   * layout must be the plain user layout.
   */
  template <typename IndexType>
  void Reshape(const vector<IndexType>& dims) {
    CAFFE_ENFORCE(
        layout_is_user_layout_,
        "Reshape is not allowed for custom layouts. "
        "Convert to plain layout before invoking Reshape().");

    TIndex new_size = 1;
    for (size_t i = 0; i < dims.size(); ++i) {
      CAFFE_ENFORCE_GE_WITH_CALLER(dims[i], 0);
      new_size *= dims[i];
    }
    CAFFE_ENFORCE_WITH_CALLER(
        new_size == size_,
        "New size and old size are not equal. Reshape is not possible.");

    vector<TIndex> new_dims(dims.size());
    vector<size_t> size(dims.size());
    vector<size_t> strides(dims.size());
    for (size_t i = 0; i < dims.size(); ++i) {
      new_dims[i] = dims[i];
      size[i] = dims[dims.size() - i - 1];
      strides[i] = (i == 0) ? 1 : strides[i - 1] * size[i - 1];
    }
    dims_ = new_dims;
    user_layout_.Reset(dims.size(), size.data(), strides.data());
    layout_.Reset(dims.size(), size.data(), strides.data());
    convert_in_.Reset(dnnConversionCreate<T>, user_layout_, layout_);
    convert_out_.Reset(dnnConversionCreate<T>, layout_, user_layout_);
  }
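
  // Usage sketch (illustrative only): Reshape reinterprets dims_ without
  // moving data, so the element count must match and the current layout
  // must be the plain user layout, or the enforces above fire.
  //
  //   MKLMemory<float> m(vector<TIndex>{2, 6});
  //   m.Reshape(vector<TIndex>{3, 4}); // fine: 12 elements both ways
  //   // m.Reshape(vector<TIndex>{5, 5}); // throws: 25 != 12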

  // Destructs the MKLMemory.
  ~MKLMemory() {}

  void CopyFrom(const void* ptr) {
    if (share_mem_if_possible_ && layout_is_user_layout_) {
      VLOG(2) << "Sharing underlying memory and skipping copy.";
      buffer_.reset(const_cast<void*>(ptr), [](void*) -> void {});
    } else if (size_ == 0) {
      VLOG(2) << "Cannot copy into an empty MKL buffer.";
    } else {
      VLOG(2) << "Copying external content.";
      MKLDNN_SAFE_CALL(dnnConversionExecute<T>(
          convert_in_, const_cast<void*>(ptr), buffer()));
    }
  }

  void CopyFrom(const TensorCPU& tensor) {
    CAFFE_ENFORCE_EQ(
        tensor.dims(),
        dims_,
        "Dims do not match the expected dims of the resource.");
    CopyFrom(tensor.template data<T>());
  }

  void CopyFrom(const MKLMemory<T>& other) {
    CAFFE_ENFORCE_EQ(
        other.dims(),
        dims_,
        "Dims do not match the expected dims of the resource.");

    if (share_mem_if_possible_ && dnnLayoutCompare<T>(other.layout_, layout_)) {
      buffer_ = other.buffer_;
    } else if (size_ == 0) {
      VLOG(2) << "Cannot copy between empty MKL buffers.";
    } else {
      PrimitiveWrapper<T> convert(
          dnnConversionCreate<T>, other.layout_, layout_);
      MKLDNN_SAFE_CALL(
          dnnConversionExecute<T>(convert, other.buffer(), buffer()));
    }
  }

  bool ShareFromRaw(const void* ptr) {
    if (share_mem_if_possible_ && layout_is_user_layout_) {
      buffer_.reset(const_cast<void*>(ptr), [](void*) -> void {});
      return true;
    } else {
      return false;
    }
  }

  bool ShareFromTensor(const TensorCPU& tensor) {
    CAFFE_ENFORCE_EQ(
        tensor.dims(),
        dims_,
        "Dims do not match the expected dims of the resource.");
    return ShareFromRaw(tensor.template data<T>());
  }

  bool ShareFrom(const MKLMemory<T>& other) {
    if (share_mem_if_possible_ && dnnLayoutCompare<T>(other.layout_, layout_)) {
      VLOG(2) << "Sharing underlying memory.";
      buffer_ = other.buffer_;
      if (!buffer_.get()) {
        VLOG(2) << "Warning: the source MKLMemory has no content yet, so the "
                   "sharing actually has no effect.";
      }
      return true;
    } else {
      VLOG(2) << "Not sharing underlying memory.";
      return false;
    }
  }

  void CopyTo(void* ptr) const {
    if (buffer_.get() == ptr) {
      // This is already mapped to the same memory region. Skip the copy.
      VLOG(2) << "CopyTo does not need actual copying, as we are sharing "
                 "memory with the output.";
      return;
    }
    CAFFE_ENFORCE(
        buffer_.get(), "Cannot copy out from an uninitialized MKLMemory.");
    VLOG(2) << "Copying to external memory.";
    MKLDNN_SAFE_CALL(dnnConversionExecute<T>(convert_out_, buffer_.get(), ptr));
  }

  void CopyTo(TensorCPU* tensor) const {
    if (tensor->size() > 0 && buffer_.get() == tensor->mutable_data<T>()) {
      // This is already mapped to the same memory region. Skip the copy.
      VLOG(2) << "CopyTo does not need actual copying, as we are sharing "
                 "memory with the output.";
      return;
    }
    tensor->Resize(dims_);
    CopyTo(tensor->mutable_data<T>());
  }
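
  // Usage sketch (illustrative only): a round trip through an MKLMemory.
  // CopyFrom runs the user-to-internal conversion; CopyTo resizes the
  // destination tensor and runs the internal-to-user conversion. The
  // TensorCPU dims constructor is assumed from the Caffe2 tensor API.
  //
  //   TensorCPU input(vector<TIndex>{8, 16});
  //   MKLMemory<float> mem(input.dims());
  //   mem.CopyFrom(input);
  //   TensorCPU output;
  //   mem.CopyTo(&output); // output now has dims {8, 16}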

  // Copies to another MKLMemory.
  //
  // If the destination dims differ from ours, the destination is first Reset
  // with our dims and the given primitive and type before the conversion runs.
  void CopyTo(
      MKLMemory<T>* other,
      const dnnPrimitive_t primitive = nullptr,
      const dnnResourceType_t type = dnnResourceNumber) {
    if (buffer_ && buffer_.get() == other->buffer_.get()) {
      CAFFE_ENFORCE(
          dnnLayoutCompare<T>(other->layout_, layout_),
          "MKLMemory layout does not match, despite in-place buffers.");
      CAFFE_ENFORCE(
          other->dims() == dims(),
          "MKLMemory dimensions do not match, despite in-place buffers.");
      VLOG(2) << "CopyTo does not need actual copying, as we are sharing "
                 "memory with the output.";
      // This is already mapped to the same memory region. Skip the copy.
      return;
    }
    // TODO(jiayq): if primitive creation is a big overhead and we will be
    // consistently copying stuff with fixed src and dst layouts, consider
    // making a cache for the primitive below.
    VLOG(2) << "CopyTo requires copying. Performing direct copy.";
    if (dims() != other->dims()) {
      other->Reset(dims(), primitive, type);
    }
    if (size_ == 0) {
      VLOG(2) << "Cannot copy between empty MKL buffers.";
      return;
    }
    CAFFE_ENFORCE(
        buffer_.get(), "Cannot copy out from an uninitialized MKLMemory.");
    PrimitiveWrapper<T> convert(
        dnnConversionCreate<T>, layout_, other->layout_);
    MKLDNN_SAFE_CALL(
        dnnConversionExecute<T>(convert, buffer_.get(), other->buffer()));
  }

  inline void* buffer() {
    if (buffer_ == nullptr) {
      CAFFE_ENFORCE(
          layout_ != nullptr, "Trying to allocate buffer but layout is empty.");
      if (size_ == 0) {
        VLOG(2) << "Cannot allocate an empty MKL buffer.";
        return buffer_.get();
      }
      void* allocated = nullptr;
      MKLDNN_SAFE_CALL(dnnAllocateBuffer<T>(&allocated, layout_));
      buffer_.reset(allocated, [](void* ptr) -> void {
        MKLDNN_CHECK(dnnReleaseBuffer<T>(ptr));
      });
    }
    return buffer_.get();
  }

  // MKLDNN does not use const void* even for the inputs, so we will
  // have to use void* and rely on the underlying implementation to make
  // sure that the buffer is actually not changed.
  inline void* buffer() const {
    CAFFE_ENFORCE(
        buffer_ != nullptr, "Trying to refer to an unallocated buffer.");
    return buffer_.get();
  }

  inline const vector<TIndex>& dims() const {
    return dims_;
  }

  inline int ndim() const { return dims_.size(); }

  inline int dim32(const int i) const {
    CAFFE_ENFORCE_LT(dims_.at(i), std::numeric_limits<int>::max());
    return static_cast<int>(dims_[i]);
  }

  /**
   * Returns the size (i.e., the number of items) in the buffer.
   */
  inline TIndex size() const {
    return size_;
  }

  /**
   * Returns the i-th dimension of the tensor.
   */
  inline TIndex dim(const int i) const {
    return dims_.at(i);
  }

  inline const LayoutWrapper<T>& layout() const {
    return layout_;
  }

  inline bool is_user_layout() const {
    return layout_is_user_layout_;
  }

  // Returns a view of the content. We mark this function const, but note
  // that the returned std::shared_ptr is not const protected - user discretion
  // is recommended for correctness.
  std::shared_ptr<void> View(
      dnnLayout_t layout_wanted,
      dnnPrimitive_t primitive = nullptr,
      dnnResourceType_t type = dnnResourceNumber) const {
    std::lock_guard<std::mutex> lock(buffer_lock_);
    if (dnnLayoutCompare<T>(layout_wanted, layout_)) {
      // If they are the same, return the original content.
      VLOG(2) << "Creating a view without the need of copying.";
      return std::shared_ptr<void>(buffer_);
    } else {
      void* temp_buffer;
      VLOG(2) << "Creating a view with copying.";
      MKLDNN_SAFE_CALL(dnnAllocateBuffer<T>(&temp_buffer, layout_wanted));
      PrimitiveWrapper<T> convert(
          dnnConversionCreate<T>, layout_, layout_wanted);
      MKLDNN_SAFE_CALL(dnnConversionExecute<T>(
          convert, buffer_.get(), temp_buffer));
      if (primitive && FLAGS_caffe2_mkl_implicit_layout_change) {
        VLOG(2) << "Implicit layout change set. "
                   "Changing the underlying storage.";
        // We will need to call Reset to set up all the member variables.
        // This is not thread safe, so we might want to double check if this
        // makes sense in actual use cases.
        const_cast<MKLMemory<T>*>(this)->Reset(
            dims_, primitive, type, share_mem_if_possible_);
        CAFFE_ENFORCE(
            dnnLayoutCompare<T>(layout_wanted, layout_),
            "You passed in a target layout that is not "
            "generated by the given primitive and type.");
        buffer_.reset(temp_buffer, [](void* ptr) -> void {
          MKLDNN_CHECK(dnnReleaseBuffer<T>(ptr));
        });
        return std::shared_ptr<void>(buffer_);
      } else {
        return std::shared_ptr<void>(temp_buffer, [](void* ptr) -> void {
          MKLDNN_CHECK(dnnReleaseBuffer<T>(ptr));
        });
      }
    }
  }
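
  // Usage sketch (illustrative only): View() returns a shared_ptr to data in
  // the requested layout, copying only when the layouts differ; the wanted
  // layout and primitive below are assumed to come from the consuming op.
  //
  //   auto view = mem.View(wanted_layout, primitive, dnnResourceSrc);
  //   // view.get() points at data in wanted_layout. If a primitive is given
  //   // and FLAGS_caffe2_mkl_implicit_layout_change is set, mem itself has
  //   // been Reset to that layout as well; otherwise mem is untouched and
  //   // the temporary buffer lives for as long as `view` is held.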

 private:
  bool share_mem_if_possible_;
  bool layout_is_user_layout_;
  // The internal buffer in the specific dnn layout.
  // It is marked mutable but any modification in a const function should
  // be accompanied by the buffer lock; see the View() function.
  mutable std::shared_ptr<void> buffer_;
  // A mutex to control access to the buffer in the View() function.
  mutable std::mutex buffer_lock_;
  // The dimensions in the same order as Caffe2 uses them. This is used to
  // interface with C2.
  vector<TIndex> dims_;
  // Number of items in the buffer.
  TIndex size_ = -1;
  // The user dnn layout.
  LayoutWrapper<T> user_layout_;
  // The internal dnn layout.
  LayoutWrapper<T> layout_;
  // The primitive to use to convert from the user layout to the internal layout.
  PrimitiveWrapper<T> convert_in_;
  // The primitive to use to convert from the internal layout to the user layout.
  PrimitiveWrapper<T> convert_out_;

  DISABLE_COPY_AND_ASSIGN(MKLMemory);
};

template <typename T>
class MKLWorkspace {
 public:
  MKLWorkspace(const LayoutWrapper<T>& layout) {
    MKLDNN_SAFE_CALL(mkl::dnnAllocateBuffer<T>(&buffer_, layout));
  }
  ~MKLWorkspace() {
    dnnReleaseBuffer<T>(buffer_);
  }
  T* buffer() {
    return reinterpret_cast<T*>(buffer_);
  }

 private:
  void* buffer_;
  DISABLE_COPY_AND_ASSIGN(MKLWorkspace);
};
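
// Usage sketch (illustrative only): some MKL primitives (pooling, for
// example) need scratch space; MKLWorkspace ties that allocation to a scope.
// The "pool" primitive below is assumed to exist.
//
//   LayoutWrapper<float> ws_layout(pool, dnnResourceWorkspace);
//   MKLWorkspace<float> ws(ws_layout);
//   // ws.buffer() stays valid until ws goes out of scope and releases it.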

} // namespace mkl
} // namespace caffe2

#endif // CAFFE2_UTILS_MKL_MKL_MEMORY_H_