Caffe2 - C++ API
A deep learning, cross-platform ML framework
serialization.cpp
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "torch/csrc/generic/serialization.cpp"
#else

#ifdef THC_GENERIC_FILE
#include <c10/cuda/CUDAGuard.h>
#endif

template <class io>
void THPStorage_(writeFileRaw)(THWStorage *self, io fd)
{
#ifdef THC_GENERIC_FILE
  c10::cuda::CUDAGuard guard(self->device());
#endif

  scalar_t *data;
  int64_t size = THWStorage_(size)(LIBRARY_STATE self);
#ifndef THC_GENERIC_FILE
  data = THWStorage_(data)(LIBRARY_STATE self);
#else
  // CUDA storages are staged through a temporary host buffer before writing.
  std::unique_ptr<char[]> cpu_data(new char[size * sizeof(scalar_t)]);
  data = (scalar_t*)cpu_data.get();
  THCudaCheck(cudaMemcpy(data, THWStorage_(data)(LIBRARY_STATE self), size * sizeof(scalar_t), cudaMemcpyDeviceToHost));
#endif
  doWrite(fd, &size, sizeof(int64_t));
  // fast track for bytes and little endian
  if (sizeof(scalar_t) == 1 || THP_nativeByteOrder() == THPByteOrder::THP_LITTLE_ENDIAN) {
    doWrite(fd, data, sizeof(scalar_t) * size);
  } else {
    // big-endian host: convert to little endian in bounded chunks before writing
    int64_t buffer_size = std::min(size, (int64_t)5000);
    std::unique_ptr<uint8_t[]> le_buffer(new uint8_t[buffer_size * sizeof(scalar_t)]);
    for (int64_t i = 0; i < size; i += buffer_size) {
      size_t to_convert = std::min(size - i, buffer_size);
      if (sizeof(scalar_t) == 2) {
        THP_encodeInt16Buffer((uint8_t*)le_buffer.get(),
                              (const int16_t*)data + i,
                              THPByteOrder::THP_LITTLE_ENDIAN,
                              to_convert);
      } else if (sizeof(scalar_t) == 4) {
        THP_encodeInt32Buffer((uint8_t*)le_buffer.get(),
                              (const int32_t*)data + i,
                              THPByteOrder::THP_LITTLE_ENDIAN,
                              to_convert);
      } else if (sizeof(scalar_t) == 8) {
        THP_encodeInt64Buffer((uint8_t*)le_buffer.get(),
                              (const int64_t*)data + i,
                              THPByteOrder::THP_LITTLE_ENDIAN,
                              to_convert);
      }
      doWrite(fd, le_buffer.get(), to_convert * sizeof(scalar_t));
    }
  }
}

template void THPStorage_(writeFileRaw<int>)(THWStorage *self, int fd);
template void THPStorage_(writeFileRaw<PyObject*>)(THWStorage *self, PyObject* fd);

template <class io>
THWStorage * THPStorage_(readFileRaw)(io file, THWStorage *_storage)
{
#ifdef THC_GENERIC_FILE
  c10::cuda::OptionalCUDAGuard guard;
  if (_storage != nullptr) {
    guard.set_device(_storage->device());
  }
#endif

  scalar_t *data;
  int64_t size;
  doRead(file, &size, sizeof(int64_t));
  THWStoragePtr storage;
  if (_storage == nullptr) {
    storage = THWStorage_(newWithSize)(LIBRARY_STATE size);
  } else {
    THPUtils_assert(THWStorage_(size)(LIBRARY_STATE _storage) == size,
                    "storage has wrong size: expected %ld got %ld",
                    size, THWStorage_(size)(LIBRARY_STATE _storage));
    storage = _storage;
  }

#ifndef THC_GENERIC_FILE
  data = THWStorage_(data)(LIBRARY_STATE storage);
#else
  // CUDA storages are read into a temporary host buffer and copied back below.
  std::unique_ptr<char[]> cpu_data(new char[size * sizeof(scalar_t)]);
  data = (scalar_t*)cpu_data.get();
#endif

  // fast track for bytes and little endian
  if (sizeof(scalar_t) == 1 || THP_nativeByteOrder() == THPByteOrder::THP_LITTLE_ENDIAN) {
    doRead(file, data, sizeof(scalar_t) * THWStorage_(size)(LIBRARY_STATE storage));
  } else {
    // big-endian host: decode little-endian data in bounded chunks
    int64_t buffer_size = std::min(size, (int64_t)5000);
    std::unique_ptr<uint8_t[]> le_buffer(new uint8_t[buffer_size * sizeof(scalar_t)]);

    for (int64_t i = 0; i < size; i += buffer_size) {
      size_t to_convert = std::min(size - i, buffer_size);
      doRead(file, le_buffer.get(), sizeof(scalar_t) * to_convert);

      if (sizeof(scalar_t) == 2) {
        THP_decodeInt16Buffer((int16_t*)data + i,
                              le_buffer.get(),
                              THPByteOrder::THP_LITTLE_ENDIAN,
                              to_convert);
      } else if (sizeof(scalar_t) == 4) {
        THP_decodeInt32Buffer((int32_t*)data + i,
                              le_buffer.get(),
                              THPByteOrder::THP_LITTLE_ENDIAN,
                              to_convert);
      } else if (sizeof(scalar_t) == 8) {
        THP_decodeInt64Buffer((int64_t*)data + i,
                              le_buffer.get(),
                              THPByteOrder::THP_LITTLE_ENDIAN,
                              to_convert);
      }
    }
  }

#ifdef THC_GENERIC_FILE
  THCudaCheck(cudaMemcpy(THWStorage_(data)(LIBRARY_STATE storage), data, size * sizeof(scalar_t), cudaMemcpyHostToDevice));
#endif
  return storage.release();
}

template THWStorage* THPStorage_(readFileRaw<int>)(int fd, THWStorage* storage);
template THWStorage* THPStorage_(readFileRaw<PyObject*>)(PyObject* fd, THWStorage* storage);

#endif
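
The listing above writes a size header followed by the raw elements, and only falls back to byte-order conversion when the host is not little-endian, re-encoding the data in chunks of at most 5000 elements per doWrite/doRead. The standalone sketch below (not part of the PyTorch sources) illustrates that same chunked little-endian pattern for a 32-bit element type; plain stdio stands in for doWrite/doRead, and the hand-rolled byte-swap helpers stand in for THP_encodeInt32Buffer/THP_decodeInt32Buffer. All names in it are illustrative.

// Standalone sketch: chunked little-endian serialization of a 32-bit buffer,
// mirroring the writeFileRaw/readFileRaw structure above.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <memory>
#include <vector>

// Stand-in for THP_nativeByteOrder(): true if this host is little-endian.
static bool host_is_little_endian() {
  const uint16_t probe = 1;
  uint8_t first_byte;
  std::memcpy(&first_byte, &probe, 1);
  return first_byte == 1;
}

// Stand-in for THP_encodeInt32Buffer: emit `count` values as little-endian bytes.
static void encode_int32_le(uint8_t* dst, const int32_t* src, size_t count) {
  for (size_t i = 0; i < count; ++i) {
    uint32_t v = static_cast<uint32_t>(src[i]);
    dst[4 * i + 0] = static_cast<uint8_t>(v);
    dst[4 * i + 1] = static_cast<uint8_t>(v >> 8);
    dst[4 * i + 2] = static_cast<uint8_t>(v >> 16);
    dst[4 * i + 3] = static_cast<uint8_t>(v >> 24);
  }
}

// Stand-in for THP_decodeInt32Buffer: read `count` little-endian values.
static void decode_int32_le(int32_t* dst, const uint8_t* src, size_t count) {
  for (size_t i = 0; i < count; ++i) {
    uint32_t v = static_cast<uint32_t>(src[4 * i + 0]) |
                 (static_cast<uint32_t>(src[4 * i + 1]) << 8) |
                 (static_cast<uint32_t>(src[4 * i + 2]) << 16) |
                 (static_cast<uint32_t>(src[4 * i + 3]) << 24);
    dst[i] = static_cast<int32_t>(v);
  }
}

// Mirrors writeFileRaw: size header first, then the data, converting in bounded
// chunks only when the host byte order differs from the on-disk little-endian format.
static void write_raw(std::FILE* f, const int32_t* data, int64_t size) {
  std::fwrite(&size, sizeof(int64_t), 1, f);
  if (host_is_little_endian()) {
    std::fwrite(data, sizeof(int32_t), static_cast<size_t>(size), f);
    return;
  }
  const int64_t buffer_size = std::min<int64_t>(size, 5000);
  std::unique_ptr<uint8_t[]> le_buffer(new uint8_t[buffer_size * sizeof(int32_t)]);
  for (int64_t i = 0; i < size; i += buffer_size) {
    const size_t to_convert = static_cast<size_t>(std::min(size - i, buffer_size));
    encode_int32_le(le_buffer.get(), data + i, to_convert);
    std::fwrite(le_buffer.get(), sizeof(int32_t), to_convert, f);
  }
}

// Mirrors readFileRaw for the same on-disk format.
static std::vector<int32_t> read_raw(std::FILE* f) {
  int64_t size = 0;
  std::fread(&size, sizeof(int64_t), 1, f);
  std::vector<int32_t> out(static_cast<size_t>(size));
  if (host_is_little_endian()) {
    std::fread(out.data(), sizeof(int32_t), out.size(), f);
    return out;
  }
  const int64_t buffer_size = std::min<int64_t>(size, 5000);
  std::unique_ptr<uint8_t[]> le_buffer(new uint8_t[buffer_size * sizeof(int32_t)]);
  for (int64_t i = 0; i < size; i += buffer_size) {
    const size_t to_convert = static_cast<size_t>(std::min(size - i, buffer_size));
    std::fread(le_buffer.get(), sizeof(int32_t), to_convert, f);
    decode_int32_le(out.data() + i, le_buffer.get(), to_convert);
  }
  return out;
}

Bounding the conversion buffer at 5000 elements keeps the temporary allocation small and constant regardless of storage size, which is the same trade-off the listing makes; on little-endian hosts both paths reduce to a single bulk write or read.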