// Caffe2 - C++ API
// A deep learning, cross-platform ML framework.
// File: cuda_tensor_interop_test.cpp — tests zero-copy interop between
// Caffe2 tensors and PyTorch (ATen) tensors on CUDA devices.
1 #include "gtest/gtest.h"
2 
3 #include "ATen/ATen.h"
4 #include <ATen/cuda/CUDAContext.h>
5 #include <caffe2/core/init.h>
6 #include <caffe2/core/operator.h>
7 #include <caffe2/core/context_gpu.h>
8 #include <caffe2/utils/math.h>
9 
10 // dumbest possible copies
11 template<typename T>
12 T cuda_get(T* addr) {
13  T result;
14  CUDA_ENFORCE(cudaMemcpy(&result, addr, sizeof(T), cudaMemcpyDefault));
15  return result;
16 }
17 
18 template<typename T>
19 void cuda_set(T* addr, T value) {
20  CUDA_ENFORCE(cudaMemcpy(addr, &value, sizeof(T), cudaMemcpyDefault));
21 }
22 
23 TEST(CUDACaffe2ToPytorch, SimpleLegacy) {
24  if (!at::cuda::is_available()) return;
25  caffe2::Tensor c2_tensor(caffe2::CUDA);
26  c2_tensor.Resize(4, 4);
27  auto data = c2_tensor.mutable_data<int64_t>();
28  {
29  caffe2::CUDAContext context;
30  caffe2::math::Set<int64_t>(16, 777, data, &context);
31  }
32  at::Tensor at_tensor(c2_tensor);
33  ASSERT_TRUE(&at_tensor.type() != nullptr);
34  ASSERT_TRUE(at_tensor.is_cuda());
35 
36  auto at_cpu = at_tensor.cpu();
37  auto it = at_cpu.data<int64_t>();
38  for (int64_t i = 0; i < 16; i++) {
39  ASSERT_EQ(it[i], 777);
40  }
41 }
42 
43 TEST(CUDACaffe2ToPytorch, Simple) {
44  if (!at::cuda::is_available()) return;
45  caffe2::Tensor c2_tensor =
46  caffe2::empty({4, 4}, at::dtype<int64_t>().device(caffe2::CUDA));
47  auto data = c2_tensor.mutable_data<int64_t>();
48  {
49  caffe2::CUDAContext context;
50  caffe2::math::Set<int64_t>(16, 777, data, &context);
51  }
52  at::Tensor at_tensor(c2_tensor);
53  ASSERT_TRUE(&at_tensor.type() != nullptr);
54  ASSERT_TRUE(at_tensor.is_cuda());
55 
56  auto at_cpu = at_tensor.cpu();
57  auto it = at_cpu.data<int64_t>();
58  for (int64_t i = 0; i < 16; i++) {
59  ASSERT_EQ(it[i], 777);
60  }
61 }
62 
63 TEST(CUDACaffe2ToPytorch, Op) {
64  if (!at::cuda::is_available()) return;
65  caffe2::Tensor c2_tensor =
66  caffe2::empty({3, 3}, at::dtype<int64_t>().device(caffe2::CUDA));
67  auto data = c2_tensor.mutable_data<int64_t>();
68  {
69  caffe2::CUDAContext context;
70  caffe2::math::Set<int64_t>(9, 111, data, &context);
71  }
72  at::Tensor at_tensor(c2_tensor);
73  ASSERT_TRUE(at_tensor.is_cuda());
74 
75  ASSERT_EQ(at::sum(at_tensor).item<int64_t>(), 999);
76 }
77 
78 TEST(CUDAPytorchToCaffe2, Op) {
79  if (!at::cuda::is_available()) return;
80  caffe2::Workspace workspace;
81  caffe2::NetDef net;
82 
83  auto at_tensor_a = at::ones({5, 5}, at::dtype(at::kFloat).device(at::kCUDA));
84  auto at_tensor_b = at::ones({5, 5}, at::dtype(at::kFloat).device(at::kCUDA));
85  auto at_tensor_c = at::ones({5, 5}, at::dtype(at::kFloat).device(at::kCUDA));
86 
87  auto* c2_tensor_a = BlobSetTensor(workspace.CreateBlob("a"), caffe2::Tensor(at_tensor_a));
88  auto* c2_tensor_b = BlobSetTensor(workspace.CreateBlob("b"), caffe2::Tensor(at_tensor_b));
89 
90  // Test Alias
91  {
92  caffe2::Tensor c2_tensor_from_aten(at_tensor_c);
93  BlobSetTensor(workspace.CreateBlob("c"), c2_tensor_from_aten.Alias());
94  }
95 
96  {
97  auto op = net.add_op();
98  op->set_type("Sum");
99  op->add_input("a");
100  op->add_input("b");
101  op->add_input("c");
102  op->add_output("d");
103  op->mutable_device_option()->set_device_type(caffe2::PROTO_CUDA);
104  }
105 
106  workspace.RunNetOnce(net);
107 
108  const auto& result = workspace.GetBlob("d")->Get<caffe2::Tensor>();
109  ASSERT_EQ(result.GetDeviceType(), caffe2::CUDA);
110 
111  auto data = result.data<float>();
112  for (int64_t i = 0; i < 25; i++) {
113  ASSERT_EQ(cuda_get(data + i), 3.0);
114  }
115  at::Tensor at_result(result);
116  ASSERT_TRUE(at_result.is_cuda());
117  ASSERT_EQ(at::sum(at_result).item<float>(), 75);
118 }
119 
120 TEST(CUDAPytorchToCaffe2, SharedStorageWrite) {
121  if (!at::cuda::is_available()) return;
122  auto at_tensor_a = at::ones({5, 5}, at::dtype(at::kFloat).device(at::kCUDA));
123  auto at_tensor_b = at_tensor_a.view({25});
124 
125  caffe2::Tensor c2_tensor_a(at_tensor_a);
126  caffe2::Tensor c2_tensor_b(at_tensor_b);
127 
128  // change is visible everywhere
129  cuda_set<float>(c2_tensor_a.mutable_data<float>() + 1, 123);
130  ASSERT_EQ(cuda_get(c2_tensor_b.mutable_data<float>() + 1), 123);
131  ASSERT_EQ(at_tensor_a[0][1].item().to<float>(), 123);
132  ASSERT_EQ(at_tensor_b[1].item().to<float>(), 123);
133 }
134 
135 TEST(CUDAPytorchToCaffe2, MutualResizes) {
136  if (!at::cuda::is_available()) return;
137  auto at_tensor = at::ones({5, 5}, at::dtype(at::kFloat).device(at::kCUDA));
138 
139  caffe2::Tensor c2_tensor(at_tensor);
140 
141  // change is visible
142  cuda_set<float>(c2_tensor.mutable_data<float>(), 123);
143  ASSERT_EQ(at_tensor[0][0].item().to<float>(), 123);
144 
145  // resize PT tensor in smaller direction - storage is preserved
146  at_tensor.resize_({4, 4});
147  cuda_set<float>(c2_tensor.mutable_data<float>() + 1, 234);
148  ASSERT_EQ(at_tensor[0][1].item().to<float>(), 234);
149 
150  // resize PT tensor in larger direction - storage is preserved
151  at_tensor.resize_({6, 6});
152  cuda_set<float>(c2_tensor.mutable_data<float>() + 2, 345);
153  ASSERT_EQ(at_tensor[0][2].item().to<float>(), 345);
154  ASSERT_EQ(c2_tensor.sizes()[0], 6);
155  ASSERT_EQ(c2_tensor.sizes()[1], 6);
156 
157  // resize Caffe2 tensor - semantics are to NOT preserve the data, but the
158  // TensorImpl is still shared
159  c2_tensor.Resize(7, 7);
160  cuda_set<float>(c2_tensor.mutable_data<float>() + 3, 456);
161  ASSERT_EQ(at_tensor[0][3].item().to<float>(), 456);
162  ASSERT_EQ(at_tensor.sizes()[0], 7);
163  ASSERT_EQ(at_tensor.sizes()[1], 7);
164 }
// Doxygen cross-references (documentation-extraction residue, kept for
// traceability):
// - Blob* Workspace::CreateBlob(const string& name): creates a blob of the
//   given name. Definition: workspace.cc:100.
// - Workspace: a class that holds all the related objects created during
//   runtime: (1) all blobs... Definition: workspace.h:47.
// - const Blob* Workspace::GetBlob(const string& name) const: gets the blob
//   with the given name as a const pointer. Definition: workspace.cc:160.
// - const T& Blob::Get() const: gets the const reference of the stored
//   object. Definition: blob.h:71.