1 #include <gtest/gtest.h> 3 #include <ATen/cuda/CUDAContext.h> 4 #include <c10/cuda/CUDAGuard.h> 5 #include <ATen/cuda/CUDAMultiStreamGuard.h> 6 #include <ATen/cuda/CUDAEvent.h> 8 #include <cuda_runtime.h> 12 #include <unordered_set> 14 #define ASSERT_EQ_CUDA(X, Y) \ 16 bool isTRUE = X == Y; \ 17 ASSERT_TRUE(isTRUE); \ 20 #define ASSERT_NE_CUDA(X, Y) \ 22 bool isFALSE = X == Y; \ 23 ASSERT_FALSE(isFALSE); \ 30 TEST(TestStream, CopyAndMoveTest) {
31 if (!at::cuda::is_available())
return;
33 cudaStream_t cuda_stream;
38 auto s = at::cuda::getStreamFromPool();
39 device = s.device_index();
40 cuda_stream = s.stream();
45 ASSERT_EQ_CUDA(copyStream.
stream(), cuda_stream);
49 ASSERT_EQ_CUDA(copyStream.
stream(), cuda_stream);
54 auto s = at::cuda::getStreamFromPool();
55 device = s.device_index();
56 cuda_stream = s.stream();
58 moveStream = std::move(s);
60 ASSERT_EQ_CUDA(moveStream.device_index(), device);
61 ASSERT_EQ_CUDA(moveStream.stream(), cuda_stream);
64 ASSERT_EQ_CUDA(moveStream.device_index(), device);
65 ASSERT_EQ_CUDA(moveStream.stream(), cuda_stream);
69 TEST(TestStream, GetAndSetTest) {
70 if (!at::cuda::is_available())
return;
74 at::cuda::setCurrentCUDAStream(myStream);
77 ASSERT_EQ_CUDA(myStream, curStream);
81 at::cuda::setCurrentCUDAStream(defaultStream);
82 curStream = at::cuda::getCurrentCUDAStream();
84 ASSERT_NE_CUDA(defaultStream, myStream);
85 ASSERT_EQ_CUDA(curStream, defaultStream);
89 auto new_stream = at::cuda::getStreamFromPool();
90 at::cuda::setCurrentCUDAStream(new_stream);
91 cur_thread_stream = {at::cuda::getCurrentCUDAStream()};
92 ASSERT_EQ_CUDA(*cur_thread_stream, new_stream);
96 TEST(TestStream, MultithreadGetAndSetTest) {
97 if (!at::cuda::is_available())
return;
100 std::thread t0{thread_fun, std::ref(s0)};
101 std::thread t1{thread_fun, std::ref(s1)};
108 ASSERT_EQ_CUDA(cur_stream, default_stream);
109 ASSERT_NE_CUDA(cur_stream, *s0);
110 ASSERT_NE_CUDA(cur_stream, *s1);
111 ASSERT_NE_CUDA(s0, s1);
115 TEST(TestStream, CUDAGuardTest) {
116 if (!at::cuda::is_available())
return;
117 if (at::cuda::getNumGPUs() < 2) {
123 ASSERT_EQ_CUDA(at::cuda::current_device(), 0);
124 std::vector<at::cuda::CUDAStream> streams0 = {
125 at::cuda::getDefaultCUDAStream(), at::cuda::getStreamFromPool()};
126 ASSERT_EQ_CUDA(streams0[0].device_index(), 0);
127 ASSERT_EQ_CUDA(streams0[1].device_index(), 0);
128 at::cuda::setCurrentCUDAStream(streams0[0]);
130 std::vector<at::cuda::CUDAStream> streams1;
133 streams1.push_back(at::cuda::getDefaultCUDAStream());
134 streams1.push_back(at::cuda::getStreamFromPool());
136 ASSERT_EQ_CUDA(streams1[0].device_index(), 1);
137 ASSERT_EQ_CUDA(streams1[1].device_index(), 1);
138 at::cuda::setCurrentCUDAStream(streams1[0]);
140 ASSERT_EQ_CUDA(at::cuda::current_device(), 0);
147 ASSERT_EQ_CUDA(guard.original_streams().size(), at::cuda::getNumGPUs());
148 ASSERT_EQ_CUDA(guard.original_streams()[0], streams0[0]);
149 ASSERT_EQ_CUDA(guard.original_streams()[1], streams1[0]);
155 ASSERT_EQ_CUDA(guard.current_device(),
at::Device(at::kCUDA, 1));
156 ASSERT_EQ_CUDA(at::cuda::current_device(), 1);
157 ASSERT_EQ_CUDA(at::cuda::getCurrentCUDAStream(1), streams1[1]);
161 ASSERT_EQ_CUDA(at::cuda::current_device(), 0);
162 ASSERT_EQ_CUDA(at::cuda::getCurrentCUDAStream(1), streams1[0]);
167 ASSERT_EQ_CUDA(guard.current_device(),
at::Device(at::kCUDA, 1));
168 ASSERT_EQ_CUDA(at::cuda::current_device(), 1);
169 ASSERT_EQ_CUDA(at::cuda::getCurrentCUDAStream(1), streams1[0]);
172 ASSERT_EQ_CUDA(at::cuda::current_device(), 0);
173 ASSERT_EQ_CUDA(at::cuda::getCurrentCUDAStream(0), streams0[0]);
177 TEST(TestStream, StreamPoolTest) {
178 if (!at::cuda::is_available())
return;
179 std::vector<at::cuda::CUDAStream> streams{};
180 for (
int i = 0; i < 200; ++i) {
181 streams.emplace_back(at::cuda::getStreamFromPool());
184 std::unordered_set<cudaStream_t> stream_set{};
185 bool hasDuplicates =
false;
186 for (
auto i = decltype(streams.size()){0}; i < streams.size(); ++i) {
187 cudaStream_t cuda_stream = streams[i];
188 auto result_pair = stream_set.insert(cuda_stream);
189 if (!result_pair.second)
190 hasDuplicates =
true;
193 ASSERT_TRUE(hasDuplicates);
197 TEST(TestStream, MultiGPUTest) {
198 if (!at::cuda::is_available())
return;
199 if (at::cuda::getNumGPUs() < 2)
205 at::cuda::setCurrentCUDAStream(s0);
206 at::cuda::setCurrentCUDAStream(s1);
208 ASSERT_EQ_CUDA(s0, at::cuda::getCurrentCUDAStream());
211 ASSERT_EQ_CUDA(s1, at::cuda::getCurrentCUDAStream());
215 TEST(TestStream, CUDAEventSyncTest) {
216 if (!at::cuda::is_available())
return;
217 const auto stream = at::cuda::getStreamFromPool();
220 ASSERT_TRUE(event.query());
222 event.recordOnce(stream);
224 const auto wait_stream0 = at::cuda::getStreamFromPool();
225 const auto wait_stream1 = at::cuda::getStreamFromPool();
227 event.block(wait_stream0);
228 event.block(wait_stream1);
230 cudaStreamSynchronize(wait_stream0);
231 ASSERT_TRUE(event.query());
235 TEST(TestStream, CrossDeviceTest) {
236 if (!at::cuda::is_available())
return;
237 if (at::cuda::getNumGPUs() < 2)
240 const auto stream0 = at::cuda::getStreamFromPool();
243 at::cuda::set_device(1);
244 const auto stream1 = at::cuda::getStreamFromPool();
247 event0.record(stream0);
248 event1.record(stream1);
250 event0 = std::move(event1);
252 ASSERT_EQ_CUDA(event0.device(),
at::Device(at::kCUDA, 1));
254 event0.block(stream0);
256 cudaStreamSynchronize(stream0);
257 ASSERT_TRUE(event0.query());
Represents a a compute device on which a tensor is located.
A variant of StreamGuard that is specialized for CUDA.
A variant of DeviceGuard that is specialized for CUDA.
cudaStream_t stream() const
Explicit conversion to cudaStream_t.
DeviceIndex device_index() const
Get the CUDA device index that this stream is associated with.