import io
import os
import sys
import tempfile
import threading
import queue
import unittest
from itertools import repeat
from contextlib import contextmanager

import torch
import torch.cuda
import torch.cuda.comm as comm
from torch import multiprocessing as mp

from test_torch import _TestTorchMixin

from common_methods_invocations import tri_tests_args, tri_large_tests_args, \
    run_additional_tri_tests, _compare_trilu_indices, _compare_large_trilu_indices
from common_utils import TestCase, get_gpu_type, to_gpu, freeze_rng_state, run_tests, \
    PY3, IS_WINDOWS, NO_MULTIPROCESSING_SPAWN, skipIfRocm, TEST_NUMPY, TEST_WITH_ROCM, load_tests, iter_indices

# load_tests from common_utils is used to automatically filter tests for
# sharding; re-assigning it here silences flake8 "unused import" warnings.
load_tests = load_tests

TEST_CUDA = torch.cuda.is_available()
TEST_MULTIGPU = TEST_CUDA and torch.cuda.device_count() >= 2
if not TEST_CUDA:
    print('CUDA not available, skipping tests')

TEST_MAGMA = TEST_CUDA
TEST_LARGE_TENSOR = TEST_CUDA
if TEST_CUDA:
    TEST_MAGMA = torch.cuda.has_magma

floating_set = {torch.FloatTensor, torch.DoubleTensor, torch.cuda.FloatTensor,
                torch.cuda.DoubleTensor, torch.HalfTensor, torch.cuda.HalfTensor}
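
# Tensor sizes used by the constructors below. The exact values were elided
# from this excerpt; these match the sizes used in the upstream test file.
S = 10   # small
M = 50   # medium
G = 275000000  # giant, for 64-bit indexing tests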


def is_floating(t):
    if not isinstance(t, type):
        raise TypeError('t should be an instance of type')
    assert t != torch.autograd.Variable
    return t in floating_set


def is_half(t):
    if isinstance(t, torch.Tensor):
        return t.dtype == torch.float16
    assert isinstance(t, type)
    assert t != torch.autograd.Variable
    return t in [torch.HalfTensor, torch.cuda.HalfTensor]
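

# Type subsets referenced by the `tests` table below. Their definitions were
# elided from this excerpt; these lists are reconstructed from how they are
# used and mirror the upstream file.
types = [
    torch.FloatTensor,
    torch.DoubleTensor,
    torch.LongTensor,
    torch.IntTensor,
    torch.ShortTensor,
    torch.CharTensor,
    torch.ByteTensor,
    torch.HalfTensor,
]

signed_types = [
    torch.FloatTensor,
    torch.DoubleTensor,
    torch.LongTensor,
    torch.IntTensor,
    torch.ShortTensor,
    torch.CharTensor,
]

unsigned_types = [
    torch.ByteTensor,
]

float_types = [
    torch.FloatTensor,
    torch.DoubleTensor,
    torch.HalfTensor,
]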

float_types_no_half = [
    torch.FloatTensor,
    torch.DoubleTensor,
]


def number(floating, integer, t):
    return floating if is_floating(t) else integer


def cast_tensor(tensor, t):
    return t(tensor.size()).copy_(tensor)


def make_tensor(t, *sizes):
    if 'Half' in t.__name__:
        return t(*sizes).copy_(torch.randn(*sizes))
    tensor = t(*sizes)
    if tensor.is_floating_point():
        return tensor.normal_()
    return tensor.random_(0, 10)


def make_sparse_tensor(t, n, *sizes):
    tensor = t()
    i = tensor._indices()
    i = i.new(len(sizes), n).copy_(
        torch.cat([torch.LongTensor(1, n).random_(s) for s in sizes], 0))
    v = tensor._values()
    v = v.new(n).copy_(torch.randn(n))
    return t(i, v, torch.Size(sizes))


def tensor_clamp(t, min, max):
    if is_half(t):
        return t.float().clamp(min, max).half()
    else:
        return t.clamp(min, max)


def tensor_mul(t, scale):
    if is_half(t):
        return t.float().mul(scale).half()
    else:
        return t.mul(scale)


def tensor_abs_(t):
    if is_half(t):
        return t.float().abs_().half()
    else:
        return t.abs_()


def constant_tensor_sub(a, b):
    # helper for constant - torch.HalfTensor, which has no direct fast path
    if is_half(b):
        return (a - b.float()).half()
    else:
        return a - b


def constant_tensor_add(a, b):
    if is_half(b):
        return (a + b.float()).half()
    else:
        return a + b


def small_0d(t):
    return make_tensor(t, (1,)).squeeze()


def small_2d(t):
    return make_tensor(t, S, S)


def small_2d_scaled(t, scale=10):
    return tensor_mul(make_tensor(t, S, S), scale)


def small_2d_oneish(t):
    if is_floating(t):
        return tensor_clamp(make_tensor(t, S, S), min=0.99, max=1.01)
    else:
        return t(S, S).fill_(1)


def small_3d(t):
    return make_tensor(t, S, S, S)


def medium_1d(t):
    return make_tensor(t, M)


def medium_2d(t):
    return make_tensor(t, M, M)


def medium_2d_expanded(t):
    return t(1).expand(M, M)


def medium_2d_scaled(t, scale=10):
    return tensor_mul(make_tensor(t, M, M), scale)


def small_3d_ones(t):
    return t(S, S, S).copy_(torch.ones(S, S, S))


def small_3d_positive(t):
    min_val = 1e-3 if is_floating(t) and not is_half(t) else 2
    return tensor_clamp(make_tensor(t, S, S, S), min_val, 120)


def small_3d_unique(t):
    return t(S, S, S).copy_(torch.arange(1, S * S * S + 1).view(S, S, S))


def small_1d_lapack(t):
    return t(1, 3).copy_(torch.arange(1, 4).view(3))


def small_2d_lapack(t):
    return t(3, 3).copy_(torch.arange(1, 10).view(3, 3))


def small_2d_lapack_skinny(t):
    return t(3, 4).copy_(torch.arange(1, 13).view(3, 4))


def small_2d_lapack_fat(t):
    return t(4, 3).copy_(torch.arange(1, 13).view(4, 3))


def large_2d_lapack(t):
    return t(1000, 1000).normal_()


def giant_1d_ones(t):
    return t(G).copy_(torch.ones(G))


def long_type(t):
    return torch.cuda.LongTensor if 'cuda' in t.__module__ else torch.LongTensor


def new_t(*sizes):
    def tmp(t):
        return t(*sizes).copy_(torch.randn(*sizes))
    return tmp
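

# Each entry below describes one generated comparison test:
#   (name, tensor constructor, arg constructor
#    [, subtest name [, type subset [, no_inplace flag [, decorator]]]])
tests = [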
    ('add', small_3d, lambda t: [number(3.14, 3, t)]),
    ('add', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
    ('add', small_3d, lambda t: [number(0.2, 2, t), small_3d_positive(t)], 'scalar_tensor'),
    ('sub', small_3d, lambda t: [number(3.14, 3, t)]),
    ('sub', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
    ('mul', small_3d, lambda t: [number(3.14, 3, t)]),
    ('mul', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
    ('mul', small_0d, lambda t: [small_0d(torch.IntTensor)], 'scalar', types, True),
    ('div', small_3d, lambda t: [number(3.14, 3, t)]),
    ('div', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
    ('pow', small_3d, lambda t: [number(3.14, 3, t)], None, float_types),
    ('pow', small_3d, lambda t: [number(1., 1, t)], 'pow1'),
    ('pow', small_3d, lambda t: [number(2., 2, t)], 'pow2'),
    ('pow', small_3d, lambda t: [number(3., 3, t)], 'pow3'),
    ('pow', small_3d, lambda t: [number(-1., -1, t)], 'pow-1', float_types),
    ('pow', small_3d, lambda t: [number(-2., -2, t)], 'pow-2', float_types_no_half, False,
        "skipIfRocm:FloatTensor"),
    ('pow', small_3d, lambda t: [tensor_abs_(small_3d(t))], 'tensor', float_types),
    ('addbmm', small_2d, lambda t: [small_3d(t), small_3d(t)], None, float_types),
    ('addbmm', small_2d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar'),
    ('addbmm', small_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), small_3d(t), small_3d(t)], 'two_scalars'),
    ('baddbmm', small_3d, lambda t: [small_3d(t), small_3d(t)]),
    ('baddbmm', small_3d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar'),
    ('baddbmm', small_3d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), small_3d(t), small_3d(t)], 'two_scalars'),
    ('bmm', small_3d, lambda t: [small_3d(t)], '', float_types_no_half),
    ('addcdiv', small_2d_lapack, lambda t: [tensor_mul(small_2d_lapack(t), 2), small_2d_lapack(t)]),
    ('addcdiv', small_2d_lapack, lambda t: [number(2.8, 1, t), tensor_mul(small_2d_lapack(t), 2),
                                            small_2d_lapack(t)], 'scalar'),
    ('addcmul', small_3d, lambda t: [small_3d(t), small_3d(t)]),
    ('addcmul', small_3d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar'),
    ('addmm', medium_2d, lambda t: [medium_2d(t), medium_2d(t)]),
    ('addmm', medium_2d, lambda t: [number(0.4, 2, t), medium_2d(t), medium_2d(t)], 'scalar'),
    ('addmm', medium_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_2d(t), medium_2d(t)], 'two_scalars'),
    ('addmv', medium_1d, lambda t: [medium_2d(t), medium_1d(t)]),
    ('addmv', medium_1d, lambda t: [number(0.4, 2, t), medium_2d(t), medium_1d(t)], 'scalar'),
    ('addmv', medium_1d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_2d(t), medium_1d(t)], 'two_scalars'),
    ('addr', medium_2d, lambda t: [medium_1d(t), medium_1d(t)]),
    ('addr', medium_2d, lambda t: [number(0.4, 2, t), medium_1d(t), medium_1d(t)], 'scalar'),
    ('addr', medium_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_1d(t), medium_1d(t)], 'two_scalars'),
    ('atan2', medium_2d, lambda t: [medium_2d(t)], None, float_types + [torch.HalfTensor]),
    ('fmod', small_3d, lambda t: [3], 'value'),
    ('fmod', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
    ('chunk', medium_2d, lambda t: [4]),
    ('chunk', medium_2d, lambda t: [4, 1], 'dim'),
    ('chunk', medium_2d, lambda t: [4, -2], 'neg_dim'),
    ('clamp', medium_2d_scaled, lambda t: [-1, 5], None, signed_types),
    ('clamp', medium_2d_scaled, lambda t: [1, 5], None, unsigned_types),
    ('clone', medium_2d, lambda t: []),
    ('contiguous', medium_2d, lambda t: []),
    ('cross', new_t(M, 3, M), lambda t: [new_t(M, 3, M)(t)]),
    ('cumprod', small_3d, lambda t: [1]),
    ('cumprod', small_3d, lambda t: [-1], 'neg_dim'),
    ('cumsum', small_3d, lambda t: [1]),
    ('cumsum', small_3d, lambda t: [-1], 'neg_dim'),
    ('dim', small_3d, lambda t: []),
    ('dist', small_2d, lambda t: [small_2d(t)]),
    ('dist', small_2d, lambda t: [small_2d(t), 3], '3_norm'),
    ('dist', small_2d, lambda t: [small_2d(t), 2.5], '2_5_norm'),
    ('dot', medium_1d, lambda t: [medium_1d(t)], '', types, False, "skipIfRocm:HalfTensor"),
    ('element_size', medium_1d, lambda t: []),
    ('eq', small_3d_ones, lambda t: [small_3d(t)]),
    ('eq', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal'),
    ('ne', small_3d_ones, lambda t: [small_3d(t)]),
    ('ne', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal'),
    ('equal', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal'),
    ('equal', small_3d_ones, lambda t: [small_3d(t)]),
    ('expand', new_t(M, 1, M), lambda t: [M, 4, M]),
    ('expand_as', new_t(M, 1, M), lambda t: [new_t(M, 4, M)(t)]),
    ('fill', medium_2d, lambda t: [number(3.14, 3, t)]),
    ('ge', medium_2d, lambda t: [medium_2d(t)]),
    ('le', medium_2d, lambda t: [medium_2d(t)]),
    ('gt', medium_2d, lambda t: [medium_2d(t)]),
    ('lt', medium_2d, lambda t: [medium_2d(t)]),
    ('is_contiguous', medium_2d, lambda t: []),
    ('is_same_size', medium_2d, lambda t: [small_3d(t)], 'negative'),
    ('is_same_size', medium_2d, lambda t: [medium_2d(t)], 'positive'),
    ('is_set_to', medium_2d, lambda t: [medium_2d(t)]),
    ('kthvalue', small_3d_unique, lambda t: [3]),
    ('kthvalue', small_3d_unique, lambda t: [3, 1], 'dim'),
    ('kthvalue', small_3d_unique, lambda t: [3, -1], 'neg_dim'),
    ('lerp', small_3d, lambda t: [small_3d(t), 0.3]),
    ('max', small_3d_unique, lambda t: []),
    ('max', small_3d_unique, lambda t: [1], 'dim'),
    ('max', small_3d_unique, lambda t: [-1], 'neg_dim'),
    ('max', medium_2d, lambda t: [medium_2d(t)], 'elementwise'),
    ('min', small_3d_unique, lambda t: []),
    ('min', small_3d_unique, lambda t: [1], 'dim'),
    ('min', small_3d_unique, lambda t: [-1], 'neg_dim'),
    ('min', medium_2d, lambda t: [medium_2d(t)], 'elementwise'),
    ('mean', small_3d, lambda t: []),
    ('mean', small_3d, lambda t: [-1], 'neg_dim'),
    ('mean', small_3d, lambda t: [1], 'dim'),
    ('mean', giant_1d_ones, lambda t: [], '64bit_indexing', [torch.DoubleTensor]),
    ('mode', small_3d, lambda t: []),
    ('mode', small_3d, lambda t: [1], 'dim'),
    ('mode', small_3d, lambda t: [-1], 'neg_dim'),
    ('mvlgamma', lambda t: tensor_clamp(small_2d(t), 0.1, 10), lambda t: [1], '2d_p=1', float_types_no_half),
    ('mvlgamma', lambda t: tensor_clamp(small_2d(t), 0.6, 10), lambda t: [2], '2d_p=2', float_types_no_half),
    ('remainder', small_3d, lambda t: [3], 'value'),
    ('remainder', small_3d, lambda t: [-3], 'negative_value', signed_types),
    ('remainder', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
    ('remainder', small_3d, lambda t: [constant_tensor_sub(0, small_3d_positive(t))], 'negative_tensor', signed_types),
    ('std', small_3d, lambda t: []),
    ('std', small_3d, lambda t: [1], 'dim', types, False),
    ('std', small_3d, lambda t: [-1], 'neg_dim', types, False),
    ('var', small_3d, lambda t: []),
    ('var', small_3d, lambda t: [1], 'dim'),
    ('var', small_3d, lambda t: [-1], 'neg_dim'),
    ('ndimension', small_3d, lambda t: []),
    ('nelement', small_3d, lambda t: []),
    ('numel', small_3d, lambda t: []),
    ('narrow', small_3d, lambda t: [1, 3, 2]),
    ('narrow', small_3d, lambda t: [-1, 3, 2], 'neg_dim'),
    ('nonzero', small_3d, lambda t: [], '', types, False),
    ('norm', small_3d, lambda t: []),
    ('norm', small_3d, lambda t: [3], '3_norm'),
    ('norm', small_3d, lambda t: [3, 0], '3_norm_dim'),
    ('norm', small_3d, lambda t: [3, -2], '3_norm_neg_dim'),
    ('ones', small_3d, lambda t: [1, 2, 3, 4, 5]),
    ('permute', new_t(1, 2, 3, 4), lambda t: [2, 1, 3, 0]),
    ('put_', new_t(2, 5, 3), lambda t: [long_type(t)([[0], [-2]]), t([[3], [4]])], '', types, False),
    ('put_', new_t(2, 3), lambda t: [long_type(t)([]), t([])], 'empty'),
    ('put_', new_t(2, 2), lambda t: [long_type(t)([[1], [-3]]), t([[1], [2]]), True], 'accumulate'),
    ('prod', small_2d_oneish, lambda t: []),
    ('prod', small_3d, lambda t: [1], 'dim'),
    ('prod', small_3d, lambda t: [-1], 'neg_dim'),
    ('sum', small_2d, lambda t: []),
    ('sum', small_3d, lambda t: [1], 'dim'),
    ('sum', small_3d, lambda t: [-1], 'neg_dim'),
    ('renorm', small_3d, lambda t: [2, 1, 1], '2_norm'),
    ('renorm', small_3d, lambda t: [2, -1, 1], '2_norm_neg_dim'),
    ('renorm', small_3d, lambda t: [1.5, 1, 1], '1_5_norm'),
    ('repeat', small_2d, lambda t: [2, 2, 2]),
    ('size', new_t(1, 2, 3, 4), lambda t: []),
    ('size', new_t(1, 2, 3, 4), lambda t: [1], 'dim'),
    ('size', new_t(1, 2, 3, 4), lambda t: [-2], 'neg_dim'),
    ('sort', small_3d_unique, lambda t: [], ''),
    ('sort', small_3d_unique, lambda t: [1], 'dim'),
    ('sort', small_3d_unique, lambda t: [-1], 'neg_dim'),
    ('sort', small_3d_unique, lambda t: [1, True], 'dim_descending'),
    ('sort', small_3d_unique, lambda t: [-1, True], 'neg_dim_descending'),
    ('split', small_3d, lambda t: [2]),
    ('split', small_3d, lambda t: [2, 1], 'dim'),
    ('split', small_3d, lambda t: [2, -3], 'neg_dim'),
    ('squeeze', new_t(1, 2, 1, 4), lambda t: []),
    ('squeeze', new_t(1, 2, 1, 4), lambda t: [2], 'dim'),
    ('squeeze', new_t(1, 2, 1, 4), lambda t: [-2], 'neg_dim'),
    ('t', new_t(1, 2), lambda t: []),
    ('take', new_t(3, 4), lambda t: [long_type(t)([[0], [-2]])], '', types, False),
    ('transpose', new_t(1, 2, 3, 4), lambda t: [1, 2]),
    ('transpose', new_t(1, 2, 3, 4), lambda t: [-1, -2], 'neg_dim'),
    ('to_list', small_3d, lambda t: []),
    ('topk', small_3d_unique, lambda t: [2, 1, False, True], 'dim_sort'),
    ('topk', small_3d_unique, lambda t: [2, -1, False, True], 'neg_dim_sort'),
    ('topk', small_3d_unique, lambda t: [2, 1, True, True], 'dim_desc_sort'),
    ('trace', medium_2d, lambda t: []),
    ('tril', medium_2d, lambda t: []),
    ('tril', medium_2d_expanded, lambda t: [], 'zero_stride', types, True),
    ('tril', medium_2d, lambda t: [2], 'positive'),
    ('tril', medium_2d, lambda t: [-2], 'negative'),
    ('triu', medium_2d, lambda t: []),
    ('triu', medium_2d_expanded, lambda t: [], 'zero_stride', types, True),
    ('triu', medium_2d, lambda t: [2], 'positive'),
    ('triu', medium_2d, lambda t: [-2], 'negative'),
    ('unsqueeze', new_t(2, 3, 4), lambda t: [2]),
    ('unsqueeze', new_t(2, 3, 4), lambda t: [-2], 'neg_dim'),
    ('view', small_3d, lambda t: [100, 10], 'contiguous'),
    ('view_as', small_3d, lambda t: [make_tensor(t, 100, 10)]),
    ('zero', small_3d, lambda t: []),
    ('zeros', small_3d, lambda t: [1, 2, 3, 4]),
    ('eye', small_2d, lambda t: [3, 4]),
    ('flip', small_3d, lambda t: [0], 'd0', types, True),
    ('flip', small_3d, lambda t: [0, 1, 2], 'd012', types, True),
    ('flip', small_3d, lambda t: [0, 2], 'd02', types, True),
    ('flip', small_3d, lambda t: [2, 0], 'd20', types, True),
    ('flip', small_3d, lambda t: [-1], 'neg_d', types, True),
    ('rot90', small_2d, lambda t: [1, [0, 1]], 'k1_d01', types, True),
    ('rot90', small_3d, lambda t: [1, [1, 2]], 'k1_d12', types, True),
    ('rot90', small_3d, lambda t: [1, [1, -1]], 'k1_neg_d', types, True),
    ('rot90', small_3d, lambda t: [], 'default', types, True),
    ('rsqrt', lambda t: constant_tensor_add(1, small_3d(t)), lambda t: [], None, float_types),
    ('sinh', lambda t: tensor_clamp(small_3d(t), -1, 1), lambda t: [], None, float_types),
    ('tan', lambda t: tensor_clamp(small_3d(t), -1, 1), lambda t: [], None, float_types),
    ('__lshift__', lambda t: torch.pow(2, cast_tensor(torch.arange(1, 5), t)),
        lambda t: [2], None, signed_types),
    ('__rshift__', lambda t: torch.pow(2, cast_tensor(torch.arange(3, 7), t)),
        lambda t: [2], None, signed_types),
    ('qr', small_2d_lapack, lambda t: [], 'square', float_types, False,
        unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")),
    ('qr', small_2d_lapack_skinny, lambda t: [], 'skinny', float_types, False,
        unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")),
    ('qr', small_2d_lapack_fat, lambda t: [], 'fat', float_types, False,
        unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")),
    ('qr', large_2d_lapack, lambda t: [], 'big', float_types, False,
        unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")),
    ('geqrf', new_t(20, 20), lambda t: [], None, float_types, False,
        unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")),
    ('svd', new_t(10, 10), lambda t: [], 'square', float_types_no_half, False,
        unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")),
    ('svd', lambda t: new_t(10, 10)(t).t(), lambda t: [True], 'square_col_maj',
        float_types_no_half, False,
        unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")),
    ('svd', new_t(20, 5), lambda t: [True], 'tall_some', float_types_no_half, False,
        unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")),
    ('svd', new_t(20, 5), lambda t: [False], 'tall_all', float_types_no_half, False,
        unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")),
    ('svd', lambda t: new_t(5, 20)(t).t(), lambda t: [True],
        'tall_some_col_maj', float_types_no_half, False,
        unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")),
    ('svd', lambda t: new_t(5, 20)(t).t(), lambda t: [False],
        'tall_all_col_maj', float_types_no_half, False,
        unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")),
    ('eig', new_t(10, 10), lambda t: [True], 'with_eigvec', float_types_no_half, False,
        unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")),

custom_half_precision = {
    # ... (per-op HalfTensor precision overrides elided) ...
}

# Reconstructed from the upstream file; the original list was elided here.
simple_pointwise = [
    'abs',
    'sign',
]
for fn in simple_pointwise:
    tests.append((fn, small_3d, lambda t: []))

simple_pointwise_float = [
    # ... (unary floating-point op names elided, e.g. 'log', 'sin', 'sqrt') ...
]
for fn in simple_pointwise_float:
    tests.append((fn, small_3d, lambda t: [], None, float_types))

_cycles_per_ms = None


def get_cycles_per_ms():
    """Approximate number of cycles per millisecond for torch.cuda._sleep"""
    global _cycles_per_ms
    if _cycles_per_ms is None:
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)
        start.record()
        torch.cuda._sleep(1000000)
        end.record()
        end.synchronize()
        _cycles_per_ms = 1000000 / start.elapsed_time(end)
    return _cycles_per_ms
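
# Typical use below: torch.cuda._sleep(int(50 * get_cycles_per_ms())) makes the
# GPU busy-spin for roughly 50 ms, so a subsequent async op is still pending.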


def compare_cpu_gpu(tensor_constructor, arg_constructor, fn, t, precision=1e-5):
    def tmp(self):
        cpu_tensor = tensor_constructor(t)
        gpu_tensor = to_gpu(cpu_tensor)
        cpu_args = arg_constructor(t)
        gpu_args = [to_gpu(arg) for arg in cpu_args]
        if is_half(t):
            cpu_tensor = cpu_tensor.float()
            cpu_args = [arg.float() if isinstance(arg, torch.Tensor) and is_half(arg) else arg
                        for arg in cpu_args]
        cpu_result = getattr(cpu_tensor, fn)(*cpu_args)
        try:
            gpu_result = getattr(gpu_tensor, fn)(*gpu_args)
        except RuntimeError as e:
            reason = e.args[0]
            data_type_reasons = {'only supports floating-point types',
                                 'unimplemented data type',
                                 'not implemented for'}
            if any(data_type_reason in reason for data_type_reason in data_type_reasons):
                raise unittest.SkipTest('unimplemented data type')
            raise
        except AttributeError as e:
            reason = e.args[0]
            if 'object has no attribute' in reason:
                raise unittest.SkipTest('unimplemented data type')
            raise
        # if one changed, the other should have changed as well
        self.assertEqual(cpu_tensor, gpu_tensor, precision)
        self.assertEqual(cpu_args, gpu_args, precision)
        # compare results
        if fn == 'element_size' and t.__name__ == 'HalfTensor':
            # workaround: cpu_result is for the float-converted tensor
            self.assertEqual(2, gpu_result)
        else:
            self.assertEqual(cpu_result, gpu_result, precision)
    return tmp
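
# compare_cpu_gpu builds the body of each generated test: it runs `fn` on a
# CPU tensor and on its GPU copy and checks that the results agree within
# `precision`, skipping combinations the backend does not implement.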


class TestCuda(TestCase):
    _do_cuda_memory_leak_check = True
    FIFTY_MIL_CYCLES = 50000000

    @staticmethod
    def _test_memory_stats_generator(self, device=None, N=35):
        if device is None:
            device = torch.cuda.current_device()

        last_m_arr = [torch.cuda.memory_allocated(device)]
        max_m_arr = [torch.cuda.max_memory_allocated(device)]
        last_c_arr = [torch.cuda.memory_cached(device)]
        max_c_arr = [torch.cuda.max_memory_cached(device)]

        def alloc(*size):
            with torch.cuda.device(device):
                return torch.cuda.FloatTensor(*size)

        def assert_change(comp=1, empty_cache=False, reset_max_alloc=False, reset_max_cached=False):
            # comp > 0: allocated memory increased; comp < 0: decreased;
            # comp == 0: unchanged
            new_m = torch.cuda.memory_allocated(device)
            new_max_m = torch.cuda.max_memory_allocated(device)
            if comp > 0:
                self.assertGreater(new_m, last_m_arr[0])
            elif comp < 0:
                self.assertLess(new_m, last_m_arr[0])
            else:
                self.assertEqual(new_m, last_m_arr[0])
            self.assertLessEqual(new_m, new_max_m)
            self.assertGreaterEqual(new_max_m, max_m_arr[0])
            last_m_arr[0] = new_m
            max_m_arr[0] = new_max_m

            new_c = torch.cuda.memory_cached(device)
            new_max_c = torch.cuda.max_memory_cached(device)
            # the cache may shrink on allocation, so new_c >= last_c cannot be asserted
            self.assertLessEqual(new_c, new_max_c)
            self.assertGreaterEqual(new_max_c, max_c_arr[0])
            last_c_arr[0] = new_c
            max_c_arr[0] = new_max_c

            if empty_cache:
                torch.cuda.empty_cache()
                new_c = torch.cuda.memory_cached(device)
                new_max_c = torch.cuda.max_memory_cached(device)
                self.assertLessEqual(new_c, last_c_arr[0])
                self.assertLessEqual(new_c, new_max_c)
                self.assertEqual(new_max_c, max_c_arr[0])
                last_c_arr[0] = new_c

            if reset_max_alloc:
                # ... (reset and re-check of the max-allocated stat elided) ...
                max_m_arr[0] = last_m_arr[0]

            if reset_max_cached:
                # ... (reset and re-check of the max-cached stat elided) ...
                max_c_arr[0] = last_c_arr[0]

        assert_change(0, reset_max_alloc=True)
        assert_change(0, empty_cache=True)
        assert_change(0, reset_max_cached=True)
        yield

        tensors1 = [alloc(1), alloc(10, 20), alloc(200, 300, 2000)]
        assert_change(1)
        yield

        tensors2 = []
        for i in range(1, int(N / 2) + 1):
            # small allocations
            tensors2.append(alloc(i, i * 4))
            assert_change(1)
            yield

        for i in range(5, int(N / 2) + 5):
            # large allocations
            tensors2.append(alloc(i, i * 7, i * 9, i * 11))
            assert_change(1, reset_max_alloc=(i % 2 == 0), reset_max_cached=(i % 2 == 1))
            yield

        tensors2.append(alloc(0, 0, 0))
        assert_change(0)
        yield

        permute = []
        for i in torch.randperm(len(tensors2)):
            permute.append(tensors2[i])
        tensors2 = permute
        assert_change(0, reset_max_alloc=True)
        yield

        for i in range(int(N / 2)):
            x = tensors2[i].numel()
            del tensors2[i]
            assert_change(-x)  # comp == 0 when tensors2[i] was empty
            yield

        for i in range(2, int(2 * N / 3) + 2):
            tensors2.append(alloc(i, i * 3, i * 8))
            assert_change(1)
            yield

        del tensors2
        assert_change(-1, reset_max_cached=True)
        del tensors1
        assert_change(-1, reset_max_alloc=True)
        assert_change(0, empty_cache=True)
        assert_change(0, reset_max_cached=True)
        assert_change(0, reset_max_alloc=True)

    def test_memory_stats(self):
        torch.cuda.empty_cache()
        for _ in self._test_memory_stats_generator(self):
            pass

    def test_cuda_get_device_name(self):
        # behaviour with None as an argument
        current_device = torch.cuda.current_device()
        current_device_name = torch.cuda.get_device_name(current_device)
        device_name_None = torch.cuda.get_device_name(None)
        self.assertEqual(current_device_name, device_name_None)

        # behaviour with no argument
        device_name_no_argument = torch.cuda.get_device_name()
        self.assertEqual(current_device_name, device_name_no_argument)

    def test_cuda_get_device_capability(self):
        # behaviour with None as an argument
        current_device = torch.cuda.current_device()
        current_device_capability = torch.cuda.get_device_capability(current_device)
        device_capability_None = torch.cuda.get_device_capability(None)
        self.assertEqual(current_device_capability, device_capability_None)

        # behaviour with no argument
        device_capability_no_argument = torch.cuda.get_device_capability()
        self.assertEqual(current_device_capability, device_capability_no_argument)

    @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
    def test_memory_stats_multigpu(self):
        # advance a generator without letting StopIteration escape
        def advance(gen, end):
            if not end:
                try:
                    next(gen)
                except StopIteration:
                    end = True
            return end

        # interlace the two generators
        gen0 = self._test_memory_stats_generator(self, device='cuda:0', N=35)
        gen1 = self._test_memory_stats_generator(self, device=torch.device('cuda:1'), N=35)
        end0 = end1 = False
        while not (end0 and end1):
            end0 = advance(gen0, end0)
            end1 = advance(gen1, end1)

        # advance them in semi-random order
        gen0 = self._test_memory_stats_generator(self, device=0, N=35)
        gen1 = self._test_memory_stats_generator(self, device=torch.device('cuda:1'), N=35)
        end0 = end1 = False
        while not (end0 and end1):
            end0 = advance(gen0, end0)
            if not end0:
                gen1_max_times = torch.LongTensor(1).random_(0, 3)[0]
            else:
                gen1_max_times = float('inf')
            t = 0
            while t < gen1_max_times and not end1:
                end1 = advance(gen1, end1)
                t += 1

    def test_out_of_memory(self):
        tensor = torch.zeros(1024, device='cuda')

        with self.assertRaisesRegex(RuntimeError, "Tried to allocate 80.00 GiB"):
            torch.empty(1024 * 1024 * 1024 * 80, dtype=torch.int8, device='cuda')

        # ensure the OOM error above did not disturb subsequent kernels
        tensor.fill_(1)
        self.assertTrue((tensor == 1).all())

    @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
    def test_autogpu(self):
        x = torch.randn(5, 5).cuda()
        y = torch.randn(5, 5).cuda()
        self.assertEqual(x.get_device(), 0)
        self.assertEqual(y.get_device(), 0)
        with torch.cuda.device(1):
            z = torch.randn(5, 5).cuda()
            self.assertEqual(z.get_device(), 1)
            q = x.add(y)
            self.assertEqual(q.get_device(), 0)
            w = torch.randn(5, 5).cuda()
            self.assertEqual(w.get_device(), 1)
            self.assertEqual(y.cuda().get_device(), 1)
        z = z.cuda()
        self.assertEqual(z.get_device(), 0)

    @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
    def test_new(self):
        x = torch.randn(3, 3).cuda()
        self.assertEqual(x.new([0, 1, 2]).get_device(), 0)
        self.assertEqual(x.new([0, 1, 2], device=1).get_device(), 1)

        with torch.cuda.device(1):
            self.assertEqual(x.new([0, 1, 2]).get_device(), 0)
            self.assertEqual(x.new([0, 1, 2], device=1).get_device(), 1)

    @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
    def test_copy_device(self):
        x = torch.randn(5, 5).cuda()
        with torch.cuda.device(1):
            y = x.cuda()
            self.assertEqual(y.get_device(), 1)
            self.assertIs(y.cuda(), y)
            z = y.cuda(0)
            self.assertEqual(z.get_device(), 0)
            self.assertIs(z.cuda(0), z)

        x = torch.randn(5, 5)
        with torch.cuda.device(1):
            y = x.cuda()
            self.assertEqual(y.get_device(), 1)
            self.assertIs(y.cuda(), y)
            z = y.cuda(0)
            self.assertEqual(z.get_device(), 0)
            self.assertIs(z.cuda(0), z)

    def _test_copy_sync_current_stream(self, x, y):
        s0 = torch.cuda.Stream(device=x.device)
        s1 = torch.cuda.Stream(device=y.device)
        s2 = torch.cuda.Stream(device=x.device)
        s3 = torch.cuda.Stream(device=y.device)

        # same dst stream, different src streams: the copies must serialize
        # ... (delayed copy on s0/s1 followed by copy on s2/s1 elided) ...
        self.assertEqual(y, x)

        # same src stream, different dst streams
        # ... (mirror case using s3/s0 elided) ...
        self.assertEqual(y, x)

    @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
    def test_copy_streams(self):
        d0 = torch.device('cuda:0')
        x0 = torch.zeros(5, 5, device=d0)

        d1 = torch.device('cuda:1')
        x1 = torch.zeros(5, 5, device=d1)
        self._test_copy_sync_current_stream(x0, x1)

        x2 = torch.zeros(5, 5, device=d0)
        self._test_copy_sync_current_stream(x0, x2)

    def test_copy_non_blocking(self):
        x = torch.randn(5, 5).cuda()
        y = torch.zeros(5, 5)
        y.copy_(x, non_blocking=True)
        self.assertEqual(x, y)

        x = torch.randn(5, 5)
        y = torch.zeros(5, 5).cuda()
        y.copy_(x, non_blocking=True)
        self.assertEqual(x, y)

    def test_serialization_array_with_storage(self):
        x = torch.randn(5, 5).cuda()
        y = torch.IntTensor(2, 5).fill_(0).cuda()
        q = [x, y, x, y.storage()]
        with tempfile.NamedTemporaryFile() as f:
            torch.save(q, f)
            f.seek(0)
            q_copy = torch.load(f)
        self.assertEqual(q_copy, q, 0)
        q_copy[0].fill_(5)
        self.assertEqual(q_copy[0], q_copy[2], 0)
        self.assertTrue(isinstance(q_copy[0], torch.cuda.DoubleTensor))
        self.assertTrue(isinstance(q_copy[1], torch.cuda.IntTensor))
        self.assertTrue(isinstance(q_copy[2], torch.cuda.DoubleTensor))

    def test_type_conversions(self):
        x = torch.randn(5, 5)
        self.assertIsInstance(x.float(), torch.FloatTensor)
        self.assertIsInstance(x.cuda().double(), torch.cuda.DoubleTensor)
        self.assertIsInstance(x.cuda().float(), torch.cuda.FloatTensor)
        self.assertIsInstance(x.cuda().float().cpu(), torch.FloatTensor)
        self.assertIsInstance(x.cuda().float().cpu().int(), torch.IntTensor)

    def test_mul_intertype_scalar(self):
        def test_mul(dtype):
            x = torch.tensor(1.5, dtype=dtype, device='cuda')
            y = torch.tensor(3, dtype=torch.int32, device='cuda')

            self.assertEqual(x * y, 4.5)
            self.assertEqual(y * x, 4.5)
            with self.assertRaisesRegex(RuntimeError, "doesn't match the desired type"):
                y *= x
            x *= y
            self.assertEqual(x, 4.5)

        test_mul(torch.float16)
        test_mul(torch.float32)
        test_mul(torch.float64)

    @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
    def test_type_conversions_same_gpu(self):
        x = torch.randn(5, 5).cuda(1)
        self.assertEqual(x.int().get_device(), 1)
        self.assertEqual(x.type(torch.int).get_device(), 1)
        self.assertEqual(x.to(torch.int).get_device(), 1)

    def test_neg(self):
        _TestTorchMixin._test_neg(self, lambda t: t.cuda())

    def test_isinf(self):
        _TestTorchMixin._test_isinf(self, lambda t: t.cuda())

    def test_inplace_unary_mem_overlap(self):
        _TestTorchMixin._test_inplace_unary_mem_overlap(self, device='cuda')

    @unittest.skipIf(not TEST_LARGE_TENSOR, "not enough memory")
    def test_arithmetic_large_tensor(self):
        x = torch.empty(2**30, device='cuda')

        x.fill_(1)
        self.assertEqual(x.sum(), 2**30)

        x += 1
        self.assertEqual(x.sum(), 2**31)

        x.fill_(1)
        x -= 0.5
        self.assertEqual(x.sum(), 2**29)

        x.fill_(1)
        x *= 2
        self.assertEqual(x.sum(), 2**31)

        x.fill_(1)
        x /= 2
        self.assertEqual(x.sum(), 2**29)

    def _test_broadcast(self, input):
        if not TEST_MULTIGPU:
            raise unittest.SkipTest("only one GPU detected")
        result = comm.broadcast(input, (0, 1))
        for i, t in enumerate(result):
            self.assertEqual(t.get_device(), i)
            self.assertEqual(t, input)
            if input.is_cuda and input.get_device() == i:
                self.assertEqual(t.data_ptr(), input.data_ptr())

    def test_broadcast_cpu(self):
        self._test_broadcast(torch.randn(5, 5))

    def test_broadcast_gpu(self):
        self._test_broadcast(torch.randn(5, 5).cuda())

    def test_min_max_nan(self):
        tests = [(lambda x: x.min(), 'min'),
                 (lambda x: x.max(), 'max'),
                 (lambda x: x.min(0)[0], 'min_dim'),
                 (lambda x: x.max(0)[0], 'max_dim')]
        for f, name in tests:
            a = torch.arange(25.0).view(5, 5)
            a[2, 2] = float('nan')
            actual = f(a.cuda()).cpu()
            expected = f(a).cpu()
            self.assertEqual(torch.isnan(actual), torch.isnan(expected),
                             'nans for {}'.format(name))
            self.assertEqual(actual[~torch.isnan(actual)],
                             expected[~torch.isnan(expected)],
                             'nans for {}'.format(name))

    @staticmethod
    def _test_broadcast_coalesced(self, tensors, buffer_size):
        b_tensors = [comm.broadcast(t, (0, 1)) for t in tensors]
        for (_, bt), t in zip(b_tensors, tensors):
            self.assertEqual(bt.get_device(), 1)
            self.assertEqual(bt, t)
            self.assertIsInstance(bt, type(t))

        bc_tensors = comm.broadcast_coalesced(tensors, (0, 1), buffer_size=buffer_size)
        bc_tensors_t = list(zip(*bc_tensors))
        self.assertEqual(b_tensors, bc_tensors_t)
        for (_, bt), (_, bct) in zip(b_tensors, bc_tensors_t):
            self.assertEqual(bt.get_device(), bct.get_device())
            self.assertIsInstance(bct, type(bt))

        # check that tensors on device[0] are returned as-is
        for out_tensors in (b_tensors, bc_tensors_t):
            for inp_t, (out_t, _) in zip(tensors, out_tensors):
                self.assertIs(inp_t, out_t)

        # the broadcast copies should carry fresh version counters: zeroing a
        # copy must bump only its own version
        versions = [t._version for _, t in bc_tensors_t]
        for old_version, (_, t) in zip(versions, bc_tensors_t):
            self.assertEqual(t._version, old_version)
            t.zero_()
            self.assertEqual(t._version, old_version + 1)

    @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
    def test_broadcast_coalesced(self):
        numel = 5
        num_bytes = numel * 8
        tensors = [
            make_sparse_tensor(torch.cuda.sparse.DoubleTensor, 1, 2, 3),
            torch.randn(numel).long().cuda(),
            torch.randn(numel).cuda(),
            make_sparse_tensor(torch.cuda.sparse.DoubleTensor, 10, 2, 3),
            make_sparse_tensor(torch.cuda.sparse.DoubleTensor, 5, 2, 3),
            make_sparse_tensor(torch.cuda.sparse.LongTensor, 7, 3, 3),
            make_sparse_tensor(torch.cuda.sparse.FloatTensor, 2, 2, 3),
            torch.randn(numel).long().cuda(),
            torch.randn(numel).long().cuda(),
            make_sparse_tensor(torch.cuda.sparse.LongTensor, 3, 2, 7),
            torch.randn(numel * 2).int().cuda(),
            torch.randn(numel).cuda(),
        ]
        self._test_broadcast_coalesced(self, tensors, num_bytes * 5 // 2)

    @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
    def test_broadcast_coalesced_dense_only(self):
        numel = 5
        num_bytes = numel * 8
        tensors = [
            torch.randn(numel).long().cuda(),
            torch.randn(numel).cuda(),
            torch.randn(numel).long().cuda(),
            torch.randn(numel).long().cuda(),
            torch.randn(numel * 2).int().cuda(),
            torch.randn(numel).cuda(),
        ]
        self._test_broadcast_coalesced(self, tensors, num_bytes * 5 // 2)

    @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
    def test_reduce_add(self):
        x = torch.randn(5, 5)
        y = torch.randn(5, 5)
        x_cuda = x.cuda(0)
        y_cuda = y.cuda(1)
        result = comm.reduce_add((x_cuda, y_cuda))
        self.assertEqual(result.get_device(), 0)
        self.assertEqual(result.cpu(), x + y)

    @staticmethod
    def _test_reduce_add_coalesced(self, tensors, buffer_size):
        dup_tensors = [tensors, list(map(lambda t: t.cuda(1), tensors))]

        r_tensors = list(map(comm.reduce_add, zip(*dup_tensors)))
        for r, t in zip(r_tensors, tensors):
            self.assertEqual(r.get_device(), t.get_device())
            self.assertEqual(r, t * 2)
            self.assertEqual(r.type(), t.type())

        rc_tensors = comm.reduce_add_coalesced(dup_tensors, buffer_size=buffer_size)
        self.assertEqual(r_tensors, rc_tensors)
        for r, rc in zip(r_tensors, rc_tensors):
            self.assertEqual(rc.get_device(), r.get_device())
            self.assertEqual(rc.type(), r.type())

        # the reduced outputs should carry fresh version counters
        versions = [t._version for t in rc_tensors]
        for old_version, t in zip(versions, rc_tensors):
            self.assertEqual(t._version, old_version)
            t.zero_()
            self.assertEqual(t._version, old_version + 1)

    @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
    def test_reduce_add_coalesced(self):
        numel = 5
        num_bytes = numel * 8
        tensors = [
            make_sparse_tensor(torch.cuda.sparse.DoubleTensor, 1, 2, 3),
            torch.randn(numel).long().cuda(),
            torch.randn(numel).cuda(),
            make_sparse_tensor(torch.cuda.sparse.DoubleTensor, 10, 2, 3),
            make_sparse_tensor(torch.cuda.sparse.DoubleTensor, 5, 2, 3),
            make_sparse_tensor(torch.cuda.sparse.LongTensor, 7, 3, 3),
            make_sparse_tensor(torch.cuda.sparse.FloatTensor, 2, 2, 3),
            torch.randn(numel).long().cuda(),
            torch.randn(numel).long().cuda(),
            make_sparse_tensor(torch.cuda.sparse.LongTensor, 3, 2, 7),
            torch.randn(numel * 2).int().cuda(),
            torch.randn(numel).cuda(),
        ]
        self._test_reduce_add_coalesced(self, tensors, num_bytes * 5 // 2)

    @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
    def test_reduce_add_coalesced_dense_only(self):
        numel = 5
        num_bytes = numel * 8
        tensors = [
            torch.randn(numel).long().cuda(),
            torch.randn(numel).cuda(),
            torch.randn(numel).long().cuda(),
            torch.randn(numel).long().cuda(),
            torch.randn(numel * 2).int().cuda(),
            torch.randn(numel).cuda(),
        ]
        self._test_reduce_add_coalesced(self, tensors, num_bytes * 5 // 2)

    def _test_scatter(self, input, chunk_sizes=None, dim=0):
        if not TEST_MULTIGPU:
            raise unittest.SkipTest("only one GPU detected")
        result = comm.scatter(input, (0, 1), chunk_sizes, dim)
        self.assertEqual(len(result), 2)
        if chunk_sizes is None:
            chunk_sizes = tuple(repeat(input.size(dim) // 2, 2))
        chunk_start = 0
        for i, r in enumerate(result):
            chunk_end = chunk_start + chunk_sizes[i]
            index = [slice(None, None), slice(None, None)]
            index[dim] = slice(chunk_start, chunk_end)
            self.assertEqual(r, input[tuple(index)], 0)
            chunk_start = chunk_end

    def test_scatter_cpu(self):
        self._test_scatter(torch.randn(4, 4), dim=0)

    def test_scatter_cpu_dim(self):
        self._test_scatter(torch.randn(4, 4), dim=1)

    def test_scatter_cpu_neg_dim(self):
        self._test_scatter(torch.randn(4, 4), dim=-2)

    def test_scatter_cpu_sizes(self):
        self._test_scatter(torch.randn(6, 4), chunk_sizes=(2, 4))

    def test_scatter_gpu(self):
        self._test_scatter(torch.randn(4, 4).cuda(), dim=0)

    def test_scatter_gpu_dim(self):
        self._test_scatter(torch.randn(4, 4).cuda(), dim=1)

    def test_scatter_gpu_neg_dim(self):
        self._test_scatter(torch.randn(4, 4).cuda(), dim=-2)

    def test_scatter_gpu_sizes(self):
        self._test_scatter(torch.randn(6, 4).cuda(), chunk_sizes=(2, 4))

    def _test_gather(self, dim):
        if not TEST_MULTIGPU:
            raise unittest.SkipTest("only one GPU detected")
        x = torch.randn(2, 5).cuda(0)
        y = torch.randn(2, 5).cuda(1)
        result = comm.gather((x, y), dim)

        expected_size = list(x.size())
        expected_size[dim] += y.size(dim)
        expected_size = torch.Size(expected_size)
        self.assertEqual(result.get_device(), 0)
        self.assertEqual(result.size(), expected_size)

        index = [slice(None, None), slice(None, None)]
        index[dim] = slice(0, x.size(dim))
        self.assertEqual(result[tuple(index)], x)
        index[dim] = slice(x.size(dim), x.size(dim) + y.size(dim))
        self.assertEqual(result[tuple(index)], y)

    def test_gather(self):
        self._test_gather(0)

    def test_gather_dim(self):
        self._test_gather(1)

    def test_from_sequence(self):
        seq = [list(range(i * 4, i * 4 + 4)) for i in range(5)]
        reference = torch.arange(0, 20).resize_(5, 4)
        for t in types:
            cuda_type = get_gpu_type(t)
            self.assertEqual(cuda_type(seq), reference)

    def test_torch_manual_seed_seeds_cuda_devices(self):
        with freeze_rng_state():
            x = torch.zeros(4, 4).float().cuda()
            torch.manual_seed(2)
            self.assertEqual(torch.cuda.initial_seed(), 2)
            x.uniform_()
            torch.manual_seed(2)
            y = x.clone().uniform_()
            self.assertEqual(x, y)
            self.assertEqual(torch.cuda.initial_seed(), 2)

    def test_manual_seed(self):
        with freeze_rng_state():
            x = torch.zeros(4, 4).float().cuda()
            torch.cuda.manual_seed(2)
            self.assertEqual(torch.cuda.initial_seed(), 2)
            x.uniform_()
            a = torch.bernoulli(torch.full_like(x, 0.5))
            torch.cuda.manual_seed(2)
            y = x.clone().uniform_()
            b = torch.bernoulli(torch.full_like(x, 0.5))
            self.assertEqual(x, y)
            self.assertEqual(a, b)
            self.assertEqual(torch.cuda.initial_seed(), 2)

    @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
    def test_cat_autogpu(self):
        x = torch.randn(4, 4).cuda(1)
        y = torch.randn(4, 4).cuda(1)
        z = torch.cat([x, y], 0)
        self.assertEqual(z.get_device(), x.get_device())

    def test_clamp(self):
        _TestTorchMixin._test_clamp(self, 'cuda')

    def test_cat(self):
        SIZE = 10
        for dim in range(-3, 3):
            pos_dim = dim if dim >= 0 else 3 + dim
            x = torch.rand(13, SIZE, SIZE).transpose(0, pos_dim).cuda()
            y = torch.rand(17, SIZE, SIZE).transpose(0, pos_dim).cuda()
            z = torch.rand(19, SIZE, SIZE).transpose(0, pos_dim).cuda()

            res1 = torch.cat((x, y, z), dim)
            self.assertEqual(res1.narrow(pos_dim, 0, 13), x, 0)
            self.assertEqual(res1.narrow(pos_dim, 13, 17), y, 0)
            self.assertEqual(res1.narrow(pos_dim, 30, 19), z, 0)

        x = torch.randn(20, SIZE, SIZE).cuda()
        self.assertEqual(torch.cat(torch.split(x, 7)), x)
        self.assertEqual(torch.cat(torch.chunk(x, 7)), x)

        y = torch.randn(1, SIZE, SIZE).cuda()
        z = torch.cat([x, y])
        self.assertEqual(z.size(), (21, SIZE, SIZE))

    def test_cat_empty_legacy(self):
        _TestTorchMixin._test_cat_empty_legacy(self, use_cuda=True)

    def test_cat_empty(self):
        _TestTorchMixin._test_cat_empty(self, use_cuda=True)

    def test_bernoulli(self):
        _TestTorchMixin._test_bernoulli(self, torch.float32, torch.float64, 'cuda')
        _TestTorchMixin._test_bernoulli(self, torch.float32, torch.float16, 'cuda')
        _TestTorchMixin._test_bernoulli(self, torch.float16, torch.float64, 'cuda')
        _TestTorchMixin._test_bernoulli(self, torch.float16, torch.float16, 'cuda')
        _TestTorchMixin._test_bernoulli(self, torch.uint8, torch.float64, 'cuda')
        _TestTorchMixin._test_bernoulli(self, torch.uint8, torch.float16, 'cuda')
        _TestTorchMixin._test_bernoulli(self, torch.int64, torch.float64, 'cuda')
        _TestTorchMixin._test_bernoulli(self, torch.int64, torch.float16, 'cuda')

    def test_cat_bad_input_sizes(self):
        x = torch.randn(2, 1).cuda()
        y = torch.randn(2, 1, 1).cuda()
        z = torch.randn(2, 1, 1).cuda()
        self.assertRaises(RuntimeError, lambda: torch.cat([x, y, z]))

        x = torch.randn(2, 1, 2).cuda()
        y = torch.randn(2, 1, 1).cuda()
        z = torch.randn(2, 2, 1).cuda()
        self.assertRaises(RuntimeError, lambda: torch.cat([x, y, z], dim=1))

    @unittest.skipIf(not PY3, "Tensor was serialized with Python 3")
    def test_load_nonexistent_device(self):
        # setup: serialize a tensor, then rewrite its restore location to a
        # CUDA device that does not exist
        tensor = torch.randn(2, device='cuda')
        buf = io.BytesIO()
        torch.save(tensor, buf)
        # NB: this depends on the current serialization format
        buf = io.BytesIO(buf.getvalue().replace(b'cuda:0', b'cuda:9'))

        msg = r'Attempting to deserialize object on CUDA device 9'
        with self.assertRaisesRegex(RuntimeError, msg):
            _ = torch.load(buf)

    def test_serialization(self):
        x = torch.randn(4, 4).cuda()
        with tempfile.NamedTemporaryFile() as f:
            torch.save(x, f)
            f.seek(0)
            x_copy = torch.load(f)
        self.assertEqual(x_copy, x)
        self.assertIs(type(x_copy), type(x))
        self.assertEqual(x_copy.get_device(), x.get_device())

    def test_serialization_array_with_empty(self):
        x = [torch.randn(4, 4).cuda(), torch.cuda.FloatTensor()]
        with tempfile.NamedTemporaryFile() as f:
            torch.save(x, f)
            f.seek(0)
            x_copy = torch.load(f)
        for original, copy in zip(x, x_copy):
            self.assertEqual(copy, original)
            self.assertIs(type(copy), type(original))
            self.assertEqual(copy.get_device(), original.get_device())

    @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
    def test_multigpu_serialization(self):
        x = [torch.randn(4, 4).cuda(0), torch.randn(4, 4).cuda(1)]
        with tempfile.NamedTemporaryFile() as f:
            torch.save(x, f)
            f.seek(0)
            x_copy = torch.load(f)
        for original, copy in zip(x, x_copy):
            self.assertEqual(copy, original)
            self.assertIs(type(copy), type(original))
            self.assertEqual(copy.get_device(), original.get_device())

    @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
    def test_multigpu_serialization_remap(self):
        x = [torch.randn(4, 4).cuda(0), torch.randn(4, 4).cuda(1)]

        def gpu_remap(storage, location):
            if location == 'cuda:1':
                return storage.cuda(0)

        with tempfile.NamedTemporaryFile() as f:
            torch.save(x, f)
            f.seek(0)
            x_copy = torch.load(f, map_location=gpu_remap)

        for original, copy in zip(x, x_copy):
            self.assertEqual(copy, original)
            self.assertIs(type(copy), type(original))
            self.assertEqual(copy.get_device(), 0)

    @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
    def test_multigpu_serialization_remap_dict(self):
        x = [torch.randn(4, 4).cuda(0), torch.randn(4, 4).cuda(1)]
        with tempfile.NamedTemporaryFile() as f:
            torch.save(x, f)
            f.seek(0)
            x_copy = torch.load(f, map_location={'cuda:1': 'cuda:0'})
        for original, copy in zip(x, x_copy):
            self.assertEqual(copy, original)
            self.assertIs(type(copy), type(original))
            self.assertEqual(copy.get_device(), 0)

    @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
    def test_multigpu_storage_clone(self):
        x = torch.randn(4, 4, device='cuda:1').storage()
        y = x.clone()
        self.assertEqual(x.get_device(), y.get_device())
        for t in ['byte', 'char', 'short', 'int', 'long', 'half', 'double']:
            self.assertEqual(getattr(x, t)().get_device(), x.get_device())

    @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
    def test_cuda_set_device(self):
        x = torch.randn(5, 5)
        with torch.cuda.device(1):
            self.assertEqual(x.cuda().get_device(), 1)
            torch.cuda.set_device(0)
            self.assertEqual(x.cuda().get_device(), 0)
            with torch.cuda.device(1):
                self.assertEqual(x.cuda().get_device(), 1)
            self.assertEqual(x.cuda().get_device(), 0)
            torch.cuda.set_device(1)
        self.assertEqual(x.cuda().get_device(), 0)

    def test_is_tensor(self):
        for t in types:
            tensor = get_gpu_type(t)()
            self.assertTrue(torch.is_tensor(tensor))
        self.assertTrue(torch.is_tensor(torch.cuda.HalfTensor()))

    def test_cuda_synchronize(self):
        torch.cuda.synchronize()

    @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
    def test_current_stream(self):
        d0 = torch.device('cuda:0')
        d1 = torch.device('cuda:1')

        s0 = torch.cuda.current_stream()
        s1 = torch.cuda.current_stream(device=1)
        s2 = torch.cuda.current_stream(device=0)

        self.assertEqual(d0, s0.device)
        self.assertEqual(d1, s1.device)
        self.assertEqual(d0, s2.device)
        self.assertEqual(s0, s2)

        with torch.cuda.device(d1):
            s0 = torch.cuda.current_stream()
            s1 = torch.cuda.current_stream(1)
            s2 = torch.cuda.current_stream(d0)

        self.assertEqual(d1, s0.device)
        self.assertEqual(d1, s1.device)
        self.assertEqual(d0, s2.device)
        self.assertEqual(s0, s1)

        with self.assertRaisesRegex(ValueError,
                                    "Expected a cuda device, but got: cpu"):
            torch.cuda.current_stream(torch.device('cpu'))

    @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
    def test_default_stream(self):
        d0 = torch.device('cuda:0')
        d1 = torch.device('cuda:1')

        with torch.cuda.device(d0):
            s0 = torch.cuda.default_stream()

        with torch.cuda.device(d1):
            s1 = torch.cuda.default_stream()

        s2 = torch.cuda.default_stream(device=0)
        s3 = torch.cuda.default_stream(d1)

        self.assertEqual(d0, s0.device)
        self.assertEqual(d1, s1.device)
        self.assertEqual(d0, s2.device)
        self.assertEqual(d1, s3.device)
        self.assertEqual(s0, s2)
        self.assertEqual(s1, s3)

        with self.assertRaisesRegex(ValueError,
                                    "Expected a cuda device, but got: cpu"):
            torch.cuda.default_stream(torch.device('cpu'))

    def test_streams(self):
        default_stream = torch.cuda.current_stream()
        user_stream = torch.cuda.Stream()
        self.assertEqual(torch.cuda.current_stream(), default_stream)
        self.assertNotEqual(default_stream, user_stream)
        self.assertEqual(default_stream.cuda_stream, 0)
        self.assertNotEqual(user_stream.cuda_stream, 0)
        with torch.cuda.stream(user_stream):
            self.assertEqual(torch.cuda.current_stream(), user_stream)
        self.assertTrue(user_stream.query())
        # copy a 10 MB tensor from CPU to GPU; this should take some time
        tensor1 = torch.ByteTensor(10000000).pin_memory()
        tensor2 = tensor1.cuda(non_blocking=True)
        self.assertFalse(default_stream.query())
        default_stream.synchronize()
        self.assertTrue(default_stream.query())

    @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
    def test_stream_event_device(self):
        d0 = torch.device('cuda:0')
        d1 = torch.device('cuda:1')
        e0 = torch.cuda.Event()

        self.assertEqual(None, e0.device)

        with torch.cuda.device(d0):
            s0 = torch.cuda.current_stream()
            s0.record_event(e0)

        with torch.cuda.device(d1):
            s1 = torch.cuda.Stream()
            e1 = s1.record_event()

        self.assertEqual(s0.device, torch.device('cuda:0'))
        self.assertEqual(e0.device, torch.device('cuda:0'))
        self.assertEqual(s1.device, torch.device('cuda:1'))
        self.assertEqual(e1.device, torch.device('cuda:1'))

    def test_stream_event_repr(self):
        s = torch.cuda.current_stream()
        self.assertTrue("torch.cuda.Stream" in s.__repr__())
        e = torch.cuda.Event()
        self.assertTrue("torch.cuda.Event" in e.__repr__())
        s.record_event(e)
        self.assertTrue("torch.cuda.Event" in e.__repr__())

    @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
    def test_stream_context(self):
        s0 = torch.cuda.current_stream()
        s1 = torch.cuda.Stream(device=1)
        s2 = torch.cuda.Stream(device=0)
        # ... (nested torch.cuda.stream(...) context checks elided) ...

    @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
    def test_streams_multi_gpu(self):
        default_stream = torch.cuda.current_stream()
        self.assertEqual(default_stream.device, torch.device('cuda:0'))
        stream = torch.cuda.Stream(device=1)
        self.assertEqual(stream.device, torch.device('cuda:1'))
        with torch.cuda.device(1):
            self.assertEqual(torch.cuda.current_stream().device, torch.device('cuda:1'))
            self.assertNotEqual(torch.cuda.current_stream(), default_stream)

    @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
    def test_streams_multi_gpu_query(self):
        d0 = torch.device('cuda:0')
        d1 = torch.device('cuda:1')

        with torch.cuda.device(d0):
            s0 = torch.cuda.current_stream()

        with torch.cuda.device(d1):
            s1 = torch.cuda.current_stream()
            torch.cuda._sleep(TestCuda.FIFTY_MIL_CYCLES)

        self.assertTrue(s0.query())
        self.assertFalse(s1.query())

        with torch.cuda.device(d0):
            self.assertTrue(s0.query())
            self.assertFalse(s1.query())

        with torch.cuda.device(d1):
            self.assertTrue(s0.query())
            self.assertFalse(s1.query())

        # deliberately synchronizing from a different device
        with torch.cuda.device(d0):
            s1.synchronize()

        self.assertTrue(s0.query())
        self.assertTrue(s1.query())

        with torch.cuda.device(d0):
            self.assertTrue(s0.query())
            self.assertTrue(s1.query())

        with torch.cuda.device(d1):
            self.assertTrue(s0.query())
            self.assertTrue(s1.query())

    @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
    def test_streams_multi_gpu_eq(self):
        d0 = torch.device('cuda:0')
        d1 = torch.device('cuda:1')

        with torch.cuda.device(d0):
            s0 = torch.cuda.current_stream()
            s1 = torch.cuda.current_stream()

        with torch.cuda.device(d1):
            s2 = torch.cuda.current_stream()
            s3 = torch.cuda.current_stream()

        self.assertTrue(s0 == s0)
        self.assertTrue(s0 == s1)
        self.assertTrue(s2 == s2)
        self.assertTrue(s2 == s3)
        self.assertFalse(s0 == s2)
        self.assertFalse(s1 == s3)

        self.assertEqual(s0.device, s1.device)
        self.assertEqual(s0.cuda_stream, s1.cuda_stream)
        self.assertEqual(s2.device, s3.device)
        self.assertEqual(s2.cuda_stream, s3.cuda_stream)
        self.assertNotEqual(s0.device, s3.device)

        self.assertEqual(hash(s0), hash(s1))
        self.assertEqual(hash(s2), hash(s3))
        self.assertNotEqual(hash(s0), hash(s3))

    @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
    def test_streams_priority(self):
        low, high = torch.cuda.Stream.priority_range()
        s0 = torch.cuda.Stream(device=0, priority=low)

        self.assertEqual(low, s0.priority)
        self.assertEqual(torch.device('cuda:0'), s0.device)

        s1 = torch.cuda.Stream(device=1, priority=high)

        self.assertEqual(high, s1.priority)
        self.assertEqual(torch.device('cuda:1'), s1.device)

    @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
    def test_tensor_device(self):
        self.assertEqual(torch.cuda.FloatTensor(1).get_device(), 0)
        self.assertEqual(torch.cuda.FloatTensor(1, device=1).get_device(), 1)
        with torch.cuda.device(1):
            self.assertEqual(torch.cuda.FloatTensor(1).get_device(), 1)
            self.assertEqual(torch.cuda.FloatTensor(1, device=0).get_device(), 0)
            self.assertEqual(torch.cuda.FloatTensor(1, device=None).get_device(), 1)

    def test_events(self):
        stream = torch.cuda.current_stream()
        event = torch.cuda.Event(enable_timing=True)
        self.assertTrue(event.query())
        start_event = torch.cuda.Event(enable_timing=True)
        stream.record_event(start_event)
        torch.cuda._sleep(int(50 * get_cycles_per_ms()))
        stream.record_event(event)
        self.assertFalse(event.query())
        event.synchronize()
        self.assertTrue(event.query())
        self.assertGreater(start_event.elapsed_time(event), 0)

    @staticmethod
    def _stream_synchronize(self, spin_time_cycles):
        s = torch.cuda.current_stream()
        e_tik = torch.cuda.Event(enable_timing=True)
        e_tok = torch.cuda.Event(enable_timing=True)

        e_tik.record(s)
        torch.cuda._sleep(spin_time_cycles)
        e_tok.record(s)
        s.synchronize()

        self.assertTrue(s.query())
        # no need to check e_tik/e_tok: elapsed_time would throw otherwise
        return e_tik.elapsed_time(e_tok)

    @staticmethod
    def _event_synchronize(self, spin_time_cycles):
        s = torch.cuda.current_stream()
        e_tik = torch.cuda.Event(enable_timing=True)
        e_tok = torch.cuda.Event(enable_timing=True)

        e_tik.record(s)
        torch.cuda._sleep(spin_time_cycles)
        s.record_event(e_tok)
        e_tok.synchronize()

        self.assertTrue(s.query())
        return e_tik.elapsed_time(e_tok)

    @staticmethod
    def _event_wait(self, spin_time_cycles):
        s0 = torch.cuda.current_stream()
        s1 = torch.cuda.Stream()
        e_tik = torch.cuda.Event(blocking=True, enable_timing=True)
        e_tok = torch.cuda.Event(blocking=True, enable_timing=True)

        e_tik.record(s0)
        torch.cuda._sleep(spin_time_cycles - 10)
        e_sync = torch.cuda.Event(blocking=True)
        e_sync.record()
        e_sync.wait(s1)
        with torch.cuda.stream(s1):
            torch.cuda._sleep(10)
        s1.synchronize()
        s1.record_event(e_tok)

        self.assertTrue(s0.query())
        self.assertTrue(s1.query())
        self.assertTrue(e_sync.query())
        return e_tik.elapsed_time(e_tok)

    @staticmethod
    def _test_stream_event_nogil(self, sync_func, p2c, c2p):
        with torch.cuda.device('cuda:1'):
            c2p.put(0)
            p2c.get()
            c2p.put(sync_func(self, TestCuda.FIFTY_MIL_CYCLES))

    @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
    def test_stream_event_nogil(self):
        for sync_func in [TestCuda._stream_synchronize,
                          TestCuda._event_synchronize,
                          TestCuda._event_wait]:
            p2c = queue.Queue()
            c2p = queue.Queue()
            e_tik = torch.cuda.Event(enable_timing=True)
            e_tok = torch.cuda.Event(enable_timing=True)

            t = threading.Thread(
                target=TestCuda._test_stream_event_nogil,
                args=(self, sync_func, p2c, c2p))
            t.daemon = True
            t.start()

            c2p.get()
            with torch.cuda.device('cuda:0'):
                e_tik.record()
                p2c.put(0)
                parent_time = sync_func(self, TestCuda.FIFTY_MIL_CYCLES)
                child_time = c2p.get()
                e_tok.record()
                e_tok.synchronize()
                total_time = e_tik.elapsed_time(e_tok)

            # without the GIL held during synchronization, the parent and
            # child spins overlap, so their summed time clearly exceeds the
            # measured wall time
            self.assertGreater(parent_time + child_time, total_time * 1.4)

    @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
    def test_events_wait(self):
        d0 = torch.device('cuda:0')
        d1 = torch.device('cuda:1')

        with torch.cuda.device(d0):
            s0 = torch.cuda.current_stream()
            torch.cuda._sleep(TestCuda.FIFTY_MIL_CYCLES)
            e0 = torch.cuda.Event()
            s0.record_event(e0)

        with torch.cuda.device(d1):
            s1 = torch.cuda.current_stream()

        self.assertFalse(s0.query())
        self.assertTrue(s1.query())

        s1.wait_event(e0)
        s1.synchronize()

        self.assertTrue(e0.query())
        self.assertTrue(s0.query())
        self.assertTrue(s1.query())

    @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
    def test_events_multi_gpu_query(self):
        d0 = torch.device('cuda:0')
        d1 = torch.device('cuda:1')

        with torch.cuda.device(d0):
            s0 = torch.cuda.current_stream()
            e0 = s0.record_event()

        with torch.cuda.device(d1):
            s1 = torch.cuda.current_stream()
            torch.cuda._sleep(TestCuda.FIFTY_MIL_CYCLES)
            e1 = s1.record_event()

        self.assertTrue(e0.query())
        self.assertFalse(e1.query())

        with torch.cuda.device(d0):
            self.assertTrue(e0.query())
            self.assertFalse(e1.query())

        with torch.cuda.device(d1):
            self.assertTrue(e0.query())
            self.assertFalse(e1.query())

        # deliberately synchronizing from a different device
        with torch.cuda.device(d0):
            e1.synchronize()

        self.assertTrue(e0.query())
        self.assertTrue(e1.query())

        with torch.cuda.device(d0):
            self.assertTrue(e0.query())
            self.assertTrue(e1.query())

        with torch.cuda.device(d1):
            self.assertTrue(e0.query())
            self.assertTrue(e1.query())

    @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
    def test_events_multi_gpu_elapsed_time(self):
        d0 = torch.device('cuda:0')
        d1 = torch.device('cuda:1')

        with torch.cuda.device(d0):
            s0 = torch.cuda.current_stream()
            e0 = torch.cuda.Event(enable_timing=True)
            torch.cuda._sleep(10)
            s0.record_event(e0)

        with torch.cuda.device(d1):
            s1 = torch.cuda.current_stream()
            e1 = torch.cuda.Event(enable_timing=True)
            torch.cuda._sleep(10)
            s1.record_event(e1)

        e0.synchronize()
        e1.synchronize()
        # elapsed_time across events recorded on different devices must fail
        with torch.cuda.device(d0):
            with self.assertRaises(RuntimeError):
                self.assertGreater(e0.elapsed_time(e1), 0)

        with torch.cuda.device(d1):
            with self.assertRaises(RuntimeError):
                self.assertGreater(e0.elapsed_time(e1), 0)

        with torch.cuda.device(d0):
            s0 = torch.cuda.current_stream()
            e2 = torch.cuda.Event(enable_timing=True)
            torch.cuda._sleep(TestCuda.FIFTY_MIL_CYCLES)
            s0.record_event(e2)
            s0.synchronize()

        self.assertGreater(e0.elapsed_time(e2), 0)

        # deliberately calling from a different device
        with torch.cuda.device(d1):
            self.assertGreater(e0.elapsed_time(e2), 0)

    def test_record_stream(self):
        cycles_per_ms = get_cycles_per_ms()

        t = torch.FloatTensor([1, 2, 3, 4]).pin_memory()
        result = torch.cuda.FloatTensor(t.size())
        stream = torch.cuda.Stream()
        ptr = [None]

        # perform the CPU->GPU copy in a background stream
        def perform_copy():
            with torch.cuda.stream(stream):
                tmp = t.cuda(non_blocking=True)
                ptr[0] = tmp.data_ptr()
            torch.cuda.current_stream().wait_stream(stream)
            tmp.record_stream(torch.cuda.current_stream())
            torch.cuda._sleep(int(50 * cycles_per_ms))  # delay the copy
            result.copy_(tmp)

        perform_copy()
        with torch.cuda.stream(stream):
            tmp2 = torch.cuda.FloatTensor(t.size())
            tmp2.zero_()
            self.assertNotEqual(tmp2.data_ptr(), ptr[0],
                                'allocation re-used too soon')

        self.assertEqual(result.tolist(), [1, 2, 3, 4])

        # the block should be re-used after the main stream finishes
        torch.cuda.current_stream().synchronize()
        with torch.cuda.stream(stream):
            tmp3 = torch.cuda.FloatTensor(t.size())
            self.assertEqual(tmp3.data_ptr(), ptr[0], 'allocation not re-used')

    def test_noncontiguous_pinned_memory(self):
        x = torch.arange(0, 10).view((2, 5))
        self.assertEqual(x.t(), x.t().pin_memory())

    def test_caching_pinned_memory(self):
        cycles_per_ms = get_cycles_per_ms()

        # check that allocations are re-used after deletion
        t = torch.FloatTensor([1]).pin_memory()
        ptr = t.data_ptr()
        del t
        t = torch.FloatTensor([1]).pin_memory()
        self.assertEqual(t.data_ptr(), ptr, 'allocation not reused')

        # check that the allocation is not re-used while in use by a copy
        gpu_tensor = torch.cuda.FloatTensor([0])
        torch.cuda._sleep(int(50 * cycles_per_ms))  # delay the copy
        gpu_tensor.copy_(t, non_blocking=True)
        del t
        t = torch.FloatTensor([1]).pin_memory()
        self.assertNotEqual(t.data_ptr(), ptr, 'allocation re-used too soon')
        self.assertEqual(list(gpu_tensor), [1])

    @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
    def test_caching_pinned_memory_multi_gpu(self):
        # checks that the events preventing pinned memory from being re-used
        # too early are recorded on the correct GPU
        cycles_per_ms = get_cycles_per_ms()

        t = torch.FloatTensor([1]).pin_memory()
        ptr = t.data_ptr()
        gpu_tensor0 = torch.cuda.FloatTensor([0], device=0)
        gpu_tensor1 = torch.cuda.FloatTensor([0], device=1)

        with torch.cuda.device(1):
            torch.cuda._sleep(int(50 * cycles_per_ms))  # delay the copy
            gpu_tensor1.copy_(t, non_blocking=True)

        del t
        t = torch.FloatTensor([2]).pin_memory()
        self.assertNotEqual(t.data_ptr(), ptr, 'allocation re-used too soon')

        with torch.cuda.device(0):
            gpu_tensor0.copy_(t, non_blocking=True)

        self.assertEqual(gpu_tensor1[0], 1)
        self.assertEqual(gpu_tensor0[0], 2)

    def test_reduction_gpu_memory_accessing(self):
        x = torch.ones(512, 8, dtype=torch.float32, device='cuda')
        torch.sum(x, 0)

    def test_sum_cpu_gpu_mismatch(self):
        x = torch.randn(20, dtype=torch.float32, device='cuda')
        y = torch.randn(1, dtype=torch.float32)
        with self.assertRaisesRegex(RuntimeError,
                                    'expected type'
                                    ' torch.FloatTensor but got'
                                    ' torch.cuda.FloatTensor'):
            torch.sum(x, dim=[0], dtype=torch.float32, out=y)
        # make sure half-to-float promotion also works properly
        x = x.half()
        with self.assertRaisesRegex(RuntimeError,
                                    'expected type'
                                    ' torch.FloatTensor but got'
                                    ' torch.cuda.HalfTensor'):
            torch.sum(x, dim=[0], dtype=torch.float32, out=y)

    def test_sum_noncontig(self):
        x = torch.randn(1, 75, 57, 20, device='cuda').permute(0, 3, 1, 2)
        y = x.cpu()
        self.assertEqual(x.sum().cpu(), y.sum())
        self.assertEqual(x.sum(dim=(-1, -2)).cpu(), y.sum(dim=(-1, -2)))
        self.assertEqual(x.sum(dim=(1, 3)).cpu(), y.sum(dim=(1, 3)))

    def test_sum_fp16(self):
        x = torch.zeros(10, device='cuda', dtype=torch.float16)
        self.assertEqual(x.sum(), 0)

        x = torch.ones(65504, device='cuda', dtype=torch.float16)
        self.assertEqual(x.sum(), 65504)
        self.assertEqual(x.sum(dtype=torch.float32), 65504)

        x = torch.ones(65536, device='cuda', dtype=torch.float16)
        self.assertEqual(x.sum(dtype=torch.float32), 65536)

        a = torch.zeros(1203611).bernoulli_(0.0005)
        x = a.to(device='cuda', dtype=torch.float16)
        self.assertEqual(x.sum().item(), a.sum().item())

        a = torch.zeros(100, 121, 80).bernoulli_(0.0005)
        x = a.to(device='cuda', dtype=torch.float16)
        self.assertEqual(x.sum((0, 2)).float().cpu(), a.sum((0, 2)))

    def test_mean_fp16(self):
        x = torch.ones(65536, device='cuda', dtype=torch.float16)
        self.assertEqual(x.mean(), 1)

        x = torch.ones(65536, device='cuda', dtype=torch.float16)
        self.assertEqual(x.mean(dtype=torch.float32), 1)

    def test_prod_large(self):
        # global reduction with a non-zero identity element
        x = torch.ones(240000, device='cuda', dtype=torch.float32)
        self.assertEqual(x.prod(), 1)

    @staticmethod
    def _select_broadcastable_dims(dims_full=None):
        return _TestTorchMixin._select_broadcastable_dims(dims_full)

    @unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")
    def test_inverse(self):
        _TestTorchMixin._test_inverse(self, lambda t: t.cuda())

    @unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")
    def test_pinverse(self):
        _TestTorchMixin._test_pinverse(self, lambda t: t.cuda())

    @unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")
    def test_matrix_rank(self):
        _TestTorchMixin._test_matrix_rank(self, lambda x: x.cuda())

    @unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")
    def test_matrix_power(self):
        _TestTorchMixin._test_matrix_power(self, conv_fn=lambda t: t.cuda())

    def test_chain_matmul(self):
        _TestTorchMixin._test_chain_matmul(self, cast=lambda t: t.cuda())

    @unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")
    def test_det_logdet_slogdet(self):
        _TestTorchMixin._test_det_logdet_slogdet(self, lambda t: t.cuda())

    @unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")
    def test_solve(self):
        _TestTorchMixin._test_solve(self, lambda t: t.cuda())

    @unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")
    def test_solve_batched(self):
        _TestTorchMixin._test_solve_batched(self, lambda t: t.cuda())

    @unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")
    def test_solve_batched_dims(self):
        _TestTorchMixin._test_solve_batched_dims(self, lambda t: t.cuda())

    @unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")
    def test_cholesky_solve(self):
        _TestTorchMixin._test_cholesky_solve(self, lambda t: t.cuda())

    @unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")
    def test_cholesky_solve_batched(self):
        _TestTorchMixin._test_cholesky_solve_batched(self, lambda t: t.cuda())

    @unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")
    def test_cholesky_solve_batched_dims(self):
        _TestTorchMixin._test_cholesky_solve_batched_dims(self, lambda t: t.cuda())

    @unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")
    def test_cholesky(self):
        _TestTorchMixin._test_cholesky(self, lambda t: t.cuda())

    @unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")
    def test_cholesky_batched(self):
        _TestTorchMixin._test_cholesky_batched(self, lambda t: t.cuda())

    def test_view(self):
        _TestTorchMixin._test_view(self, lambda t: t.cuda())

    def test_flip(self):
        _TestTorchMixin._test_flip(self, use_cuda=True)

    def test_rot90(self):
        _TestTorchMixin._test_rot90(self, use_cuda=True)

    def test_signal_window_functions(self):
        _TestTorchMixin._test_signal_window_functions(self, device=torch.device('cuda'))

    def test_fft_ifft_rfft_irfft(self):
        _TestTorchMixin._test_fft_ifft_rfft_irfft(self, device=torch.device('cuda'))

        @contextmanager
        def plan_cache_max_size(n):
            original = torch.backends.cuda.cufft_plan_cache.max_size
            torch.backends.cuda.cufft_plan_cache.max_size = n
            yield
            torch.backends.cuda.cufft_plan_cache.max_size = original

        with plan_cache_max_size(max(1, torch.backends.cuda.cufft_plan_cache.size - 10)):
            _TestTorchMixin._test_fft_ifft_rfft_irfft(self, device=torch.device('cuda'))

        with plan_cache_max_size(0):
            _TestTorchMixin._test_fft_ifft_rfft_irfft(self, device=torch.device('cuda'))

        torch.backends.cuda.cufft_plan_cache.clear()

        # check that the tests still pass after clearing the cache
        with plan_cache_max_size(10):
            _TestTorchMixin._test_fft_ifft_rfft_irfft(self, device=torch.device('cuda'))

        with self.assertRaisesRegex(RuntimeError, r"must be non-negative"):
            torch.backends.cuda.cufft_plan_cache.max_size = -1

        with self.assertRaisesRegex(RuntimeError, r"read-only property"):
            torch.backends.cuda.cufft_plan_cache.size = -1
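
    # torch.backends.cuda.cufft_plan_cache exposes `size` (current number of
    # cached cuFFT plans, read-only), `max_size` (settable capacity) and
    # `clear()`; the plan_cache_max_size helper above temporarily shrinks the
    # capacity so the FFT tests also run under plan eviction.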

    def test_stft(self):
        _TestTorchMixin._test_stft(self, device=torch.device('cuda'))

    def test_multinomial(self):
        _TestTorchMixin._test_multinomial(self, torch.cuda.FloatTensor)

        # regression case: many zero and tiny probabilities; entries with zero
        # probability must never be sampled
        freqs = torch.cuda.FloatTensor([
            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
            0.03178183361887932, 0.027680952101945877, 0.033176131546497345,
            0.046052902936935425, 0.07742464542388916, 0.11543981730937958,
            0.14148041605949402, 0.15784293413162231, 0.13180233538150787,
            0.08271478116512299, 0.049702685326337814, 0.027557924389839172,
            0.018125897273421288, 0.011851548217236996, 0.010252203792333603,
            0.007422595750540495, 0.005372154992073774, 0.0045109698548913,
            0.0036087757907807827, 0.0035267581697553396, 0.0018864056328311563,
            0.0024605290964245796, 0.0022964938543736935, 0.0018453967059031129,
            0.0010662291897460818, 0.0009842115687206388, 0.00045109697384759784,
            0.0007791675161570311, 0.00020504408166743815, 0.00020504408166743815,
            0.00020504408166743815, 0.00012302644609007984, 0.0,
            0.00012302644609007984, 4.100881778867915e-05, 0.0, 0.0, 0.0, 0.0])

        torch.cuda.manual_seed(11042)
        sample = torch.multinomial(freqs, 1000, True)
        self.assertNotEqual(freqs[sample].min(), 0)

        p = torch.zeros(3421, 2, device="cuda", dtype=torch.float)
        p[:, 1] = 1  # all probability mass on index 1, so 0 must never be drawn
        torch.cuda.manual_seed(5214)
        r = torch.multinomial(p, 1)
        self.assertNotEqual(r.min().item(), 0)

        # mostly-zero probabilities with tiny positive entries
        torch.cuda.manual_seed(33)
        probs = torch.randn(1000000, device='cuda').clamp(min=0) * 3e-5
        samples = probs.multinomial(1000000, replacement=True)
        self.assertGreater(probs[samples].min().item(), 0)
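
    # Helpers for the invalid-probability multinomial tests below: a failed
    # device-side assert poisons the CUDA context of the process that triggered
    # it, so each case runs in a freshly spawned subprocess, with stderr muted
    # to hide the (expected) assertion spew.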

    @staticmethod
    def mute():
        os.dup2(os.open(os.devnull, os.O_WRONLY), sys.stderr.fileno())

    def _spawn_method(self, method, arg):
        ctx = mp.get_context("spawn")
        with ctx.Pool(1, initializer=self.mute) as pool:
            errors = pool.map(method, [arg])
            for e in errors:
                if 'device-side assert triggered' not in str(e):
                    self.fail(e)

    @staticmethod
    def _test_multinomial_invalid_probs_cuda(probs):
        try:
            torch.multinomial(probs.to('cuda'), 2)
            torch.cuda.synchronize()
            return False  # the device-side assert should have fired
        except RuntimeError as e:
            return e

    @unittest.skipIf(NO_MULTIPROCESSING_SPAWN, "Disabled for environments that "
                     "don't support multiprocessing with spawn start method")
    @unittest.skipIf(IS_WINDOWS, 'FIXME: CUDA OOM error on Windows')
    @unittest.skipIf(not PY3, "spawn start method is not supported in Python 2, "
                     "but we need it for creating another process with CUDA")
    def test_multinomial_invalid_probs_cuda(self):
        test_method = TestCuda._test_multinomial_invalid_probs_cuda
        self._spawn_method(test_method, torch.Tensor([1, -1, 1]))
        self._spawn_method(test_method, torch.Tensor([1, inf, 1]))
        self._spawn_method(test_method, torch.Tensor([1, -inf, 1]))
        self._spawn_method(test_method, torch.Tensor([1, 1, nan]))
        self._spawn_method(test_method, torch.Tensor([0, 1, 0]))

    def test_broadcast(self):
        _TestTorchMixin._test_broadcast(self, lambda t: t.cuda())

    def test_contiguous(self):
        _TestTorchMixin._test_contiguous(self, lambda t: t.cuda())

    def test_broadcast_fused_matmul(self):
        _TestTorchMixin._test_broadcast_fused_matmul(self, lambda t: t.cuda())

    def test_broadcast_batched_matmul(self):
        _TestTorchMixin._test_broadcast_batched_matmul(self, lambda t: t.cuda())

    def test_index(self):
        _TestTorchMixin._test_index(self, lambda t: t.cuda())

    def test_advancedindex(self):
        _TestTorchMixin._test_advancedindex(self, lambda t: t.cuda())

    def test_advancedindex_mixed_cpu_cuda(self):
        def test(x, ia, ib):
            # test getitem
            self.assertEqual(x[:, ia, None, ib, 0].cpu(),
                             x.cpu()[:, ia.cpu(), None, ib.cpu(), 0])
            self.assertEqual(x[ia], x.cpu()[ia.cpu()])

            # test setitem
            x_clone1 = x.clone()
            x_clone2 = x.clone()
            first_shape = x[:, ia, None, ib, 0].shape
            second_shape = x[ia].shape
            x_clone1[:, ia, None, ib, 0] = torch.randn(first_shape).to(x_clone1)
            x_clone2[ia] = torch.randn(second_shape).to(x_clone2)

        cpu = torch.device('cpu')
        for device in ['cuda:0', 'cuda:1'] if TEST_MULTIGPU else ['cuda:0']:
            # mix cpu tensors with cuda indices and vice versa (only a
            # representative subset of the combinations is exercised here)
            x = torch.randn(3, 4, 4, 4, 3)
            ia = torch.tensor([0, 2, 1]).to(device)
            ib = torch.tensor([0, 2, 1]).to(device)
            test(x, ia, ib)

            x = x.to(device)
            ia = ia.to(cpu)
            test(x, ia, ib)

            if TEST_MULTIGPU:
                # indices living on two different cuda devices
                other_device = 'cuda:0' if device != 'cuda:0' else 'cuda:1'
                ib = ib.to(other_device)
                test(x, ia, ib)

    def test_advancedindex_big(self):
        _TestTorchMixin._test_advancedindex_big(self, lambda t: t.cuda())

    def test_kthvalue(self):
        _TestTorchMixin._test_kthvalue(self, device='cuda')

    @unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")
    def test_btrifact(self):
        _TestTorchMixin._test_btrifact(self, lambda t: t.cuda())

    @unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")
    def test_btrisolve(self):
        _TestTorchMixin._test_btrisolve(self, lambda t: t.cuda())

    @unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")
    def test_btriunpack(self):
        _TestTorchMixin._test_btriunpack(self, lambda t: t.cuda())

    def test_dim_reduction(self):
        _TestTorchMixin._test_dim_reduction(self, lambda t: t.cuda())

    def test_tensor_gather(self):
        _TestTorchMixin._test_gather(self, lambda t: t.cuda(), False)

    def test_tensor_scatter(self):
        _TestTorchMixin._test_scatter_base(self, lambda t: t.cuda(), 'scatter_', test_bounds=False)

    def test_tensor_scatterAdd(self):
        _TestTorchMixin._test_scatter_base(self, lambda t: t.cuda(), 'scatter_add_', test_bounds=False)

    def test_tensor_scatterFill(self):
        _TestTorchMixin._test_scatter_base(self, lambda t: t.cuda(), 'scatter_', True, test_bounds=False)

    def test_min_max_inits(self):
        # 0 and 255 are the extreme values of a ByteTensor, so this exercises
        # the reduction kernels' accumulator initialization at both ends of the
        # range; in each case the returned index must be 0
        x = torch.cuda.ByteTensor([0])
        y = torch.cuda.ByteTensor([255])
        expected = torch.cuda.LongTensor([0])[0]

        _, v = x.max(dim=0)
        self.assertEqual(v, expected)

        _, v = y.min(dim=0)
        self.assertEqual(v, expected)

    def test_max_with_inf(self):
        _TestTorchMixin._test_max_with_inf(self, (torch.half, torch.float, torch.double), 'cuda')

    def test_min_with_inf(self):
        _TestTorchMixin._test_min_with_inf(self, (torch.half, torch.float, torch.double), 'cuda')

    def test_rpow(self):
        _TestTorchMixin._test_rpow(self, lambda x: x.cuda())

    def test_int_pow(self):
        _TestTorchMixin._test_int_pow(self, lambda x: x.cuda())

    def test_remainder_overflow(self):
        _TestTorchMixin._test_remainder_overflow(self, dtype=torch.int64, device='cuda')

    def test_var(self):
        cpu_tensor = torch.randn(2, 3, 3)
        gpu_tensor = cpu_tensor.cuda()
        self.assertEqual(gpu_tensor.var(), cpu_tensor.var())
        self.assertEqual(gpu_tensor.var(1), cpu_tensor.var(1))
        self.assertEqual(gpu_tensor.var(2), cpu_tensor.var(2))
        self.assertEqual(gpu_tensor.std(), cpu_tensor.std())
        self.assertEqual(gpu_tensor.std(1), cpu_tensor.std(1))
        self.assertEqual(gpu_tensor.std(2), cpu_tensor.std(2))

        cpu_tensor = torch.randn(100)
        gpu_tensor = cpu_tensor.cuda()
        self.assertEqual(gpu_tensor.var(), cpu_tensor.var())

    def test_var_unbiased(self):
        tensor = torch.randn(100).cuda()
        self.assertEqual(tensor.var(0), tensor.var(0, unbiased=True))
        self.assertEqual(tensor.var(), tensor.var(unbiased=True))
        self.assertEqual(tensor.var(unbiased=False), tensor.var(0, unbiased=False))

        tensor = torch.FloatTensor([1.0, 2.0]).cuda()
        self.assertEqual(tensor.var(unbiased=True), 0.5)
        self.assertEqual(tensor.var(unbiased=False), 0.25)

        tensor = torch.randn(100).cuda()
        self.assertEqual(tensor.std(0), tensor.std(0, unbiased=True))
        self.assertEqual(tensor.std(), tensor.std(unbiased=True))
        self.assertEqual(tensor.std(unbiased=False), tensor.std(0, unbiased=False))

    def test_var_large_input(self):
        # large input; the CUDA reduction must match the CPU reference
        tensor_cpu = torch.randn(2 * 32 * 1024 + 1, 2, 67)
        tensor_cuda = tensor_cpu.cuda()

        self.assertEqual(tensor_cpu.var(2), tensor_cuda.var(2).cpu())

    def test_var_stability(self):
        tensor = torch.FloatTensor([2281.5, 2281.25]).cuda()

        # inner dimension
        self.assertEqual(tensor.var(0), 0.03125)

        # global reduction
        self.assertEqual(tensor.var(), 0.03125)

        # outer dimension
        tensor = tensor.unsqueeze(1)
        self.assertEqual(tensor.var(0), 0.03125)
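
    # Why 0.03125: mean([2281.5, 2281.25]) = 2281.375 and both deviations are
    # +/-0.125, so the unbiased variance is 2 * 0.125**2 / 1 = 0.03125. A naive
    # E[x^2] - E[x]^2 evaluation cannot recover this at float32 (x**2 is ~5.2e6,
    # where the float32 ulp is 0.5), so passing requires a stable algorithm.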

    def test_digamma(self):
        def test(use_double=False):
            cpu_tensor = torch.randn(10, 10, 10)
            gpu_tensor = cpu_tensor.cuda()
            zeros = torch.zeros(10, 10, 10)
            if use_double:
                cpu_tensor = cpu_tensor.double()
                gpu_tensor = gpu_tensor.double()
                zeros = zeros.double()
            cpu_out = cpu_tensor.digamma()
            gpu_out = gpu_tensor.digamma()
            norm_errors = (gpu_out - cpu_out.cuda()) / gpu_out
            self.assertEqual(norm_errors, zeros)

        test(True)
        test(False)

        # test float32 behavior near and at the poles of digamma
        cpu_tensor = torch.tensor([-0.999999994, -1.999999994, -2.0000000111,
                                   -100.99999994, -1931.99999994, 0.000000111,
                                   -0.000000111, 0, -1, -2, -931])
        expected_errors = torch.tensor([0, 0, 0, 0, 0, 0, 0, nan, nan, nan, nan])
        gpu_tensor = cpu_tensor.cuda()
        cpu_out = cpu_tensor.digamma()
        gpu_out = gpu_tensor.digamma()
        norm_errors = (gpu_out - cpu_out.cuda()) / gpu_out
        self.assertEqual(norm_errors, expected_errors)
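
    # digamma has poles at zero and the negative integers; the last four inputs
    # above (0, -1, -2, -931) sit exactly on poles, where both CPU and CUDA
    # return nan, hence the nan entries in expected_errors.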

    def test_polygamma(self):
        def test(use_double=False):
            cpu_tensor = torch.randn(10, 10, 10)
            gpu_tensor = cpu_tensor.cuda()
            zeros = torch.zeros(10, 10, 10)
            if use_double:
                cpu_tensor = cpu_tensor.double()
                gpu_tensor = gpu_tensor.double()
                zeros = zeros.double()
            for n in [0, 1]:
                cpu_out = cpu_tensor.polygamma(n)
                gpu_out = gpu_tensor.polygamma(n)
                norm_errors = (gpu_out - cpu_out.cuda()) / gpu_out
                self.assertEqual(norm_errors, zeros)

        test(True)
        test(False)

    @unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")
    def test_symeig(self):
        _TestTorchMixin._test_symeig(self, lambda t: t.cuda())

    @unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")
    def test_svd_no_singularvectors(self):
        _TestTorchMixin._test_svd_no_singularvectors(self, lambda t: t.cuda())

    def test_arange(self):
        for t in ['IntTensor', 'LongTensor', 'FloatTensor', 'DoubleTensor']:
            a = torch.cuda.__dict__[t]()
            torch.arange(0, 10, out=a)
            b = torch.__dict__[t]()
            torch.arange(0, 10, out=b)
            self.assertEqual(a, b.cuda())

    def test_linspace(self):
        a = torch.linspace(0, 10, 10, device='cuda')
        b = torch.linspace(0, 10, 10)
        self.assertEqual(a, b.cuda())

    def test_logspace(self):
        a = torch.logspace(1, 10, 10, device='cuda')
        b = torch.logspace(1, 10, 10)
        self.assertEqual(a, b.cuda())

    def test_lerp(self):
        _TestTorchMixin._test_lerp(self, lambda t: t.cuda())

    def test_diagonal(self):
        _TestTorchMixin._test_diagonal(self, dtype=torch.float32, device='cuda')

    def test_diagflat(self):
        _TestTorchMixin._test_diagflat(self, dtype=torch.float32, device='cuda')

    @unittest.skipIf(not TEST_NUMPY, "NumPy not found")
    @unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")
    def test_norm(self):
        _TestTorchMixin._test_norm(self, device='cuda')

    def test_dist(self):
        _TestTorchMixin._test_dist(self, device='cuda')

    @unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")
    def test_geqrf(self):
        _TestTorchMixin._test_geqrf(self, lambda t: t.cuda())

    @unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")
    def test_trtrs(self):
        _TestTorchMixin._test_trtrs(self, lambda t: t.cuda())

    @unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")
    def test_trtrs_batched(self):
        _TestTorchMixin._test_trtrs_batched(self, lambda t: t.cuda())

    @unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")
    def test_trtrs_batched_dims(self):
        _TestTorchMixin._test_trtrs_batched_dims(self, lambda t: t.cuda())

    @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
    def test_get_set_rng_state_all(self):
        states = torch.cuda.get_rng_state_all()
        before0 = torch.cuda.FloatTensor(100, device=0).normal_()
        before1 = torch.cuda.FloatTensor(100, device=1).normal_()
        torch.cuda.set_rng_state_all(states)
        after0 = torch.cuda.FloatTensor(100, device=0).normal_()
        after1 = torch.cuda.FloatTensor(100, device=1).normal_()
        self.assertEqual(before0, after0, 0)
        self.assertEqual(before1, after1, 0)

    def test_nvtx(self):
        # just check that the NVTX symbols are callable
        torch.cuda.nvtx.range_push("foo")
        torch.cuda.nvtx.mark("bar")
        torch.cuda.nvtx.range_pop()

    def test_randperm_cuda(self):
        cuda = torch.device('cuda:0')

        # fork_rng preserves the RNG state, so the second randperm call in each
        # pair replays the same randomness and must produce the same permutation
        with torch.random.fork_rng(devices=[0]):
            res1 = torch.randperm(100, device=cuda)
        res2 = torch.cuda.LongTensor()
        torch.randperm(100, out=res2, device=cuda)
        self.assertEqual(res1, res2, 0)

        with torch.random.fork_rng(devices=[0]):
            res1 = torch.randperm(100000, device=cuda)
        res2 = torch.cuda.LongTensor()
        torch.randperm(100000, out=res2, device=cuda)
        self.assertEqual(res1, res2, 0)

        with torch.random.fork_rng(devices=[0]):
            res1 = torch.randperm(100, dtype=torch.half, device=cuda)
        res2 = torch.cuda.HalfTensor()
        torch.randperm(100, out=res2, device=cuda)
        self.assertEqual(res1, res2, 0)

        with torch.random.fork_rng(devices=[0]):
            res1 = torch.randperm(50000, dtype=torch.half, device=cuda)
        res2 = torch.cuda.HalfTensor()
        torch.randperm(50000, out=res2, device=cuda)
        self.assertEqual(res1, res2, 0)

        # randperm of 0 elements is an empty tensor
        res1 = torch.randperm(0, device=cuda)
        res2 = torch.cuda.LongTensor(5)
        torch.randperm(0, out=res2, device=cuda)
        self.assertEqual(res1.numel(), 0)
        self.assertEqual(res2.numel(), 0)

    def test_random_neg_values(self):
        _TestTorchMixin._test_random_neg_values(self, use_cuda=True)

    def test_bincount_cuda(self):
        _TestTorchMixin._test_bincount(self, device='cuda')
        # ensure CUDA code coverage
        input_size = (5000,)
        w = torch.randn(input_size, device='cuda')
        w_cpu = w.cpu()
        # small number of bins exercises the shared-memory implementation
        t = torch.randint(50, input_size, dtype=torch.int8, device='cuda')
        self.assertEqual(t.cpu().bincount(), t.bincount())
        self.assertEqual(t.cpu().bincount(w_cpu), t.bincount(w))
        # medium number of bins exercises the multi-block implementation
        t = torch.randint(500, input_size, dtype=torch.int64, device='cuda')
        self.assertEqual(t.cpu().bincount(), t.bincount())
        self.assertEqual(t.cpu().bincount(w_cpu), t.bincount(w))
        # large number of bins exercises the global-memory implementation
        t = torch.randint(2000, input_size, dtype=torch.int64, device='cuda')
        self.assertEqual(t.cpu().bincount(), t.bincount())
        self.assertEqual(t.cpu().bincount(w_cpu), t.bincount(w))

    def test_histc_cuda(self):
        _TestTorchMixin._test_histc(self, device='cuda')

    def test_tiny_half_norm_(self):
        a = torch.arange(25).cuda().float()
        a /= 100000000  # scale down into the float16 denormal range
        b = a.half()
        self.assertGreater(b.norm().item(), 0)

    def test_norm_type_conversion(self):
        a = torch.ones(65536).cuda().half()
        self.assertEqual(a.norm(p=0, dtype=torch.float32), 65536)
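
    # norm(p=0) counts the nonzero elements, so the exact answer is 65536, which
    # exceeds the largest finite float16 value (65504) and would round to inf;
    # requesting dtype=torch.float32 keeps the count representable.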

    def test_cuda_memory_leak_detection(self):
        l = []

        @self.wrap_with_cuda_memory_check
        def no_leak():
            pass

        @self.wrap_with_cuda_memory_check
        def leak_gpu0():
            l.append(torch.tensor(10, device=torch.device("cuda:0")))

        no_leak()

        with self.assertRaisesRegex(AssertionError, r"leaked \d+ bytes CUDA memory on device 0"):
            leak_gpu0()

        if TEST_MULTIGPU:
            @self.wrap_with_cuda_memory_check
            def leak_gpu1():
                l.append(torch.tensor(10, device=torch.device("cuda:1")))

            with self.assertRaisesRegex(AssertionError, r"leaked \d+ bytes CUDA memory on device 1"):
                leak_gpu1()

    def test_cuda_memory_leak_detection_propagates_errors(self):
        with self.assertRaisesRegex(RuntimeError, r"The size of tensor a \(3\) must match"):
            with self.assertLeaksNoCudaTensors():
                x = torch.randn(3, 1, device='cuda')
                y = torch.randn(2, 1, device='cuda')
                z = x + y  # shape mismatch raises inside the leak-check context

    def test_trilu_indices(self):
        for test_args in tri_tests_args:
            _compare_trilu_indices(self, *test_args, device='cuda')

        # test default options
        x = torch.ones(3, 3, dtype=torch.long, device='cuda', layout=torch.strided)
        self.assertEqual(
            x.tril(0).nonzero().transpose(0, 1),
            torch.tril_indices(3, 3, device='cuda'))
        self.assertEqual(
            x.triu(0).nonzero().transpose(0, 1),
            torch.triu_indices(3, 3, device='cuda'))

    def test_large_trilu_indices(self):
        for test_args in tri_large_tests_args:
            _compare_large_trilu_indices(self, *test_args, device='cuda')

    def test_triu_tril(self):
        _TestTorchMixin._test_triu_tril(self, lambda t: t.cuda())

    def test_cuda_round(self):
        # test half-to-even rounding
        a = [-5.8, -3.5, -2.3, -1.5, -0.5, 0.5, 1.5, 2.3, 3.5, 5.8]
        res = [-6., -4., -2., -2., 0., 0., 2., 2., 4., 6.]

        self.assertEqual(
            torch.HalfTensor(a).cuda().round().cpu(),
            torch.HalfTensor(res).cpu())
        self.assertEqual(
            torch.FloatTensor(a).cuda().round().cpu(),
            torch.FloatTensor(res).cpu())
        self.assertEqual(
            torch.DoubleTensor(a).cuda().round().cpu(),
            torch.DoubleTensor(res).cpu())
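
    # The expected values encode round-half-to-even (banker's rounding): ties
    # such as -3.5, -1.5, 0.5, 1.5 and 3.5 go to the nearest even integer
    # (-4, -2, 0, 2, 4) rather than away from zero.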


def load_ignore_file():
    from os.path import join, dirname
    path = join(dirname(__file__), 'data', 'test_cuda_ignores.txt')
    with open(path, 'r') as f:
        ignores = {l for l in f.read().splitlines() if not l.startswith('#')}
    return ignores


def generate_tests():
    ignores = load_ignore_file()
    for decl in tests:
        for t in types:
            tensor = t()

            # defaults for the optional fields of a test declaration
            desc = ''
            type_subset = types
            no_inplace = False
            decorator = None
            if len(decl) == 3:
                name, constr, arg_constr = decl
            elif len(decl) == 4:
                name, constr, arg_constr, desc = decl
            elif len(decl) == 5:
                name, constr, arg_constr, desc, type_subset = decl
            elif len(decl) == 6:
                name, constr, arg_constr, desc, type_subset, no_inplace = decl
            elif len(decl) == 7:
                name, constr, arg_constr, desc, type_subset, no_inplace, decorator = decl

            if t not in type_subset:
                continue

            if TEST_WITH_ROCM and decorator is not None:
                if isinstance(decorator, str):
                    # string decorators encode "<reason>:<Type1>,<Type2>,...";
                    # skip only the tensor types named after the colon
                    tensor_type_name = str(t.__name__)
                    skip_type_list = decorator.split(":")[1].split(",")
                    if tensor_type_name in skip_type_list:
                        decorator = skipIfRocm
                    else:
                        decorator = None
            elif (not TEST_WITH_ROCM) and decorator is not None:
                if isinstance(decorator, str):
                    decorator = None

            precision = custom_precision.get(name, TestCuda.precision)
            if is_half(t):
                precision = custom_half_precision.get(name, precision)

            for inplace in (True, False):
                if inplace and no_inplace:
                    continue
                name_inner = name + '_' if inplace else name

                # HalfTensor is handled specially by compare_cpu_gpu; every
                # other type must actually implement the method
                if t != torch.HalfTensor and not hasattr(tensor, name_inner):
                    continue

                full_name = '{}.{}'.format(tensor.type(), name_inner)
                if full_name in ignores:
                    continue

                test_name = 'test_' + t.__name__ + '_' + name_inner
                if desc:
                    test_name += '_' + desc

                assert not hasattr(TestCuda, test_name), "Duplicated test name: " + test_name

                test_fn = compare_cpu_gpu(constr, arg_constr, name_inner, t, precision)

                if decorator is not None:
                    test_fn = decorator(test_fn)

                setattr(TestCuda, test_name, test_fn)


if __name__ == '__main__':
    generate_tests()
    run_tests()