from __future__ import absolute_import, division, print_function, unicode_literals

from functools import partial

import numpy as np
def ref_adagrad(
    param_in,
    mom_in,
    grad,
    lr,
    epsilon,
    using_fp16=False,
    output_effective_lr=False,
    output_effective_lr_and_update=False,
    row_wise=False,
):
    """Reference (pure NumPy) implementation of the Adagrad update.

    Computes
        mom_out      = mom_in + grad**2            (per-element), or
        mom_out      = mom_in + mean(grad**2)      (row_wise=True)
        effective_lr = lr / (sqrt(mom_out) + epsilon)
        param_out    = param_in + effective_lr * grad

    Args:
        param_in: parameter array before the update.
        mom_in: accumulated second-moment array before the update.
        grad: gradient array.
        lr: learning rate (scalar or broadcastable array).
        epsilon: small constant added to the denominator for stability.
        using_fp16: if True, inputs are fp16; math is done in fp32 and the
            outputs are cast back to fp16.
        output_effective_lr: also return the effective learning rate.
        output_effective_lr_and_update: also return the effective learning
            rate AND the applied update (takes precedence over
            output_effective_lr).
        row_wise: use one shared moment (mean of squared grads) instead of
            a per-element moment.

    Returns:
        A tuple of 2, 3 or 4 arrays depending on the output flags, all cast
        to fp16 when using_fp16 else fp32.
    """
    mom_in_f32 = mom_in
    param_in_f32 = param_in
    if using_fp16:
        # Do the arithmetic in fp32 regardless of the storage precision.
        mom_in_f32 = mom_in.astype(np.float32)
        param_in_f32 = param_in.astype(np.float32)

    # Row-wise Adagrad shares a single moment across the row.
    if row_wise:
        mom_out = mom_in_f32 + np.mean(np.square(grad))
    else:
        mom_out = mom_in_f32 + np.square(grad)
    effective_lr = lr / (np.sqrt(mom_out) + epsilon)
    grad_adj = effective_lr * grad
    param_out = param_in_f32 + grad_adj

    # Cast every output back to the caller's requested precision.
    out_dtype = np.float16 if using_fp16 else np.float32
    if output_effective_lr_and_update:
        return (
            param_out.astype(out_dtype),
            mom_out.astype(out_dtype),
            effective_lr.astype(out_dtype),
            grad_adj.astype(out_dtype),
        )
    elif output_effective_lr:
        return (
            param_out.astype(out_dtype),
            mom_out.astype(out_dtype),
            effective_lr.astype(out_dtype),
        )
    return (param_out.astype(out_dtype), mom_out.astype(out_dtype))
def adagrad_sparse_test_helper(
    parent_test, inputs, lr, epsilon, engine, ref_adagrad, gc, dc, row_wise=False
):
    """Shared driver for SparseAdagrad / RowWiseSparseAdagrad operator tests.

    Builds a sparse-Adagrad operator, derives a random subset of row indices,
    sparsifies the gradient accordingly, and checks the operator against the
    given ``ref_adagrad`` reference via ``parent_test.assertReferenceChecks``.

    Args:
        parent_test: the calling test case (provides assertReferenceChecks).
        inputs: (param, momentum, grad) numpy arrays.
        lr: scalar learning rate (wrapped into a 1-element fp32 array).
        epsilon: Adagrad stability constant, forwarded to the operator.
        engine: operator engine string, forwarded to the operator.
        ref_adagrad: dense reference applied row by row.
        gc: device option under test.
        dc: device list (unused here; kept for the helper's call signature).
        row_wise: test RowWiseSparseAdagrad instead of SparseAdagrad.
    """
    param, momentum, grad = inputs
    if row_wise:
        # Row-wise Adagrad keeps a single moment per row: take the first
        # element of each (flattened) row.
        momentum = momentum.reshape(momentum.shape[0], -1)[:, 0]
    # Accumulated second moments must be non-negative.
    momentum = np.abs(momentum)
    lr = np.array([lr], dtype=np.float32)

    # Create an indexing array containing values that index into grad.
    if grad.size == 0:
        # np.int was removed in NumPy 1.24; use an explicit integer dtype.
        indices = np.empty(shape=(0,), dtype=np.int64)
    else:
        indices = np.random.choice(
            np.arange(grad.shape[0]),
            size=np.random.randint(grad.shape[0]),
            replace=False,
        )

    # Sparsify grad: keep only the selected rows.
    grad = grad[indices]

    op = core.CreateOperator(
        "RowWiseSparseAdagrad" if row_wise else "SparseAdagrad",
        ["param", "momentum", "indices", "grad", "lr"],
        ["param", "momentum"],
        epsilon=epsilon,
        engine=engine,
        device_option=gc,
    )

    def ref_sparse(param, momentum, indices, grad, lr, ref_using_fp16=False):
        # Apply the dense reference row by row on the selected indices.
        param_out = np.copy(param)
        momentum_out = np.copy(momentum)
        # ref_adagrad's signature may not accept using_fp16, so only bind
        # the keyword when the fp16 path is actually requested.
        ref_adagrad_temp = (
            partial(ref_adagrad, using_fp16=ref_using_fp16)
            if ref_using_fp16
            else ref_adagrad
        )
        for i, index in enumerate(indices):
            param_out[index], momentum_out[index] = ref_adagrad_temp(
                param[index],
                momentum[index],
                grad[i],
                lr,
                epsilon,
            )
        return (param_out, momentum_out)

    ref_using_fp16_values = [False]
    # Half-precision embeddings are only exercised on GPU and not row-wise.
    if gc == hu.gpu_do and not row_wise:
        ref_using_fp16_values.append(True)

    for ref_using_fp16 in ref_using_fp16_values:
        if ref_using_fp16:
            print("test_sparse_adagrad with half precision embedding")
            momentum_i = momentum.astype(np.float16)
            param_i = param.astype(np.float16)
        else:
            print("test_sparse_adagrad with full precision embedding")
            momentum_i = momentum.astype(np.float32)
            param_i = param.astype(np.float32)

        parent_test.assertReferenceChecks(
            gc, op, [param_i, momentum_i, indices, grad, lr, ref_using_fp16], ref_sparse
        )