Caffe2 - Python API
A deep learning, cross-platform ML framework
adagrad_test_helper.py
from __future__ import absolute_import, division, print_function, unicode_literals

from functools import partial

import caffe2.python.hypothesis_test_util as hu
import numpy as np
from caffe2.python import core


def ref_adagrad(
    param_in,
    mom_in,
    grad,
    lr,
    epsilon,
    using_fp16=False,
    output_effective_lr=False,
    output_effective_lr_and_update=False,
    row_wise=False,
):
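    """NumPy reference implementation of a single Adagrad update.

    Returns (param_out, mom_out); when output_effective_lr or
    output_effective_lr_and_update is set, the effective learning rate
    (and the applied update) are returned as well, mirroring the optional
    outputs of the Caffe2 Adagrad operators.
    """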
    mom_in_f32 = mom_in
    param_in_f32 = param_in
    if using_fp16:
        mom_in_f32 = mom_in.astype(np.float32)
        param_in_f32 = param_in.astype(np.float32)

    if row_wise:
        # Row-wise Adagrad keeps a single momentum value per row.
        mom_out = mom_in_f32 + np.mean(np.square(grad))
    else:
        mom_out = mom_in_f32 + np.square(grad)
    effective_lr = lr / (np.sqrt(mom_out) + epsilon)
    grad_adj = effective_lr * grad
    # The update is additive (param += effective_lr * grad), matching the
    # Caffe2 operator convention; a negative lr yields gradient descent.
    param_out = param_in_f32 + grad_adj

    if output_effective_lr_and_update:
        if using_fp16:
            return (
                param_out.astype(np.float16),
                mom_out.astype(np.float16),
                effective_lr.astype(np.float16),
                grad_adj.astype(np.float16),
            )
        else:
            return (
                param_out.astype(np.float32),
                mom_out.astype(np.float32),
                effective_lr.astype(np.float32),
                grad_adj.astype(np.float32),
            )
    elif output_effective_lr:
        if using_fp16:
            return (
                param_out.astype(np.float16),
                mom_out.astype(np.float16),
                effective_lr.astype(np.float16),
            )
        else:
            return (
                param_out.astype(np.float32),
                mom_out.astype(np.float32),
                effective_lr.astype(np.float32),
            )

    if using_fp16:
        return (param_out.astype(np.float16), mom_out.astype(np.float16))
    else:
        return (param_out.astype(np.float32), mom_out.astype(np.float32))


def adagrad_sparse_test_helper(
    parent_test, inputs, lr, epsilon, engine, ref_adagrad, gc, dc, row_wise=False
):
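    """Reference-check (RowWise)SparseAdagrad against a dense reference.

    Randomly sparsifies the gradient, builds the corresponding Caffe2
    operator, and uses parent_test.assertReferenceChecks to compare the
    operator's outputs with a row-by-row application of ref_adagrad.
    """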
    param, momentum, grad = inputs
    if row_wise:
        # For row-wise adagrad, only take the first element of each row
        momentum = momentum.reshape(momentum.shape[0], -1)[:, 0]
        momentum = np.abs(momentum)
    lr = np.array([lr], dtype=np.float32)

    # Create an index array that selects a random subset of the rows of grad
    if grad.size == 0:
        indices = np.empty(shape=(0,), dtype=np.int64)
    else:
        indices = np.random.choice(
            np.arange(grad.shape[0]),
            size=np.random.randint(grad.shape[0]),
            replace=False,
        )

    # Sparsify grad
    grad = grad[indices]

    op = core.CreateOperator(
        "RowWiseSparseAdagrad" if row_wise else "SparseAdagrad",
        ["param", "momentum", "indices", "grad", "lr"],
        ["param", "momentum"],
        epsilon=epsilon,
        engine=engine,
        device_option=gc,
    )

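    # Reference implementation: apply the dense ref_adagrad row by row, but
    # only to the rows selected by `indices`.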
    def ref_sparse(param, momentum, indices, grad, lr, ref_using_fp16=False):
        param_out = np.copy(param)
        momentum_out = np.copy(momentum)
        # ref_adagrad's using_fp16 may already have been bound by the caller
        # (e.g. via functools.partial), so only override it when fp16 is
        # explicitly requested here.
        ref_adagrad_temp = (
            partial(ref_adagrad, using_fp16=ref_using_fp16)
            if ref_using_fp16
            else ref_adagrad
        )
        for i, index in enumerate(indices):
            param_out[index], momentum_out[index] = ref_adagrad_temp(
                param[index],
                momentum[index],
                grad[i],
                lr,
                epsilon,
            )
        return (param_out, momentum_out)

    ref_using_fp16_values = [False]
    if gc == hu.gpu_do and not row_wise:
        ref_using_fp16_values.append(True)

    for ref_using_fp16 in ref_using_fp16_values:
        if ref_using_fp16:
            print("test_sparse_adagrad with half precision embedding")
            momentum_i = momentum.astype(np.float16)
            param_i = param.astype(np.float16)
        else:
            print("test_sparse_adagrad with full precision embedding")
            momentum_i = momentum.astype(np.float32)
            param_i = param.astype(np.float32)

        parent_test.assertReferenceChecks(
            gc, op, [param_i, momentum_i, indices, grad, lr, ref_using_fp16], ref_sparse
        )
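
For illustration, here is a minimal sketch of driving ref_adagrad directly, outside of a test case. The module path, array shapes, and values below are assumptions for the example rather than part of the original helper:

# Minimal sketch: one dense Adagrad step via ref_adagrad.
import numpy as np

# Assumed module path, based on this file's location in caffe2/python.
from caffe2.python.adagrad_test_helper import ref_adagrad

param = np.array([0.5, -0.3, 0.1], dtype=np.float32)
momentum = np.zeros(3, dtype=np.float32)
grad = np.array([0.2, -0.1, 0.4], dtype=np.float32)

# The reference update is additive (param += lr * grad / (sqrt(mom) + eps)),
# so a negative learning rate performs gradient descent.
param_out, mom_out = ref_adagrad(param, momentum, grad, lr=-0.1, epsilon=1e-5)
print(param_out)  # updated parameters
print(mom_out)    # accumulated squared gradients

adagrad_sparse_test_helper itself is intended to be called from a test case that provides assertReferenceChecks (e.g. a hu.HypothesisTestCase subclass), passing self as parent_test together with the (param, momentum, grad) tuple and the device options gc and dc.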