Caffe2 - Python API
A deep learning, cross-platform ML framework
adagrad_test_helper.py
from __future__ import absolute_import, division, print_function, unicode_literals

from functools import partial

import caffe2.python.hypothesis_test_util as hu
import numpy as np
from caffe2.python import core


def ref_adagrad(
    param_in,
    mom_in,
    grad,
    lr,
    epsilon,
    using_fp16=False,
    output_effective_lr=False,
    output_effective_lr_and_update=False,
    row_wise=False,
):
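    """NumPy reference implementation of a single Adagrad update.

    Returns (param_out, mom_out); when output_effective_lr or
    output_effective_lr_and_update is set, the effective learning rate
    (and the applied update) are returned as well, mirroring the optional
    outputs of the Caffe2 Adagrad operators.
    """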
    mom_in_f32 = mom_in
    param_in_f32 = param_in
    if using_fp16:
        mom_in_f32 = mom_in.astype(np.float32)
        param_in_f32 = param_in.astype(np.float32)

    if row_wise:
        # Row-wise Adagrad keeps a single momentum value per row.
        mom_out = mom_in_f32 + np.mean(np.square(grad))
    else:
        mom_out = mom_in_f32 + np.square(grad)
    effective_lr = lr / (np.sqrt(mom_out) + epsilon)
    grad_adj = effective_lr * grad
    # The update is additive (param += effective_lr * grad), matching the
    # Caffe2 operator convention; a negative lr yields gradient descent.
    param_out = param_in_f32 + grad_adj

    if output_effective_lr_and_update:
        if using_fp16:
            return (
                param_out.astype(np.float16),
                mom_out.astype(np.float16),
                effective_lr.astype(np.float16),
                grad_adj.astype(np.float16),
            )
        else:
            return (
                param_out.astype(np.float32),
                mom_out.astype(np.float32),
                effective_lr.astype(np.float32),
                grad_adj.astype(np.float32),
            )
    elif output_effective_lr:
        if using_fp16:
            return (
                param_out.astype(np.float16),
                mom_out.astype(np.float16),
                effective_lr.astype(np.float16),
            )
        else:
            return (
                param_out.astype(np.float32),
                mom_out.astype(np.float32),
                effective_lr.astype(np.float32),
            )

    if using_fp16:
        return (param_out.astype(np.float16), mom_out.astype(np.float16))
    else:
        return (param_out.astype(np.float32), mom_out.astype(np.float32))


def adagrad_sparse_test_helper(
    parent_test, inputs, lr, epsilon, engine, ref_adagrad, gc, dc, row_wise=False
):
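    """Reference-check (RowWise)SparseAdagrad against a dense reference.

    Randomly sparsifies the gradient, builds the corresponding Caffe2
    operator, and uses parent_test.assertReferenceChecks to compare the
    operator's outputs with a row-by-row application of ref_adagrad.
    """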
    param, momentum, grad = inputs
    if row_wise:
        # For row-wise adagrad, only take the first element of each row
        momentum = momentum.reshape(momentum.shape[0], -1)[:, 0]
        momentum = np.abs(momentum)
    lr = np.array([lr], dtype=np.float32)

    # Create an index array that selects a random subset of the rows of grad
    if grad.size == 0:
        indices = np.empty(shape=(0,), dtype=np.int64)
    else:
        indices = np.random.choice(
            np.arange(grad.shape[0]),
            size=np.random.randint(grad.shape[0]),
            replace=False,
        )

    # Sparsify grad
    grad = grad[indices]

    op = core.CreateOperator(
        "RowWiseSparseAdagrad" if row_wise else "SparseAdagrad",
        ["param", "momentum", "indices", "grad", "lr"],
        ["param", "momentum"],
        epsilon=epsilon,
        engine=engine,
        device_option=gc,
    )

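    # Reference implementation: apply the dense ref_adagrad row by row, but
    # only to the rows selected by `indices`.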
    def ref_sparse(param, momentum, indices, grad, lr, ref_using_fp16=False):
        param_out = np.copy(param)
        momentum_out = np.copy(momentum)
        # ref_adagrad's using_fp16 may already have been bound by the caller
        # (e.g. via functools.partial), so only override it when fp16 is
        # explicitly requested here.
        ref_adagrad_temp = (
            partial(ref_adagrad, using_fp16=ref_using_fp16)
            if ref_using_fp16
            else ref_adagrad
        )
        for i, index in enumerate(indices):
            param_out[index], momentum_out[index] = ref_adagrad_temp(
                param[index],
                momentum[index],
                grad[i],
                lr,
                epsilon,
            )
        return (param_out, momentum_out)

    ref_using_fp16_values = [False]
    if gc == hu.gpu_do and not row_wise:
        ref_using_fp16_values.append(True)

    for ref_using_fp16 in ref_using_fp16_values:
        if ref_using_fp16:
            print("test_sparse_adagrad with half precision embedding")
            momentum_i = momentum.astype(np.float16)
            param_i = param.astype(np.float16)
        else:
            print("test_sparse_adagrad with full precision embedding")
            momentum_i = momentum.astype(np.float32)
            param_i = param.astype(np.float32)

        parent_test.assertReferenceChecks(
            gc, op, [param_i, momentum_i, indices, grad, lr, ref_using_fp16], ref_sparse
        )
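
For illustration, here is a minimal sketch of driving ref_adagrad directly, outside of a test case. The module path, array shapes, and values below are assumptions for the example rather than part of the original helper:

# Minimal sketch: one dense Adagrad step via ref_adagrad.
import numpy as np

# Assumed module path, based on this file's location in caffe2/python.
from caffe2.python.adagrad_test_helper import ref_adagrad

param = np.array([0.5, -0.3, 0.1], dtype=np.float32)
momentum = np.zeros(3, dtype=np.float32)
grad = np.array([0.2, -0.1, 0.4], dtype=np.float32)

# The reference update is additive (param += lr * grad / (sqrt(mom) + eps)),
# so a negative learning rate performs gradient descent.
param_out, mom_out = ref_adagrad(param, momentum, grad, lr=-0.1, epsilon=1e-5)
print(param_out)  # updated parameters
print(mom_out)    # accumulated squared gradients

adagrad_sparse_test_helper itself is intended to be called from a test case that provides assertReferenceChecks (e.g. a hu.HypothesisTestCase subclass), passing self as parent_test together with the (param, momentum, grad) tuple and the device options gc and dc.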