Caffe2 - Python API
A deep learning, cross-platform ML framework
rprop.py
import math
import torch
from .optimizer import Optimizer


class Rprop(Optimizer):
    """Implements the resilient backpropagation algorithm.

    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 1e-2)
        etas (Tuple[float, float], optional): pair of (etaminus, etaplus) that
            are multiplicative decrease and increase factors
            (default: (0.5, 1.2))
        step_sizes (Tuple[float, float], optional): a pair of minimal and
            maximal allowed step sizes (default: (1e-6, 50))
    """

    def __init__(self, params, lr=1e-2, etas=(0.5, 1.2), step_sizes=(1e-6, 50)):
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 < etas[0] < 1.0 < etas[1]:
            raise ValueError("Invalid eta values: {}, {}".format(etas[0], etas[1]))

        defaults = dict(lr=lr, etas=etas, step_sizes=step_sizes)
        super(Rprop, self).__init__(params, defaults)

    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
                    raise RuntimeError('Rprop does not support sparse gradients')
                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    state['prev'] = torch.zeros_like(p.data)
                    state['step_size'] = grad.new().resize_as_(grad).fill_(group['lr'])

                etaminus, etaplus = group['etas']
                step_size_min, step_size_max = group['step_sizes']
                step_size = state['step_size']

                state['step'] += 1

                # Compare the sign of the current and previous gradient:
                # agreement -> grow the step size, disagreement -> shrink it.
                sign = grad.mul(state['prev']).sign()
                sign[sign.gt(0)] = etaplus
                sign[sign.lt(0)] = etaminus
                sign[sign.eq(0)] = 1

                # Update the per-element step sizes and clamp them to the
                # allowed [step_size_min, step_size_max] range.
                step_size.mul_(sign).clamp_(step_size_min, step_size_max)

                # Where the gradient changed sign (dir < 0), skip the update
                # for this step by zeroing the gradient; otherwise keep it.
                grad = grad.clone()
                grad[sign.eq(etaminus)] = 0

                # Move each parameter against the sign of its gradient by the
                # adapted per-element step size.
                p.data.addcmul_(-1, grad.sign(), step_size)

                state['prev'].copy_(grad)

        return loss
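
To make the per-element update in step() easier to follow outside the optimizer class, the sketch below applies the same logic to a single tensor. All tensors and constants are made-up example values, and the names grad, prev, step_size, and param are illustrative rather than part of rprop.py.

import torch

# Made-up example values standing in for one parameter's gradient history.
grad = torch.tensor([0.3, -0.2, 0.0])       # current gradient
prev = torch.tensor([0.1, 0.4, -0.5])       # gradient from the previous step
step_size = torch.full_like(grad, 1e-2)     # starts at lr
etaminus, etaplus = 0.5, 1.2
step_size_min, step_size_max = 1e-6, 50.0

# Sign agreement between consecutive gradients decides whether each
# element's step size grows (etaplus) or shrinks (etaminus).
sign = grad.mul(prev).sign()
factor = torch.ones_like(grad)
factor[sign.gt(0)] = etaplus
factor[sign.lt(0)] = etaminus
step_size.mul_(factor).clamp_(step_size_min, step_size_max)

# Where the sign flipped, the update is skipped for this step.
grad = grad.clone()
grad[sign.lt(0)] = 0

# Move the parameter against the gradient sign by the adapted step size.
param = torch.zeros(3)
param.addcmul_(grad.sign(), step_size, value=-1)
prev = grad.clone()

Only the sign of the gradient enters the update, so the local slope's magnitude never affects the step length; the adapted step_size alone controls how far each element moves.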
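
As a quick, hypothetical usage example of the constructor arguments documented above, the loop below trains a toy linear model with torch.optim.Rprop. The model, random data, and iteration count are placeholders chosen for illustration only.

import torch
import torch.nn.functional as F
from torch.optim import Rprop

model = torch.nn.Linear(10, 1)
optimizer = Rprop(model.parameters(), lr=1e-2, etas=(0.5, 1.2), step_sizes=(1e-6, 50))

x = torch.randn(64, 10)   # toy inputs
y = torch.randn(64, 1)    # toy targets

for _ in range(100):
    optimizer.zero_grad()
    loss = F.mse_loss(model(x), y)
    loss.backward()        # populates p.grad for every parameter
    optimizer.step()       # applies the Rprop update from rprop.py

Because the update uses only gradient signs, Rprop is best suited to full-batch gradients; mini-batch noise can flip signs between steps and keep the adapted step sizes pinned near step_size_min.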