# Caffe2 - Python API
# A deep learning, cross platform ML framework
# layer_normalization.py
1 from __future__ import absolute_import
2 from __future__ import division
3 from __future__ import print_function
4 from __future__ import unicode_literals
5 
6 from caffe2.python import schema
7 from caffe2.python.layers.layers import ModelLayer
8 
9 import numpy as np
10 
11 
class LayerNormalization(ModelLayer):
    def __init__(
        self,
        model,
        input_record,
        name='layer_normalization',
        scale_optim=None,
        bias_optim=None,
        epsilon=1e-4,
        axis=1,
        use_layer_norm_op=True,
        **kwargs
    ):
        """Layer normalization with a learned per-feature scale and bias.

        Args:
            model: model helper this layer is added to.
            input_record: schema.Scalar describing the input; its
                field_type().shape must have at least one dimension
                (the shape excludes the batch dimension).
            name: base name used for this layer's blobs.
            scale_optim: optimizer for the learned scale parameter.
            bias_optim: optimizer for the learned bias parameter.
            epsilon: numerical-stability constant added to the variance.
            axis: axis at which normalization starts (LayerNorm op semantics).
            use_layer_norm_op: if True, emit the fused LayerNorm operator;
                otherwise emit an equivalent sequence of primitive ops
                (restricted to 2D input).
        """
        super(LayerNormalization, self).__init__(
            model, name, input_record, **kwargs)

        assert isinstance(input_record, schema.Scalar), (
            "Incorrect input type: {}".format(input_record))

        self.input_shape = input_record.field_type().shape
        self.axis = axis

        # input_shape excludes the batch dimension, so a length-1 shape
        # already corresponds to a 2D tensor.
        assert len(self.input_shape) >= 1, (
            "This layer supports only >= 2D tensors")
        input_dims = self.input_shape[0]

        # NOTE(review): this assignment was missing/garbled in the reviewed
        # copy (only its continuation lines survived); reconstructed here.
        self.output_schema = schema.Scalar(
            (np.float32, self.input_shape),
            self.get_next_blob_reference('output')
        )

        self.scale = self.create_param(param_name='scale',
                                       shape=[input_dims],
                                       initializer=('ConstantFill', {'value': 1.0}),
                                       optimizer=scale_optim)
        self.bias = self.create_param(param_name='bias',
                                      shape=[input_dims],
                                      initializer=('ConstantFill', {'value': 0.0}),
                                      optimizer=bias_optim)
        self.use_layer_norm_op = use_layer_norm_op

        if self.use_layer_norm_op:
            self.epsilon = epsilon
        else:
            assert len(self.input_shape) == 1, (
                "When using alternative implementation, "
                "input data can only be 2D"
            )
            # The primitive-op path consumes epsilon as a blob, so register
            # it as a global constant on the model.
            self.epsilon = model.maybe_add_global_constant(
                "%s_epsilon" % self.name, float(epsilon)
            )
63 
64  def add_ops_with_layer_norm_op(self, net):
65  input_blob = self.input_record.field_blobs()
66  ln_output = self.output_schema.field_blobs()
67 
68  output_blobs = [net.NextScopedBlob('ln_output'), net.NextScopedBlob('ln_mean'),
69  net.NextScopedBlob('ln_stdev')]
70 
71  normalized, mean, stdev = net.LayerNorm(input_blob,
72  output_blobs,
73  axis=self.axis,
74  epsilon=self.epsilon)
75 
76  scaled = net.Mul(
77  [normalized, self.scale],
78  [net.NextScopedBlob('ln_scaled')],
79  broadcast=1,
80  axis=self.axis,
81  )
82 
83  net.Add(
84  [scaled, self.bias],
85  ln_output,
86  broadcast=1,
87  axis=self.axis,
88  )
89 
    def add_ops_without_layer_norm_op(self, net):
        """Emit LayerNorm as a sequence of primitive operators.

        Used when use_layer_norm_op is False; __init__ restricts this path
        to 2D input (a single trailing feature dimension), and self.epsilon
        is a global-constant blob rather than a float on this path.
        """
        # two issues here:
        # 1. use multiple ops to replace the function of LayerNorm
        # 2. do not use legacy broadcast
        ln_output = net.NextScopedBlob("ln_output")
        ln_mean = net.NextScopedBlob("ln_mean")
        ln_stdev = net.NextScopedBlob("ln_stdev")
        ln_mean_arr = net.NextScopedBlob("ln_mean_arr")
        # Mean over the trailing axis; ExpandDims restores the reduced axis
        # so subsequent elementwise ops align without legacy broadcast.
        net.ReduceBackMean(self.input_record.field_blobs(), [ln_mean_arr])
        net.ExpandDims([ln_mean_arr], [ln_mean], dims=[1])
        ln_centered = net.NextScopedBlob("ln_centered")
        net.Sub(self.input_record.field_blobs() + [ln_mean], [ln_centered])
        ln_sqr = net.NextScopedBlob("ln_sqr")
        net.Sqr([ln_centered], [ln_sqr])
        ln_sqr_mean = net.NextScopedBlob("ln_sqr_mean")
        net.ReduceBackMean([ln_sqr], [ln_sqr_mean])
        ln_var = net.NextScopedBlob("ln_var")
        # var + epsilon (epsilon is a blob here, so Add rather than a float op)
        net.Add([ln_sqr_mean, self.epsilon], ln_var)
        ln_std_arr = net.NextScopedBlob("ln_std_arr")
        # stdev = (var + epsilon) ** 0.5
        net.Pow([ln_var], [ln_std_arr], exponent=0.5)
        net.ExpandDims([ln_std_arr], [ln_stdev], dims=[1])
        net.Div([ln_centered, ln_stdev], [ln_output])
        ln_scaled = net.NextScopedBlob("ln_scaled")
        net.Mul([ln_output, self.scale], [ln_scaled])
        net.Add([ln_scaled, self.bias], self.output_schema.field_blobs())
115 
116  def add_ops(self, net):
117  if self.use_layer_norm_op:
119  else:
# NOTE(review): IDE cross-reference hover text, not code — preserved as comments:
# def get_next_blob_reference(self, name) — defined in layers.py:349
# def create_param(self, param_name, shape, initializer, optimizer,
#     ps_param=None, regularizer=None) — defined in layers.py:334