# Caffe2 - Python API: a deep learning, cross platform ML framework
# arc_cosine_feature_map.py
1 # Copyright (c) 2016-present, Facebook, Inc.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14 ##############################################################################
15 
16 from __future__ import absolute_import
17 from __future__ import division
18 from __future__ import print_function
19 from __future__ import unicode_literals
20 
21 from caffe2.python import schema
22 from caffe2.python.layers.layers import ModelLayer
23 import numpy as np
24 
25 
class ArcCosineFeatureMap(ModelLayer):
    """
    A general version of the arc-cosine kernel feature map (s = 1 restores
    the original arc-cosine kernel feature map).

    Applies H(x) * x^s, where H is the Heaviside step function and x is the
    input after applying FC (such that x = w * x_orig + b).

    For more information, see the original paper:
    http://cseweb.ucsd.edu/~saul/papers/nips09_kernel.pdf

    Inputs :
        output_dims -- dimensions of the output vector
        s -- degree to raise transformed features
        scale -- amount to scale the standard deviation
        weight_init -- initialization distribution for weight parameter
        bias_init -- initialization distribution for bias parameter
        weight_optim -- optimizer for weight params; None for random features
        bias_optim -- optimizer for bias param; None for random features
        set_weight_as_global_constant -- if True, initialized random parameters
                                         will be constant across all distributed
                                         instances of the layer
        initialize_output_schema -- if True, initialize output schema as Scalar
                                    from Arc Cosine; else output schema is None
    """
    def __init__(
            self,
            model,
            input_record,
            output_dims,
            s=1,
            scale=1.0,
            weight_init=None,
            bias_init=None,
            weight_optim=None,
            bias_optim=None,
            set_weight_as_global_constant=False,
            initialize_output_schema=True,
            name='arc_cosine_feature_map',
            **kwargs):

        super(ArcCosineFeatureMap, self).__init__(model, name, input_record,
                                                  **kwargs)
        assert isinstance(input_record, schema.Scalar), "Incorrect input type"
        self.params = []
        self.model = model
        self.set_weight_as_global_constant = set_weight_as_global_constant

        # Input is a Scalar whose field type carries the feature dimension.
        self.input_dims = input_record.field_type().shape[0]
        assert self.input_dims >= 1, "Expected input dimensions >= 1, got %s" \
            % self.input_dims

        if initialize_output_schema:
            # NOTE(review): this assignment line was missing in the damaged
            # source; restored so the Scalar tuple below is actually used.
            self.output_schema = schema.Scalar(
                (np.float32, (output_dims, )),
                model.net.NextScopedBlob(name + '_output')
            )

        self.output_dims = output_dims
        assert self.output_dims >= 1, "Expected output dimensions >= 1, got %s" \
            % self.output_dims
        self.s = s
        assert (self.s >= 0), "Expected s >= 0, got %s" % self.s
        assert isinstance(self.s, int), "Expected s to be type int, got type %s" \
            % type(self.s)

        assert (scale > 0.0), "Expected scale > 0, got %s" % scale
        # Scaled standard deviation for the random Gaussian weights.
        self.stddev = scale * np.sqrt(1.0 / self.input_dims)

        # Initialize train_init_net parameters
        # Random Parameters
        if set_weight_as_global_constant:
            # Draw the random parameters once in Python so every distributed
            # instance of the layer shares identical constants.
            w_init = np.random.normal(scale=self.stddev,
                                      size=(self.output_dims, self.input_dims))
            b_init = np.random.uniform(low=-0.5 * self.stddev,
                                       high=0.5 * self.stddev,
                                       size=self.output_dims)
            self.random_w = self.model.add_global_constant(
                name=self.name + "_fixed_rand_W",
                array=w_init
            )
            self.random_b = self.model.add_global_constant(
                name=self.name + "_fixed_rand_b",
                array=b_init
            )
        else:
            (self.random_w, self.random_b) = self._initialize_params(
                'random_w',
                'random_b',
                w_init=weight_init,
                b_init=bias_init,
                w_optim=weight_optim,
                b_optim=bias_optim
            )
120 
121  def _initialize_params(self, w_name, b_name, w_init=None, b_init=None,
122  w_optim=None, b_optim=None):
123  """
124  Initializes the Layer Parameters for weight and bias terms for features
125 
126  Inputs :
127  w_blob -- blob to contain w values
128  b_blob -- blob to contain b values
129  w_init -- initialization distribution for weight parameter
130  b_init -- initialization distribution for bias parameter
131  w_optim -- optimizer to use for w; if None, then will use no optimizer
132  b_optim -- optimizer to user for b; if None, then will use no optimizer
133  """
134 
135  w_init = w_init if w_init else (
136  'GaussianFill', {'mean': 0.0, 'std': self.stddev}
137  )
138  w_optim = w_optim if w_optim else self.model.NoOptim
139 
140  b_init = b_init if b_init else (
141  'UniformFill', {'min': -0.5 * self.stddev, 'max': 0.5 * self.stddev}
142  )
143  b_optim = b_optim if b_optim else self.model.NoOptim
144 
145  w_param = self.create_param(param_name=w_name,
146  shape=(self.output_dims, self.input_dims),
147  initializer=w_init,
148  optimizer=w_optim)
149 
150  b_param = self.create_param(param_name=b_name,
151  shape=[self.output_dims],
152  initializer=b_init,
153  optimizer=b_optim)
154 
155  return [w_param, b_param]
156 
157  def _heaviside_with_power(self, net, input_features, output_blob, s):
158  """
159  Applies Heaviside step function and Relu / exponentiation to features
160  depending on the value of s.
161 
162  Inputs:
163  net -- net with operators
164  input_features -- features to processes
165  output_blob -- output blob reference
166  s -- degree to raise the transformed features
167  """
168  if s == 0:
169  softsign_features = net.Softsign([input_features],
170  net.NextScopedBlob('softsign'))
171  return net.Relu(softsign_features, output_blob)
172  elif s == 1:
173  return net.Relu([input_features],
174  output_blob)
175  else:
176  relu_features = net.Relu([input_features],
177  net.NextScopedBlob('relu_rand'))
178  pow_features = net.Pow([input_features],
179  net.NextScopedBlob('pow_rand'),
180  exponent=float(s - 1))
181  return net.Mul([relu_features, pow_features],
182  output_blob)
183 
184  def add_ops(self, net):
185  input_blob = self.input_record.field_blobs()
186 
187  # Random features: wx + b
188  random_features = net.FC(input_blob + [self.random_w, self.random_b],
189  net.NextScopedBlob('random_features'))
190  # Process random features
191  self._heaviside_with_power(net,
192  random_features,
193  self.output_schema.field_blobs(),
194  self.s)