Caffe2 - Python API
A deep learning, cross platform ML framework
semi_random_features.py
1 # Copyright (c) 2016-present, Facebook, Inc.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14 ##############################################################################
15 
16 from __future__ import absolute_import
17 from __future__ import division
18 from __future__ import print_function
19 from __future__ import unicode_literals
20 
21 from caffe2.python import schema
22 from caffe2.python.layers.arc_cosine_feature_map import ArcCosineFeatureMap
23 import numpy as np
24 
25 
27  """
28  Implementation of the semi-random kernel feature map.
29 
30  Applies H(x_rand) * x_rand^s * x_learned, where
31  H is the Heaviside step function,
32  x_rand is the input after applying FC with randomized parameters,
33  and x_learned is the input after applying FC with learnable parameters.
34 
35  If using multilayer model with semi-random layers, then input and output records
36  should have a 'full' and 'random' Scalar. The random Scalar will be passed as
37  input to process the random features.
38 
39  For more information, see the original paper:
40  https://arxiv.org/pdf/1702.08882.pdf
41 
42  Inputs :
43  output_dims -- dimensions of the output vector
44  s -- if s == 0, will obtain linear semi-random features;
45  else if s == 1, will obtain squared semi-random features;
46  else s >= 2, will obtain higher order semi-random features
47  scale_random -- amount to scale the standard deviation
48  (for random parameter initialization when weight_init or
49  bias_init hasn't been specified)
50  scale_learned -- amount to scale the standard deviation
51  (for learned parameter initialization when weight_init or
52  bias_init hasn't been specified)
53 
54  weight_init_random -- initialization distribution for random weight parameter
55  (if None, will use Gaussian distribution)
 56  bias_init_random -- initialization distribution for random bias parameter
57  (if None, will use Uniform distribution)
58  weight_init_learned -- initialization distribution for learned weight parameter
59  (if None, will use Gaussian distribution)
 60  bias_init_learned -- initialization distribution for learned bias parameter
61  (if None, will use Uniform distribution)
62  weight_optim -- optimizer for weight params for learned features
63  bias_optim -- optimizer for bias param for learned features
64 
65  set_weight_as_global_constant -- if True, initialized random parameters
66  will be constant across all distributed
67  instances of the layer
68  """
69  def __init__(
70  self,
71  model,
72  input_record,
73  output_dims,
74  s=1,
75  scale_random=1.0,
76  scale_learned=1.0,
77  weight_init_random=None,
78  bias_init_random=None,
79  weight_init_learned=None,
80  bias_init_learned=None,
81  weight_optim=None,
82  bias_optim=None,
83  set_weight_as_global_constant=False,
84  name='semi_random_features',
85  **kwargs):
86 
87  if isinstance(input_record, schema.Struct):
88  schema.is_schema_subset(
90  ('full', schema.Scalar()),
91  ('random', schema.Scalar()),
92  ),
93  input_record
94  )
95  self.input_record_full = input_record.full
96  self.input_record_random = input_record.random
97 
98  elif isinstance(input_record, schema.Scalar):
99  self.input_record_full = input_record
100  self.input_record_random = input_record
101 
102  super(SemiRandomFeatures, self).__init__(
103  model,
104  self.input_record_full,
105  output_dims,
106  s=s,
107  scale=scale_random, # To initialize the random parameters
108  weight_init=weight_init_random,
109  bias_init=bias_init_random,
110  weight_optim=None,
111  bias_optim=None,
112  set_weight_as_global_constant=set_weight_as_global_constant,
113  initialize_output_schema=False,
114  name=name,
115  **kwargs)
116 
118  ('full', schema.Scalar(
119  (np.float32, output_dims),
120  model.net.NextScopedBlob(name + '_full_output')
121  ),),
122  ('random', schema.Scalar(
123  (np.float32, output_dims),
124  model.net.NextScopedBlob(name + '_random_output')
125  ),),
126  )
127 
128  # To initialize the learnable parameters
129  assert (scale_learned > 0.0), \
130  "Expected scale (learned) > 0, got %s" % scale_learned
131  self.stddev = scale_learned * np.sqrt(1.0 / self.input_dims)
132 
133  # Learned Parameters
134  (self.learned_w, self.learned_b) = self._initialize_params(
135  'learned_w',
136  'learned_b',
137  w_init=weight_init_learned,
138  b_init=bias_init_learned,
139  w_optim=weight_optim,
140  b_optim=bias_optim
141  )
142 
143  def add_ops(self, net):
144  # Learned features: wx + b
145  learned_features = net.FC(self.input_record_full.field_blobs() +
146  [self.learned_w, self.learned_b],
147  net.NextScopedBlob('learned_features'))
148  # Random features: wx + b
149  random_features = net.FC(self.input_record_random.field_blobs() +
150  [self.random_w, self.random_b],
151  net.NextScopedBlob('random_features'))
152  processed_random_features = self._heaviside_with_power(
153  net,
154  random_features,
155  self.output_schema.random.field_blobs(),
156  self.s
157  )
158  net.Mul([processed_random_features, learned_features],
159  self.output_schema.full.field_blobs())
def _heaviside_with_power(self, net, input_features, output_blob, s)
def _initialize_params(self, w_name, b_name, w_init=None, b_init=None, w_optim=None, b_optim=None)