Caffe2 - Python API
A deep learning, cross platform ML framework
semi_random_features.py
1 from __future__ import absolute_import
2 from __future__ import division
3 from __future__ import print_function
4 from __future__ import unicode_literals
5 
6 from caffe2.python import schema
7 from caffe2.python.layers.arc_cosine_feature_map import ArcCosineFeatureMap
8 import numpy as np
9 
10 
12  """
13  Implementation of the semi-random kernel feature map.
14 
15  Applies H(x_rand) * x_rand^s * x_learned, where
16  H is the Heaviside step function,
17  x_rand is the input after applying FC with randomized parameters,
18  and x_learned is the input after applying FC with learnable parameters.
19 
20  If using multilayer model with semi-random layers, then input and output records
21  should have a 'full' and 'random' Scalar. The random Scalar will be passed as
22  input to process the random features.
23 
24  For more information, see the original paper:
25  https://arxiv.org/pdf/1702.08882.pdf
26 
27  Inputs :
28  output_dims -- dimensions of the output vector
29  s -- if s == 0, will obtain linear semi-random features;
30  else if s == 1, will obtain squared semi-random features;
31  else s >= 2, will obtain higher order semi-random features
32  scale_random -- amount to scale the standard deviation
33  (for random parameter initialization when weight_init or
34  bias_init hasn't been specified)
35  scale_learned -- amount to scale the standard deviation
36  (for learned parameter initialization when weight_init or
37  bias_init hasn't been specified)
38 
39  weight_init_random -- initialization distribution for random weight parameter
40  (if None, will use Gaussian distribution)
 41  bias_init_random -- initialization distribution for random bias parameter
42  (if None, will use Uniform distribution)
43  weight_init_learned -- initialization distribution for learned weight parameter
44  (if None, will use Gaussian distribution)
 45  bias_init_learned -- initialization distribution for learned bias parameter
46  (if None, will use Uniform distribution)
47  weight_optim -- optimizer for weight params for learned features
48  bias_optim -- optimizer for bias param for learned features
49 
50  set_weight_as_global_constant -- if True, initialized random parameters
51  will be constant across all distributed
52  instances of the layer
53  """
54  def __init__(
55  self,
56  model,
57  input_record,
58  output_dims,
59  s=1,
60  scale_random=1.0,
61  scale_learned=1.0,
62  weight_init_random=None,
63  bias_init_random=None,
64  weight_init_learned=None,
65  bias_init_learned=None,
66  weight_optim=None,
67  bias_optim=None,
68  set_weight_as_global_constant=False,
69  name='semi_random_features',
70  **kwargs):
71 
72  if isinstance(input_record, schema.Struct):
73  schema.is_schema_subset(
75  ('full', schema.Scalar()),
76  ('random', schema.Scalar()),
77  ),
78  input_record
79  )
80  self.input_record_full = input_record.full
81  self.input_record_random = input_record.random
82 
83  elif isinstance(input_record, schema.Scalar):
84  self.input_record_full = input_record
85  self.input_record_random = input_record
86 
87  super(SemiRandomFeatures, self).__init__(
88  model,
89  self.input_record_full,
90  output_dims,
91  s=s,
92  scale=scale_random, # To initialize the random parameters
93  weight_init=weight_init_random,
94  bias_init=bias_init_random,
95  weight_optim=None,
96  bias_optim=None,
97  set_weight_as_global_constant=set_weight_as_global_constant,
98  initialize_output_schema=False,
99  name=name,
100  **kwargs)
101 
103  ('full', schema.Scalar(
104  (np.float32, output_dims),
105  model.net.NextScopedBlob(name + '_full_output')
106  ),),
107  ('random', schema.Scalar(
108  (np.float32, output_dims),
109  model.net.NextScopedBlob(name + '_random_output')
110  ),),
111  )
112 
113  # To initialize the learnable parameters
114  assert (scale_learned > 0.0), \
115  "Expected scale (learned) > 0, got %s" % scale_learned
116  self.stddev = scale_learned * np.sqrt(1.0 / self.input_dims)
117 
118  # Learned Parameters
119  (self.learned_w, self.learned_b) = self._initialize_params(
120  'learned_w',
121  'learned_b',
122  w_init=weight_init_learned,
123  b_init=bias_init_learned,
124  w_optim=weight_optim,
125  b_optim=bias_optim
126  )
127 
128  def add_ops(self, net):
129  # Learned features: wx + b
130  learned_features = net.FC(self.input_record_full.field_blobs() +
131  [self.learned_w, self.learned_b],
132  net.NextScopedBlob('learned_features'))
133  # Random features: wx + b
134  random_features = net.FC(self.input_record_random.field_blobs() +
135  [self.random_w, self.random_b],
136  net.NextScopedBlob('random_features'))
137  processed_random_features = self._heaviside_with_power(
138  net,
139  random_features,
140  self.output_schema.random.field_blobs(),
141  self.s
142  )
143  net.Mul([processed_random_features, learned_features],
144  self.output_schema.full.field_blobs())
def _heaviside_with_power(self, net, input_features, output_blob, s)
def _initialize_params(self, w_name, b_name, w_init=None, b_init=None, w_optim=None, b_optim=None)