# Caffe2 - Python API
# A deep learning, cross platform ML framework
# batch_softmax_loss.py
## @package batch_softmax_loss
# Module caffe2.python.layers.batch_softmax_loss
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from caffe2.python import core, schema
from caffe2.python.layers.layers import ModelLayer
import numpy as np
12 
class BatchSoftmaxLoss(ModelLayer):
    """Layer that applies SoftmaxWithLoss to a (prediction, label) record,
    optionally smoothing integer labels into probability distributions via
    a label-smoothing matrix."""

    def __init__(
        self,
        model,
        input_record,
        name='batch_softmax_loss',
        label_smoothing_matrix=None,
        label_prob=False,
        **kwargs
    ):
        super(BatchSoftmaxLoss, self).__init__(
            model, name, input_record, **kwargs)

        # The input record must provide both a label and a prediction field.
        assert schema.is_schema_subset(
            schema.Struct(
                ('label', schema.Scalar()),
                ('prediction', schema.Scalar()),
            ),
            input_record
        )
        self.label_prob = label_prob

        # label smoothing matrix: a K * K matrix where K is the label
        # cardinality; (i, j) element is the value of for label i
        # treated/smoothed as label j
        self.label_smoothing_matrix = label_smoothing_matrix
        if self.label_smoothing_matrix is not None:
            self.initialize_label_smoothing_constants()

        # Outputs: per-example softmax probabilities (same dtype as the
        # prediction) and the scalar averaged loss.
        self.output_schema = schema.Struct(
            (
                'softmax', schema.Scalar(
                    input_record.prediction.field_type(),
                    self.get_next_blob_reference('softmax')
                )
            ),
            (
                'loss', schema.Scalar(
                    np.float32, self.get_next_blob_reference('loss')
                )
            ),
        )
55 
56  def initialize_label_smoothing_constants(self):
57  assert self.label_smoothing_matrix is not None
58  self.label_smoothing_matrix = np.array(
59  self.label_smoothing_matrix).astype(np.float32)
60  assert len(self.label_smoothing_matrix.shape) == 2
61  label_dim = self.label_smoothing_matrix.shape[0]
62  assert label_dim == self.label_smoothing_matrix.shape[1]
63 
64  self.label_smoothing_matrix = self.model.add_global_constant(
65  '%s_label_smoothing_matrix' % self.name,
66  array=self.label_smoothing_matrix,
67  dtype=np.dtype(np.float32),
68  )
69  self.label_dim = self.model.add_global_constant(
70  '%s_label_dim' % self.name,
71  array=label_dim,
72  dtype=np.dtype(np.int64),
73  )
74  # default case: label is given NOT as target distribution
75  # but when used in label smoothing, the label must be in probabilities
76  self.label_prob = True
77 
78  def compute_smoothed_label(self, net):
79  assert self.label_smoothing_matrix is not None
80  label = self.input_record.label()
81  original_label_type = self.input_record.label.field_type()
82  if original_label_type.base != np.int64:
83  int64_label = net.NextScopedBlob('int64_label')
84  net.Cast([label], [int64_label], to=core.DataType.INT64)
85  else:
86  int64_label = label
87  one_hot_label = net.NextScopedBlob('one_hot_label')
88  smoothed_label = net.NextScopedBlob('smoothed_label')
89  net.OneHot([int64_label, self.label_dim], [one_hot_label])
90  net.MatMul([one_hot_label, self.label_smoothing_matrix], smoothed_label)
91  return smoothed_label
92 
93  def add_ops(self, net):
94  label = self.input_record.label.field_blobs()
95  if self.label_smoothing_matrix is not None:
96  label = [self.compute_smoothed_label(net)]
97  elif not self.label_prob:
98  if self.input_record.label.field_types()[0].base != np.int32:
99  label = [
100  net.Cast(label,
101  net.NextScopedBlob('int32_label'),
102  to=core.DataType.INT32)
103  ]
104 
105  softmax_input = self.input_record.prediction.field_blobs() + label
106 
107  if 'weight' in self.input_record:
108  weight_blob = self.input_record.weight()
109  if self.input_record.weight.field_type().base != np.float32:
110  weight_blob = net.Cast(
111  weight_blob,
112  weight_blob + '_float32',
113  to=core.DataType.FLOAT
114  )
115 
116  softmax_input += [weight_blob]
117 
118  net.SoftmaxWithLoss(
119  softmax_input,
120  self.output_schema.field_blobs(),
121  label_prob=self.label_prob,
122  )
# NOTE: get_next_blob_reference(self, name) is inherited from ModelLayer
# (defined in layers.py).