#include "caffe2/operators/dropout_op.h"

namespace caffe2 {

template <>
bool DropoutOp<float, CPUContext>::RunOnDevice() {
  auto& X = Input(0);
  auto* Y = Output(0, X.sizes(), at::dtype<float>());

  if (is_test_) {
    if (!IsInputOutputAlias(0, 0)) {
      context_.CopyFromCPU<float>(
          X.numel(), X.data<float>(), Y->template mutable_data<float>());
    }
    return true;
  } else {
    float scale = 1. / (1. - ratio_);
    // mask=true means keep, so the Bernoulli draw uses probability 1 - ratio.
    std::bernoulli_distribution dist(1. - ratio_);
    const float* Xdata = X.data<float>();
    float* Ydata = Y->template mutable_data<float>();

    auto mask = Output(1, X.sizes(), at::dtype<bool>());
    bool* mask_data = mask->template mutable_data<bool>();
    auto& gen = context_.RandGenerator();
    for (int i = 0; i < X.numel(); ++i) {
      mask_data[i] = dist(gen);
      Ydata[i] = Xdata[i] * scale * mask_data[i];
    }
    return true;
  }
}
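// The scaling by 1 / (1 - ratio_) above is "inverted dropout": with keep
// probability p = 1 - ratio_, each output element has expectation
//   E[Ydata[i]] = p * (Xdata[i] / p) + (1 - p) * 0 = Xdata[i],
// so activations keep their expected value during training and the is_test_
// path can simply copy X through unchanged.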
template <>
bool DropoutGradientOp<float, CPUContext>::RunOnDevice() {
  auto& dY = Input(0);

  auto* dX = Output(0, dY.sizes(), at::dtype<float>());
  if (is_test_) {
    if (dX != &dY) {
      context_.CopyFromCPU<float>(
          dY.numel(), dY.data<float>(), dX->template mutable_data<float>());
    }
    return true;
  } else {
    auto& mask = Input(1);
    CAFFE_ENFORCE_EQ(dY.numel(), mask.numel());
    const float* dYdata = dY.data<float>();
    const bool* mask_data = mask.data<bool>();
    float* dXdata = dX->template mutable_data<float>();
    float scale = 1. / (1. - ratio_);
    for (int i = 0; i < dY.numel(); ++i) {
      dXdata[i] = dYdata[i] * mask_data[i] * scale;
    }
    return true;
  }
}
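// Gradient of the training-mode path: since Ydata[i] = Xdata[i] * scale when
// mask_data[i] is true and 0 otherwise, dL/dXdata[i] = dL/dYdata[i] * scale
// for kept elements and 0 for dropped ones, which is exactly
// dXdata[i] = dYdata[i] * mask_data[i] * scale as computed in the loop above.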
REGISTER_CPU_OPERATOR(Dropout, DropoutOp<float, CPUContext>);
REGISTER_CPU_GRADIENT_OPERATOR(
    DropoutGrad,
    DropoutGradientOp<float, CPUContext>);
OPERATOR_SCHEMA(Dropout)
    .NumInputs(1)
    .NumOutputs(1, 2)
    .AllowInplace({{0, 0}})
    .TensorInferenceFunction([](const OperatorDef& def,
                                const vector<TensorShape>& in) {
      CAFFE_ENFORCE_EQ(1, in.size());
      vector<TensorShape> out;
      ArgumentHelper argsHelper(def);
      out.push_back(in[0]);
      if (def.output().size() == 2) {
        out.push_back(in[0]);
        out[1].set_data_type(TensorProto_DataType_BOOL);
      }
      return out;
    })
    .SetDoc(R"DOC(
`Dropout` takes one input data tensor (`X`) and produces two tensor outputs, `Y` and
`mask`. If the `is_test` argument is zero (default=0), the output `Y` will be the input
with random elements zeroed. The probability that a given element is zeroed is
determined by the `ratio` argument.

If the `is_test` argument is set to non-zero, the output `Y` is exactly the same as the
input `X`. Note that outputs are scaled by a factor of $\frac{1}{1-ratio}$ during
training, so that during test time, we can simply compute an identity function. This
scaling is important because we want the output at test time to equal the expected value
at training time. Dropout has been proven to be an effective regularization technique to
prevent overfitting during training.

Github Links:

- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/dropout_op.h
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/dropout_op.cc

<details>

<summary> <b>Example</b> </summary>

**Code**

```
workspace.ResetWorkspace()

op = core.CreateOperator(
    "Dropout",
    ["X"],
    ["Y", "mask"],
    ratio=0.5,
    is_test=0,
)

workspace.FeedBlob("X", np.random.randint(10, size=(5, 5)).astype(np.float32))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
print("mask:", workspace.FetchBlob("mask"))
```

**Result**

```
Y: [[ 0.  0.  0. 12. 18.]
mask: [[False False False  True  True]
 [False False  True  True False]
 [False False  True  True  True]
 [False False  True False False]
 [ True False False False False]]
```

</details>

)DOC")
    .Arg(
        "ratio",
        "*(type: float; default: 0.5)* Probability of an element to be zeroed.")
    .ArgIsTest(
        "*(type: int; default: 0)* If zero (train mode), perform dropout. If non-zero "
        "(test mode), Y = X.")
    .Input(0, "X", "*(type: Tensor`<float>`)* Input data tensor.")
    .Output(0, "Y", "*(type: Tensor`<float>`)* Output tensor.")
    .Output(
        1,
        "mask",
        "*(type: Tensor`<bool>`)* The output mask containing boolean values for "
        "each element, signifying which elements are dropped out. If `is_test` is "
        "nonzero, this output is not filled.")
    .InheritOnnxSchema();
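// Worked example of the scaling described in the doc above: with ratio = 0.5,
// scale = 1 / (1 - 0.5) = 2, so a kept activation of 6 is emitted as 12 during
// training, while at test time (is_test != 0) it passes through as 6.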
GRADIENT_OPERATOR_SCHEMA(DropoutGrad)
    .NumInputs(1, 2)
    .NumOutputs(1)
    .AllowInplace({{0, 0}});
class GetDropoutGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    ArgumentHelper argshelper(def_);
    auto is_test = argshelper.GetSingleArgument<bool>("is_test", 0);
    if (is_test) {
      return SingleGradientDef(
          "DropoutGrad", "", vector<string>{GO(0)}, vector<string>{GI(0)});
    } else {
      return SingleGradientDef(
          "DropoutGrad",
          "",
          vector<string>{GO(0), O(1)},
          vector<string>{GI(0)});
    }
  }
};

REGISTER_GRADIENT(Dropout, GetDropoutGradient);
} // namespace caffe2