Caffe2 - C++ API
A deep learning, cross-platform ML framework
rnn.cpp
#include <gtest/gtest.h>

#include <torch/nn/modules/linear.h>
#include <torch/nn/modules/rnn.h>
#include <torch/optim/adam.h>
#include <torch/types.h>
#include <torch/utils.h>

#include <test/cpp/api/support.h>

using namespace torch::nn;
using namespace torch::test;

// Shared helper: trains a small Linear -> RNN -> Linear model to predict the
// sum of a random 0/1 input sequence. Returns false if the running loss does
// not drop below 1e-2 within max_epoch iterations.
template <typename R, typename Func>
bool test_RNN_xor(Func&& model_maker, bool cuda = false) {
  torch::manual_seed(0);

  auto nhid = 32;
  auto model = std::make_shared<SimpleContainer>();
  auto l1 = model->add(Linear(1, nhid), "l1");
  auto rnn = model->add(model_maker(nhid), "rnn");
  auto lo = model->add(Linear(nhid, 1), "lo");

  torch::optim::Adam optimizer(model->parameters(), 1e-2);
  auto forward_op = [&](torch::Tensor x) {
    auto T = x.size(0);
    auto B = x.size(1);
    x = x.view({T * B, 1});
    x = l1->forward(x).view({T, B, nhid}).tanh_();
    // Keep only the RNN output at the last time step.
    x = rnn->forward(x).output[T - 1];
    x = lo->forward(x);
    return x;
  };

  if (cuda) {
    model->to(torch::kCUDA);
  }

  float running_loss = 1;
  int epoch = 0;
  auto max_epoch = 1500;
  while (running_loss > 1e-2) {
    auto bs = 16U;
    auto nlen = 5U;

    const auto backend = cuda ? torch::kCUDA : torch::kCPU;
    auto inputs =
        torch::rand({nlen, bs, 1}, backend).round().to(torch::kFloat32);
    auto labels = inputs.sum(0).detach();
    inputs.set_requires_grad(true);

    auto outputs = forward_op(inputs);
    torch::Tensor loss = torch::mse_loss(outputs, labels);

    optimizer.zero_grad();
    loss.backward();
    optimizer.step();

    running_loss = running_loss * 0.99 + loss.item<float>() * 0.01;
    if (epoch > max_epoch) {
      return false;
    }
    epoch++;
  }
  return true;
}

void check_lstm_sizes(RNNOutput output) {
  // Expect the LSTM to have 64 hidden outputs and 3 layers, given an input
  // with 10 time steps and a batch size of 16 (10 x 16 x n).

  ASSERT_EQ(output.output.ndimension(), 3);
  ASSERT_EQ(output.output.size(0), 10);
  ASSERT_EQ(output.output.size(1), 16);
  ASSERT_EQ(output.output.size(2), 64);

  ASSERT_EQ(output.state.ndimension(), 4);
  ASSERT_EQ(output.state.size(0), 2); // (hx, cx)
  ASSERT_EQ(output.state.size(1), 3); // layers
  ASSERT_EQ(output.state.size(2), 16); // batch size
  ASSERT_EQ(output.state.size(3), 64); // 64 hidden dims

  // Something is in the hiddens
  ASSERT_GT(output.state.norm().item<float>(), 0);
}

// Test fixture used by the TEST_F cases below.
struct RNNTest : torch::test::SeedingFixture {};

TEST_F(RNNTest, CheckOutputSizes) {
  LSTM model(LSTMOptions(128, 64).layers(3).dropout(0.2));
  // Input size is: sequence length, batch size, input size
  auto x = torch::randn({10, 16, 128}, torch::requires_grad());
  auto output = model->forward(x);
  auto y = x.mean();

  y.backward();
  check_lstm_sizes(output);

  auto next = model->forward(x, output.state);

  check_lstm_sizes(next);

  torch::Tensor diff = next.state - output.state;

  // Hiddens changed
  ASSERT_GT(diff.abs().sum().item<float>(), 1e-3);
}

TEST_F(RNNTest, CheckOutputValuesMatchPyTorch) {
  torch::manual_seed(0);
  // Make sure the outputs match pytorch outputs
  LSTM model(2, 2);
  for (auto& v : model->parameters()) {
    float size = v.numel();
    auto p = static_cast<float*>(v.storage().data());
    for (size_t i = 0; i < size; i++) {
      p[i] = i / size;
    }
  }

  auto x = torch::empty({3, 4, 2}, torch::requires_grad());
  float size = x.numel();
  auto p = static_cast<float*>(x.storage().data());
  for (size_t i = 0; i < size; i++) {
    p[i] = (size - i) / size;
  }

  auto out = model->forward(x);
  ASSERT_EQ(out.output.ndimension(), 3);
  ASSERT_EQ(out.output.size(0), 3);
  ASSERT_EQ(out.output.size(1), 4);
  ASSERT_EQ(out.output.size(2), 2);

  auto flat = out.output.view(3 * 4 * 2);
  float c_out[] = {0.4391, 0.5402, 0.4330, 0.5324, 0.4261, 0.5239,
                   0.4183, 0.5147, 0.6822, 0.8064, 0.6726, 0.7968,
                   0.6620, 0.7860, 0.6501, 0.7741, 0.7889, 0.9003,
                   0.7769, 0.8905, 0.7635, 0.8794, 0.7484, 0.8666};
  for (size_t i = 0; i < 3 * 4 * 2; i++) {
    ASSERT_LT(std::abs(flat[i].item<float>() - c_out[i]), 1e-3);
  }

  ASSERT_EQ(out.state.ndimension(), 4); // (hx, cx) x layers x B x 2
  ASSERT_EQ(out.state.size(0), 2);
  ASSERT_EQ(out.state.size(1), 1);
  ASSERT_EQ(out.state.size(2), 4);
  ASSERT_EQ(out.state.size(3), 2);
  flat = out.state.view(16);
  float h_out[] = {0.7889,
                   0.9003,
                   0.7769,
                   0.8905,
                   0.7635,
                   0.8794,
                   0.7484,
                   0.8666,
                   1.1647,
                   1.6106,
                   1.1425,
                   1.5726,
                   1.1187,
                   1.5329,
                   1.0931,
                   1.4911};
  for (size_t i = 0; i < 16; i++) {
    ASSERT_LT(std::abs(flat[i].item<float>() - h_out[i]), 1e-3);
  }
}

TEST_F(RNNTest, EndToEndLSTM) {
  ASSERT_TRUE(test_RNN_xor<LSTM>(
      [](int s) { return LSTM(LSTMOptions(s, s).layers(2)); }));
}

TEST_F(RNNTest, EndToEndGRU) {
  ASSERT_TRUE(
      test_RNN_xor<GRU>([](int s) { return GRU(GRUOptions(s, s).layers(2)); }));
}

TEST_F(RNNTest, EndToEndRNNRelu) {
  ASSERT_TRUE(test_RNN_xor<RNN>(
      [](int s) { return RNN(RNNOptions(s, s).relu().layers(2)); }));
}

TEST_F(RNNTest, EndToEndRNNTanh) {
  ASSERT_TRUE(test_RNN_xor<RNN>(
      [](int s) { return RNN(RNNOptions(s, s).tanh().layers(2)); }));
}

TEST_F(RNNTest, Sizes_CUDA) {
  torch::manual_seed(0);
  LSTM model(LSTMOptions(128, 64).layers(3).dropout(0.2));
  model->to(torch::kCUDA);
  auto x =
      torch::randn({10, 16, 128}, torch::requires_grad().device(torch::kCUDA));
  auto output = model->forward(x);
  auto y = x.mean();

  y.backward();
  check_lstm_sizes(output);

  auto next = model->forward(x, output.state);

  check_lstm_sizes(next);

  torch::Tensor diff = next.state - output.state;

  // Hiddens changed
  ASSERT_GT(diff.abs().sum().item<float>(), 1e-3);
}

TEST_F(RNNTest, EndToEndLSTM_CUDA) {
  ASSERT_TRUE(test_RNN_xor<LSTM>(
      [](int s) { return LSTM(LSTMOptions(s, s).layers(2)); }, true));
}

TEST_F(RNNTest, EndToEndGRU_CUDA) {
  ASSERT_TRUE(test_RNN_xor<GRU>(
      [](int s) { return GRU(GRUOptions(s, s).layers(2)); }, true));
}

TEST_F(RNNTest, EndToEndRNNRelu_CUDA) {
  ASSERT_TRUE(test_RNN_xor<RNN>(
      [](int s) { return RNN(RNNOptions(s, s).relu().layers(2)); }, true));
}

TEST_F(RNNTest, EndToEndRNNTanh_CUDA) {
  ASSERT_TRUE(test_RNN_xor<RNN>(
      [](int s) { return RNN(RNNOptions(s, s).tanh().layers(2)); }, true));
}

TEST_F(RNNTest, PrettyPrintRNNs) {
  ASSERT_EQ(
      c10::str(LSTM(LSTMOptions(128, 64).layers(3).dropout(0.2))),
      "torch::nn::LSTM(input_size=128, hidden_size=64, layers=3, dropout=0.2)");
  ASSERT_EQ(
      c10::str(GRU(GRUOptions(128, 64).layers(3).dropout(0.5))),
      "torch::nn::GRU(input_size=128, hidden_size=64, layers=3, dropout=0.5)");
  ASSERT_EQ(
      c10::str(RNN(RNNOptions(128, 64).layers(3).dropout(0.2).tanh())),
      "torch::nn::RNN(input_size=128, hidden_size=64, layers=3, dropout=0.2, activation=tanh)");
}
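
For reference, below is a minimal standalone sketch of the forward-pass pattern these tests exercise: constructing an LSTM from LSTMOptions, calling forward(), and feeding the returned RNNOutput::state back into the next call. The module configuration and tensor shapes are taken from the tests above; the main() wrapper and the std::cout printing are illustrative additions and not part of the test file.

#include <torch/nn/modules/rnn.h>
#include <torch/types.h>

#include <iostream>

int main() {
  // Same configuration as the CheckOutputSizes test:
  // 3-layer LSTM, 128 input features, 64 hidden units, dropout 0.2.
  torch::nn::LSTM model(
      torch::nn::LSTMOptions(128, 64).layers(3).dropout(0.2));

  // Input layout is (sequence length, batch size, input size).
  auto x = torch::randn({10, 16, 128});

  // forward() returns an RNNOutput holding .output and .state.
  auto out = model->forward(x);
  std::cout << out.output.sizes() << "\n"; // [10, 16, 64]

  // The returned state can be passed back as the initial state of the next
  // forward() call, as the tests above do.
  auto next = model->forward(x, out.state);
  std::cout << next.state.sizes() << "\n"; // [2, 3, 16, 64]
  return 0;
}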