#include <gtest/gtest.h>

#include <torch/nn/modules/linear.h>
#include <torch/nn/modules/rnn.h>
#include <torch/optim/adam.h>
#include <torch/types.h>
#include <torch/utils.h>

#include <test/cpp/api/support.h>

using namespace torch::nn;
using namespace torch::test;

// Trains a small Linear -> RNN -> Linear model to predict the sum of a
// sequence of random bits, and reports whether training converged.
template <typename R, typename Func>
bool test_RNN_xor(Func&& model_maker, bool cuda = false) {
  torch::manual_seed(0);

  auto nhid = 32; // hidden size of the toy model
  auto model = std::make_shared<SimpleContainer>();
  auto l1 = model->add(Linear(1, nhid), "l1");
  auto rnn = model->add(model_maker(nhid), "rnn");
  auto lo = model->add(Linear(nhid, 1), "lo");
  torch::optim::Adam optimizer(model->parameters(), 1e-2);

  // Embed each scalar input, run the RNN over the sequence, and project the
  // last time step back down to a single prediction.
  auto forward_op = [&](torch::Tensor x) {
    auto T = x.size(0);
    auto B = x.size(1);
    x = x.view({T * B, 1});
    x = l1->forward(x).view({T, B, nhid}).tanh_();
    x = rnn->forward(x).output[T - 1];
    x = lo->forward(x);
    return x;
  };

  if (cuda) {
    model->to(torch::kCUDA);
  }
  float running_loss = 1;
  int epoch = 0;
  auto max_epoch = 1500;
  while (running_loss > 1e-2) {
    auto bs = 16U;
    auto nlen = 5U;

    const auto backend = cuda ? torch::kCUDA : torch::kCPU;
    auto inputs =
        torch::rand({nlen, bs, 1}, backend).round().to(torch::kFloat32);
    auto labels = inputs.sum(0).detach();
    inputs.set_requires_grad(true);

    auto outputs = forward_op(inputs);
    torch::Tensor loss = torch::mse_loss(outputs, labels);

    optimizer.zero_grad();
    loss.backward();
    optimizer.step();

    running_loss = running_loss * 0.99 + loss.item<float>() * 0.01;
    if (epoch > max_epoch) {
      return false;
    }
    epoch++;
  }
  return true;
}
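
// The RNN modules' forward() returns an RNNOutput: `output` is the result of
// applying the RNN to the input sequence, and `state` is the new, updated
// state that can be fed back in on the next forward step.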
void check_lstm_sizes(RNNOutput output) {
  // Expect the LSTM to have 64 outputs and 3 layers, given an input of
  // 10 time steps with batch size 16 (10 x 16 x n).
  ASSERT_EQ(output.output.ndimension(), 3);
  ASSERT_EQ(output.output.size(0), 10);
  ASSERT_EQ(output.output.size(1), 16);
  ASSERT_EQ(output.output.size(2), 64);

  ASSERT_EQ(output.state.ndimension(), 4);
  ASSERT_EQ(output.state.size(0), 2); // (hx, cx)
  ASSERT_EQ(output.state.size(1), 3); // layers
  ASSERT_EQ(output.state.size(2), 16); // batch size
  ASSERT_EQ(output.state.size(3), 64); // hidden dims

  // Something is in the hidden state.
  ASSERT_GT(output.state.norm().item<float>(), 0);
}

struct RNNTest : torch::test::SeedingFixture {};
TEST_F(RNNTest, CheckOutputSizes) {
  LSTM model(LSTMOptions(128, 64).layers(3).dropout(0.2));
  // Input is (sequence length, batch size, input size).
  auto x = torch::randn({10, 16, 128}, torch::requires_grad());
  auto output = model->forward(x);
  auto y = x.mean();

  y.backward();
  check_lstm_sizes(output);

  auto next = model->forward(x, output.state);

  check_lstm_sizes(next);

  torch::Tensor diff = next.state - output.state;

  // The hidden state changed between the two forward passes.
  ASSERT_GT(diff.abs().sum().item<float>(), 1e-3);
}
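
// Fills the LSTM parameters and the input with a deterministic pattern and
// compares the outputs against reference values (c_out / h_out below)
// produced by the Python PyTorch implementation.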
TEST_F(RNNTest, CheckOutputValuesMatchPyTorch) {
  torch::manual_seed(0);
  LSTM model(2, 2);
  for (auto& v : model->parameters()) {
    float size = v.numel();
    auto p = static_cast<float*>(v.storage().data());
    for (size_t i = 0; i < size; i++) {
      p[i] = i / size;
    }
  }

  auto x = torch::empty({3, 4, 2}, torch::requires_grad());
  float size = x.numel();
  auto p = static_cast<float*>(x.storage().data());
  for (size_t i = 0; i < size; i++) {
    p[i] = (size - i) / size;
  }

  auto out = model->forward(x);
  ASSERT_EQ(out.output.ndimension(), 3);
  ASSERT_EQ(out.output.size(0), 3);
  ASSERT_EQ(out.output.size(1), 4);
  ASSERT_EQ(out.output.size(2), 2);

  auto flat = out.output.view(3 * 4 * 2);
  float c_out[] = {0.4391, 0.5402, 0.4330, 0.5324, 0.4261, 0.5239,
                   0.4183, 0.5147, 0.6822, 0.8064, 0.6726, 0.7968,
                   0.6620, 0.7860, 0.6501, 0.7741, 0.7889, 0.9003,
                   0.7769, 0.8905, 0.7635, 0.8794, 0.7484, 0.8666};
  for (size_t i = 0; i < 3 * 4 * 2; i++) {
    ASSERT_LT(std::abs(flat[i].item<float>() - c_out[i]), 1e-3);
  }

  ASSERT_EQ(out.state.ndimension(), 4); // (hx, cx) x layers x batch x hidden
  ASSERT_EQ(out.state.size(0), 2);
  ASSERT_EQ(out.state.size(1), 1);
  ASSERT_EQ(out.state.size(2), 4);
  ASSERT_EQ(out.state.size(3), 2);

  flat = out.state.view(16);
  // The first eight elements of the flattened state are the final hidden
  // state, which must match the last time step of `c_out` above; the cell
  // state reference values are elided from this excerpt, so only the hidden
  // state is checked here.
  float h_out[] = {0.7889, 0.9003, 0.7769, 0.8905,
                   0.7635, 0.8794, 0.7484, 0.8666};
  for (size_t i = 0; i < 8; i++) {
    ASSERT_LT(std::abs(flat[i].item<float>() - h_out[i]), 1e-3);
  }
}
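
// The end-to-end tests below train each RNN variant on the bit-sum task
// defined by test_RNN_xor and assert that training converges within the
// epoch budget.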
TEST_F(RNNTest, EndToEndLSTM) {
  ASSERT_TRUE(test_RNN_xor<LSTM>(
      [](int s) { return LSTM(LSTMOptions(s, s).layers(2)); }));
}

TEST_F(RNNTest, EndToEndGRU) {
  ASSERT_TRUE(test_RNN_xor<GRU>(
      [](int s) { return GRU(GRUOptions(s, s).layers(2)); }));
}
TEST_F(RNNTest, EndToEndRNNRelu) {
  ASSERT_TRUE(test_RNN_xor<RNN>(
      [](int s) { return RNN(RNNOptions(s, s).relu().layers(2)); }));
}

TEST_F(RNNTest, EndToEndRNNTanh) {
  ASSERT_TRUE(test_RNN_xor<RNN>(
      [](int s) { return RNN(RNNOptions(s, s).tanh().layers(2)); }));
}
TEST_F(RNNTest, Sizes_CUDA) {
  torch::manual_seed(0);
  LSTM model(LSTMOptions(128, 64).layers(3).dropout(0.2));
  model->to(torch::kCUDA);
  auto x =
      torch::randn({10, 16, 128}, torch::requires_grad().device(torch::kCUDA));
  auto output = model->forward(x);
  auto y = x.mean();

  y.backward();
  check_lstm_sizes(output);

  auto next = model->forward(x, output.state);

  check_lstm_sizes(next);

  torch::Tensor diff = next.state - output.state;

  // The hidden state changed between the two forward passes.
  ASSERT_GT(diff.abs().sum().item<float>(), 1e-3);
}
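
// Same end-to-end training runs as above, but with cuda = true so the model
// and the generated batches live on the GPU.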
TEST_F(RNNTest, EndToEndLSTM_CUDA) {
  ASSERT_TRUE(test_RNN_xor<LSTM>(
      [](int s) { return LSTM(LSTMOptions(s, s).layers(2)); }, true));
}

TEST_F(RNNTest, EndToEndGRU_CUDA) {
  ASSERT_TRUE(test_RNN_xor<GRU>(
      [](int s) { return GRU(GRUOptions(s, s).layers(2)); }, true));
}

TEST_F(RNNTest, EndToEndRNNRelu_CUDA) {
  ASSERT_TRUE(test_RNN_xor<RNN>(
      [](int s) { return RNN(RNNOptions(s, s).relu().layers(2)); }, true));
}

TEST_F(RNNTest, EndToEndRNNTanh_CUDA) {
  ASSERT_TRUE(test_RNN_xor<RNN>(
      [](int s) { return RNN(RNNOptions(s, s).tanh().layers(2)); }, true));
}
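
// Checks the human-readable representation produced by c10::str for each RNN
// module, including the configured options.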
TEST_F(RNNTest, PrettyPrintRNNs) {
  ASSERT_EQ(
      c10::str(LSTM(LSTMOptions(128, 64).layers(3).dropout(0.2))),
      "torch::nn::LSTM(input_size=128, hidden_size=64, layers=3, dropout=0.2)");
  ASSERT_EQ(
      c10::str(GRU(GRUOptions(128, 64).layers(3).dropout(0.5))),
      "torch::nn::GRU(input_size=128, hidden_size=64, layers=3, dropout=0.5)");
  ASSERT_EQ(
      c10::str(RNN(RNNOptions(128, 64).layers(3).dropout(0.2).tanh())),
      "torch::nn::RNN(input_size=128, hidden_size=64, layers=3, dropout=0.2, activation=tanh)");
}