#include <gtest/gtest.h>

#include <torch/data.h>
#include <torch/nn/modules/batchnorm.h>
#include <torch/nn/modules/conv.h>
#include <torch/nn/modules/dropout.h>
#include <torch/nn/modules/linear.h>
#include <torch/optim/adam.h>
#include <torch/optim/optimizer.h>
#include <torch/optim/sgd.h>
#include <torch/types.h>
#include <torch/utils.h>

#include <test/cpp/api/support.h>

#include <cmath>
#include <cstdlib>
#include <memory>
#include <string>
#include <tuple>
#include <vector>

using namespace torch::nn;
using namespace torch::test;

const double kPi = 3.1415926535898;
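
// A small cart-pole environment (a C++ translation of the classic Gym
// "CartPole" dynamics), used by the reinforcement-learning test below.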
struct CartPole {
  double gravity = 9.8;
  double masscart = 1.0;
  double masspole = 0.1;
  double total_mass = (masspole + masscart);
  double length = 0.5; // actually half the pole's length
  double polemass_length = (masspole * length);
  double force_mag = 10.0;
  double tau = 0.02; // seconds between state updates

  // Angle and position beyond which the episode fails.
  double theta_threshold_radians = 12 * 2 * kPi / 360;
  double x_threshold = 2.4;
  int steps_beyond_done = -1;

  torch::Tensor state;
  double reward;
  bool done;
  int step_ = 0;

  torch::Tensor getState() { return state; }
  double getReward() { return reward; }
  bool isDone() { return done; }

  void reset() {
    state = torch::empty({4}).uniform_(-0.05, 0.05);
    steps_beyond_done = -1;
    step_ = 0;
  }

  CartPole() {
    reset();
  }

  void step(int action) {
    auto x = state[0].item<float>();
    auto x_dot = state[1].item<float>();
    auto theta = state[2].item<float>();
    auto theta_dot = state[3].item<float>();

    auto force = (action == 1) ? force_mag : -force_mag;
    auto costheta = std::cos(theta);
    auto sintheta = std::sin(theta);
    auto temp = (force + polemass_length * theta_dot * theta_dot * sintheta) /
        total_mass;
    auto thetaacc = (gravity * sintheta - costheta * temp) /
        (length * (4.0 / 3.0 - masspole * costheta * costheta / total_mass));
    auto xacc = temp - polemass_length * thetaacc * costheta / total_mass;

    x = x + tau * x_dot;
    x_dot = x_dot + tau * xacc;
    theta = theta + tau * theta_dot;
    theta_dot = theta_dot + tau * thetaacc;
    state = torch::tensor({x, x_dot, theta, theta_dot});

    done = x < -x_threshold || x > x_threshold ||
        theta < -theta_threshold_radians || theta > theta_threshold_radians ||
        step_ > 200;

    if (!done) {
      reward = 1.0;
    } else if (steps_beyond_done == -1) {
      // Pole just fell!
      steps_beyond_done = 0;
      reward = 0;
    } else {
      if (steps_beyond_done == 0) {
        AT_ASSERT(false); // A finished episode must not be stepped further.
      }
    }
    step_++;
  }
};
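
// Trains `model` on MNIST using `forward_op` and `optimizer`, then returns
// true if accuracy on the test split exceeds 80%. The dataset is read from
// "./mnist" unless TORCH_CPP_TEST_MNIST_PATH points somewhere else.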
template <typename M, typename F, typename O>
bool test_mnist(
    size_t batch_size,
    size_t number_of_epochs,
    bool with_cuda,
    M&& model,
    F&& forward_op,
    O&& optimizer) {
  std::string mnist_path = "mnist";
  if (const char* user_mnist_path = std::getenv("TORCH_CPP_TEST_MNIST_PATH")) {
    mnist_path = user_mnist_path;
  }

  auto train_dataset =
      torch::data::datasets::MNIST(
          mnist_path, torch::data::datasets::MNIST::Mode::kTrain)
          .map(torch::data::transforms::Stack<>());
  auto data_loader =
      torch::data::make_data_loader(std::move(train_dataset), batch_size);

  torch::Device device(with_cuda ? torch::kCUDA : torch::kCPU);
  model->to(device);

  for (size_t epoch = 0; epoch < number_of_epochs; epoch++) {
    for (torch::data::Example<>& batch : *data_loader) {
      auto data = batch.data.to(device), targets = batch.target.to(device);
      torch::Tensor prediction = forward_op(std::move(data));
      torch::Tensor loss = torch::nll_loss(prediction, std::move(targets));
      AT_ASSERT(!torch::isnan(loss).any().item<int64_t>());
      optimizer.zero_grad();
      loss.backward();
      optimizer.step();
    }
  }

  torch::NoGradGuard guard;
  torch::data::datasets::MNIST test_dataset(
      mnist_path, torch::data::datasets::MNIST::Mode::kTest);
  auto images = test_dataset.images().to(device),
       targets = test_dataset.targets().to(device);

  auto result = std::get<1>(forward_op(images).max(/*dim=*/1));
  torch::Tensor correct = (result == targets).to(torch::kFloat32);
  return correct.sum().item<float>() > (test_dataset.size().value() * 0.8);
}
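
// Actor-critic training on the cart-pole environment: a shared linear layer
// feeds a two-way policy head and a scalar value head.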
TEST(IntegrationTest, CartPole) {
  torch::manual_seed(0);
  auto model = std::make_shared<SimpleContainer>();
  auto linear = model->add(Linear(4, 128), "linear");
  auto policyHead = model->add(Linear(128, 2), "policy");
  auto valueHead = model->add(Linear(128, 1), "action");
  torch::optim::Adam optimizer(model->parameters(), 1e-3);

  std::vector<torch::Tensor> saved_log_probs;
  std::vector<torch::Tensor> saved_values;
  std::vector<float> rewards;

  auto forward = [&](torch::Tensor inp) {
    auto x = linear->forward(inp).clamp_min(0);
    torch::Tensor actions = policyHead->forward(x);
    torch::Tensor value = valueHead->forward(x);
    return std::make_tuple(torch::softmax(actions, -1), value);
  };

  auto selectAction = [&](torch::Tensor state) {
    // Sample an action from the policy and remember its log-probability
    // together with the predicted state value.
    auto out = forward(state);
    auto probs = torch::Tensor(std::get<0>(out));
    auto value = torch::Tensor(std::get<1>(out));
    auto action = probs.multinomial(1)[0].item<int32_t>();
    auto p = probs / probs.sum(-1, true);
    auto log_prob = p[action].log();
    saved_log_probs.emplace_back(log_prob);
    saved_values.push_back(value);
    return action;
  };
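
  // After each episode, turn the collected rewards into discounted returns
  // and update the network with a combined policy-gradient and value loss.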
  auto finishEpisode = [&] {
    auto R = 0.;
    for (int i = rewards.size() - 1; i >= 0; i--) {
      R = rewards[i] + 0.99 * R;
      rewards[i] = R;
    }
    auto r_t = torch::from_blob(
        rewards.data(), {static_cast<int64_t>(rewards.size())});
    r_t = (r_t - r_t.mean()) / (r_t.std() + 1e-5);

    std::vector<torch::Tensor> policy_loss;
    std::vector<torch::Tensor> value_loss;
    for (auto i = 0U; i < saved_log_probs.size(); i++) {
      auto r = rewards[i] - saved_values[i].item<float>();
      policy_loss.push_back(-r * saved_log_probs[i]);
      value_loss.push_back(
          torch::smooth_l1_loss(saved_values[i], torch::ones(1) * rewards[i]));
    }

    auto loss =
        torch::stack(policy_loss).sum() + torch::stack(value_loss).sum();

    optimizer.zero_grad();
    loss.backward();
    optimizer.step();

    rewards.clear();
    saved_log_probs.clear();
    saved_values.clear();
  };
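
  // Run episodes until the running reward clears the success threshold; fail
  // the test if learning has not converged within 3000 episodes.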
  auto env = CartPole();
  double running_reward = 10.0;
  for (size_t episode = 0;; episode++) {
    env.reset();
    auto state = env.getState();
    int t = 0;
    for (; t < 10000; t++) {
      auto action = selectAction(state);
      env.step(action);
      state = env.getState();
      auto reward = env.getReward();
      auto done = env.isDone();
      rewards.push_back(reward);
      if (done) {
        break;
      }
    }

    running_reward = running_reward * 0.99 + t * 0.01;
    finishEpisode();
    if (running_reward > 150) {
      break;
    }
    ASSERT_LT(episode, 3000);
  }
}
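
// End-to-end MNIST test: a small convolutional network with dropout, trained
// with SGD and checked against the 80% accuracy bar in test_mnist.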
TEST(IntegrationTest, MNIST_CUDA) {
  torch::manual_seed(0);
  auto model = std::make_shared<SimpleContainer>();
  auto conv1 = model->add(Conv2d(1, 10, 5), "conv1");
  auto conv2 = model->add(Conv2d(10, 20, 5), "conv2");
  auto drop = Dropout(0.3);
  auto drop2d = FeatureDropout(0.3);
  auto linear1 = model->add(Linear(320, 50), "linear1");
  auto linear2 = model->add(Linear(50, 10), "linear2");

  auto forward = [&](torch::Tensor x) {
    x = torch::max_pool2d(conv1->forward(x), {2, 2}).relu();
    x = conv2->forward(x);
    x = drop2d->forward(x);
    x = torch::max_pool2d(x, {2, 2}).relu();

    x = x.view({-1, 320});
    x = linear1->forward(x).clamp_min(0);
    x = drop->forward(x);
    x = linear2->forward(x);
    x = torch::log_softmax(x, 1);
    return x;
  };

  torch::optim::SGD optimizer(
      model->parameters(), torch::optim::SGDOptions(1e-2).momentum(0.5));

  ASSERT_TRUE(test_mnist(
      32, // batch_size
      3, // number_of_epochs
      true, // with_cuda
      model,
      forward,
      optimizer));
}
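
// The same MNIST test with batch normalization after the first convolution
// and after the first linear layer (and no dropout).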
TEST(IntegrationTest, MNISTBatchNorm_CUDA) {
  torch::manual_seed(0);
  auto model = std::make_shared<SimpleContainer>();
  auto conv1 = model->add(Conv2d(1, 10, 5), "conv1");
  auto batchnorm2d = model->add(BatchNorm(10), "batchnorm2d");
  auto conv2 = model->add(Conv2d(10, 20, 5), "conv2");
  auto linear1 = model->add(Linear(320, 50), "linear1");
  auto batchnorm1 = model->add(BatchNorm(50), "batchnorm1");
  auto linear2 = model->add(Linear(50, 10), "linear2");

  auto forward = [&](torch::Tensor x) {
    x = torch::max_pool2d(conv1->forward(x), {2, 2}).relu();
    x = batchnorm2d->forward(x);
    x = conv2->forward(x);
    x = torch::max_pool2d(x, {2, 2}).relu();

    x = x.view({-1, 320});
    x = linear1->forward(x).clamp_min(0);
    x = batchnorm1->forward(x);
    x = linear2->forward(x);
    x = torch::log_softmax(x, 1);
    return x;
  };

  torch::optim::SGD optimizer(
      model->parameters(), torch::optim::SGDOptions(1e-2).momentum(0.5));

  ASSERT_TRUE(test_mnist(
      32, // batch_size
      3, // number_of_epochs
      true, // with_cuda
      model,
      forward,
      optimizer));
}