1 #ifndef CAFFE2_OPERATORS_RECURRENT_NETWORK_GPU_EXECUTOR_H_ 2 #define CAFFE2_OPERATORS_RECURRENT_NETWORK_GPU_EXECUTOR_H_ 4 #include "caffe2/core/context_gpu.h" 5 #include "caffe2/operators/rnn/recurrent_network_executor.h" 15 const NetDef& step_net_def,
16 std::map<string, string>& recurrent_input_map,
17 std::string timestep_blob)
// Override of the base-class Run(); only the declaration is present here, the
// definition lives outside this excerpt. Returns a bool.
// NOTE(review): T is presumably the number of timesteps to execute -- confirm
// against the base class declaration.
23 bool Run(
int T)
override;
// Override of the base-class RunBackwards(); only the declaration is present
// here, the definition lives outside this excerpt. Returns a bool.
// NOTE(review): T is presumably the number of timesteps to walk back over --
// confirm against the base class declaration.
25 bool RunBackwards(
int T)
override;
27 bool ignoreLinkDependencies()
override {
39 has_timestep_parallelism_ =
false;
40 for (
auto& rnn_op : timestep_ops_template_) {
42 if (rnn_op.parents.size() >= 1 && i < timestep_ops_template_.size() - 1) {
43 bool only_recurrent_deps = std::all_of(
44 rnn_op.parents.begin(),
45 rnn_op.parents.end(), [&](
const int &parent) {
49 if (only_recurrent_deps) {
50 VLOG(1) <<
"Timestep parallel op: " << ProtoDebugString(step_net_def_.op(i));
51 has_timestep_parallelism_ =
true;
53 for (
int dep : rnn_op.parents) {
54 if (dep == timestep_ops_template_.size() - 1) {
57 has_timestep_parallelism_ =
false;
65 LOG(INFO) <<
"Analyzed ops for timestep parallelism: " << has_timestep_parallelism_;
// Stores n in max_cuda_streams_, replacing its in-class default of 2.
// No validation of n is visible in this excerpt.
70 void setMaxStreams(
int n) {
71 max_cuda_streams_ = n;
// Helper declared here and defined elsewhere.
// NOTE(review): from/to presumably delimit a range of timesteps; whether `to`
// is inclusive or exclusive cannot be determined from this declaration --
// confirm in the definition.
75 void _ExecRange(
int from,
int to);
// CUDA event handles held by this executor. How they are created, recorded
// and destroyed is not visible in this excerpt.
77 std::vector<cudaEvent_t> events_;
// Result of the op analysis above: reset to false, set to true when an op
// with only recurrent dependencies is found, and forced back to false when
// such an op depends on the last op of the timestep template.
78 bool has_timestep_parallelism_ =
false;
// Value assigned by setMaxStreams(); defaults to 2.
// NOTE(review): presumably an upper bound on concurrently used CUDA streams --
// confirm where it is read.
79 int max_cuda_streams_ = 2;
RecurrentNetworkExecutor is a specialized runtime for recurrent neural networks (RNNs).
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
void AnalyzeOps() override