Caffe2 - C++ API
A deep learning, cross-platform ML framework
recurrent_network_executor.h
#ifndef CAFFE2_OPERATORS_RECURRENT_NETWORK_EXECUTOR_H_
#define CAFFE2_OPERATORS_RECURRENT_NETWORK_EXECUTOR_H_

#include <map>
#include <unordered_set>
#include <vector>

#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/timer.h"
#include "caffe2/operators/recurrent_network_executor_incl.h"

namespace caffe2 {

/**
 * RecurrentNetworkExecutor is a specialized runtime for recurrent
 * neural networks (RNNs).
 */
class RecurrentNetworkExecutorBase {
 protected:
  RecurrentNetworkExecutorBase(
      const NetDef& step_net_def,
      std::map<string, string>& recurrent_input_map,
      std::string timestep_blob)
      : step_net_def_(step_net_def),
        recurrent_input_map_(recurrent_input_map),
        timestep_blob_(timestep_blob) {
    for (int i = 0; i < step_net_def_.op_size(); i++) {
      op_deps_.push_back(op_deps(i));
    }
  }

 public:
  virtual ~RecurrentNetworkExecutorBase() {
    if (debug_) {
      if (timestep_ops_.size() > 0) {
        PrintInfo(0);
      }
    }
  }

  virtual bool Run(int T) = 0;

  virtual bool RunBackwards(int T) = 0;

  /**
   * Callers must call EnsureTimestepInitialized before starting execution
   * for each of the relevant timesteps.
   */
  void EnsureTimestepInitialized(
      int t,
      Workspace* ws,
      const std::vector<std::unique_ptr<ObserverBase<OperatorBase>>>&
          observers_list) {
    if (timestep_ops_template_.size() == 0) {
      // First invocation -- compute dependencies
      CalculateInternalDependencies();

      // Label ops based on whether they contain a reference to the timestep
      // blob. This is an optimization to avoid string comparisons later.
      for (auto& rnn_op : timestep_ops_template_) {
        rnn_op.has_timestep_blob = false;
        const OperatorDef& op = step_net_def_.op(rnn_op.order);
        for (int i = 0; i < op.input_size(); i++) {
          if (op.input(i) == timestep_blob_) {
            rnn_op.has_timestep_blob = true;
            break;
          }
        }
        CAFFE_ENFORCE(
            !HasOutput(op, timestep_blob_),
            "Timestep cannot be output of an op: ",
            timestep_blob_,
            " op=" + ProtoDebugString(op));
      }
    }

    // Initialize timestep if it is not initialized
    if (timestep_ops_.size() <= t ||
        (timestep_ops_.size() > t && timestep_ops_[t].size() == 0)) {
      // Initialize empty timestep ops vectors for each timestep preceding
      // this.
      for (int j = timestep_ops_.size(); j < t + 1; j++) {
        timestep_ops_.push_back(std::vector<RNNNetOperator>());
        timestep_ops_.back().reserve(timestep_ops_template_.size());
      }

      // Keep track of workspaces for optimization in forward-only case
      if (workspaces_.size() < t + 1) {
        workspaces_.resize(t + 1);
      }
      workspaces_[t] = ws;

      // Create a specific timestep blob for this timestep. This is to
      // avoid conflicting timestep blobs when reusing workspaces, as with
      // the forward-only mode.
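      // For example, with timestep_blob_ = "timestep" and t = 2, the
      // per-timestep blob is named "timestep_rnnexec_t2".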
      std::string this_timestep_blob =
          timestep_blob_ + "_rnnexec_t" + caffe2::to_string(t);
      ws->CreateBlob(this_timestep_blob)->GetMutable<TensorCPU>()->Resize(1);
      auto b = ws->GetBlob(this_timestep_blob);
      CAFFE_ENFORCE(b);
      b->GetMutable<TensorCPU>()->mutable_data<int32_t>()[0] = t;

      // Copy the operators from the template.
      for (auto& template_rnn_op : timestep_ops_template_) {
        auto& rnn_op = template_rnn_op;

        // For ops that have the timestep blob as an input we need to
        // create a new operator definition with the timestep-specific
        // timestep blob. This is required to avoid race conditions when
        // multiple timesteps execute in parallel.
        if (rnn_op.has_timestep_blob) {
          OperatorDef op_copy = step_net_def_.op(rnn_op.order);

          for (int i = 0; i < op_copy.input_size(); i++) {
            if (op_copy.input(i) == timestep_blob_) {
              op_copy.set_input(i, this_timestep_blob);
            }
          }

          rnn_op.op = CreateOperator(op_copy, ws);
          for (const auto& observer : observers_list) {
            std::unique_ptr<ObserverBase<OperatorBase>> observer_copy =
                observer->copy(rnn_op.op.get());
            CAFFE_ENFORCE(
                observer_copy,
                "Observers without copy() implemented cannot be attached "
                "to RNN using RNNExecutor.");
            rnn_op.op->AttachObserver(std::move(observer_copy));
          }
        } else {
          // Optimization for forward-only models when we can share workspaces
          // across timesteps: then we can just copy the op reference.
          if (t > max_parallel_timesteps_ && max_parallel_timesteps_ > 0 &&
              workspaces_[t - max_parallel_timesteps_] == ws) {
            rnn_op.op =
                timestep_ops_[t - max_parallel_timesteps_][rnn_op.order].op;
          } else {
            // Otherwise, we need to create a brand new op with the workspace
            // owned by this timestep.
            rnn_op.op = CreateOperator(step_net_def_.op(rnn_op.order), ws);
            for (const auto& observer : observers_list) {
              std::unique_ptr<ObserverBase<OperatorBase>> observer_copy =
                  observer->copy(rnn_op.op.get());
              CAFFE_ENFORCE(
                  observer_copy,
                  "Observers without copy() implemented cannot be attached "
                  "to RNN using RNNExecutor.");
              rnn_op.op->AttachObserver(std::move(observer_copy));
            }
          }
        }
        rnn_op.op->DisableEvent();

        timestep_ops_[t].emplace_back(rnn_op);
      }
    }
  }

  /**
   * Set limit for the number of timesteps that run in parallel.
   */
  void SetMaxParallelTimesteps(int p) {
    max_parallel_timesteps_ = p;
  }

  size_t NumObserversStepNet() {
    size_t num = 0;
    for (auto& ops_at_timestep_t : timestep_ops_) {
      for (auto& rnn_op : ops_at_timestep_t) {
        num += rnn_op.op->NumObservers();
      }
    }
    return num;
  }

 private:
  // Utility method to check if any of the op's inputs or control inputs
  // contain the given blob 'x'.
  bool has_input(std::string x, int opidx) {
    for (auto& inp : step_net_def_.op(opidx).input()) {
      if (inp == x) {
        return true;
      }
    }
    for (auto& inp : step_net_def_.op(opidx).control_input()) {
      if (inp == x) {
        return true;
      }
    }
    return false;
  }

  // Return all outbound dependencies of an op. Special case for
  // rnn dependencies, which are set in recurrent_network_op.
  std::vector<string> op_deps(int i) {
    std::vector<string> outs;
    auto& opdef = step_net_def_.op(i);
    for (string o : opdef.output()) {
      outs.push_back(o);
    }
    for (auto& arg : opdef.arg()) {
      if (arg.name().find("rnn_dependency") == 0) {
        outs.push_back(arg.s());
      }
    }
    return outs;
  }

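  // Scan the ops in circular order starting from op start_i and collect
  // into dep_ops the ops that consume any of the given 'outputs'. When a
  // consumer is found, its own dependencies are marked as already
  // accounted for so they are not added again.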
  void infer_dependencies(
      int start_i,
      std::unordered_set<string> outputs,
      std::vector<RNNNetOperator>& rnn_ops,
      std::unordered_set<int>* dep_ops) {
    std::unordered_set<int> already_accounted_deps;
    int num_ops = step_net_def_.op_size();
    bool ignore_links = this->ignoreLinkDependencies();
    for (int j = 0; j < num_ops - 1 && !outputs.empty(); j++) {
      int i = (start_i + j) % num_ops;
      if (ignore_links && rnn_ops[i].link_op) {
        continue;
      }
      for (auto& outp : outputs) {
        if (has_input(outp, i)) {
          if (already_accounted_deps.find(i) == already_accounted_deps.end()) {
            dep_ops->insert(i);
          }

          // Now we can take the deps of this op and not
          // add them again.
          for (int odep : rnn_ops[i].dependencies) {
            already_accounted_deps.insert(odep);
          }
          for (string& dep_out : op_deps_[i]) {
            auto oit = outputs.find(dep_out);
            if (oit != outputs.end()) {
              // This op produces an output of the original op, so the
              // dependency is passed through that op.
              outputs.erase(oit);
            }
          }
          break;
        }
      }
    }
  }

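  // Add dependencies to handle conflicts between ops that can run
  // concurrently in different timesteps: an op that outputs a blob read
  // by this op, or an earlier op that outputs a blob this op also
  // writes, is recorded as a dependent of this op.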
  void add_race_conflict_dependencies(
      int opidx,
      std::vector<RNNNetOperator>& rnn_ops,
      std::unordered_set<int>* dep_ops) {
    for (int i = 0; i < rnn_ops.size(); i++) {
      if (i == opidx) {
        continue;
      }
      if (rnn_ops[i].link_op && this->ignoreLinkDependencies()) {
        continue;
      }
      for (auto& dep_blob : op_deps_[i]) {
        for (auto& inp : step_net_def_.op(opidx).input()) {
          if (inp == dep_blob) {
            dep_ops->insert(i);
            break;
          }
        }
        if (i < opidx) {
          for (auto& outp : step_net_def_.op(opidx).output()) {
            if (outp == dep_blob) {
              dep_ops->insert(i);
              break;
            }
          }
        }
      }
    }
  }

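  // Calculate the dependency structure between the ops of the step net,
  // i.e. for each op, which other ops it unblocks once it has finished.
  // The result is stored in timestep_ops_template_ and copied for each
  // timestep.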
  void CalculateInternalDependencies() {
    for (int i = 0; i < step_net_def_.op_size(); i++) {
      timestep_ops_template_.push_back(RNNNetOperator(step_net_def_.op(i), i));
    }
    // Then see which outputs appear as inputs, and those are
    // the internal blobs.
    for (auto& rnn_op : timestep_ops_template_) {
      std::unordered_set<string> dep_outputs;
      for (auto& outp : op_deps_[rnn_op.order]) {
        dep_outputs.insert(outp);
      }

      // Add recurrent dependencies as 'outputs' for this op. Collect them
      // into a separate set first so that we do not insert into dep_outputs
      // while iterating over it.
      std::unordered_set<string> recurrent_deps;
      for (auto& outp : dep_outputs) {
        auto rit = recurrent_input_map_.find(outp);
        if (rit != recurrent_input_map_.end()) {
          recurrent_deps.insert(rit->second);
        }
      }
      dep_outputs.insert(recurrent_deps.begin(), recurrent_deps.end());

      // Compute dependencies of this op.
      if (!rnn_op.link_op || !this->ignoreLinkDependencies()) {
        std::unordered_set<int> dependent_ops;
        infer_dependencies(
            rnn_op.order + 1,
            dep_outputs,
            timestep_ops_template_,
            &dependent_ops);

        // Race conditions arise when an operator writes a blob that is
        // being read by another.
        if (!this->ignoreLinkDependencies()) {
          add_race_conflict_dependencies(
              rnn_op.order, timestep_ops_template_, &dependent_ops);
        }

        for (int i : dependent_ops) {
          rnn_op.dependencies.push_back(i);
        }

        // Sort in ascending order of dependency distance. If op
        // j > i, then the distance is j - i. But if j < i, then the distance
        // from i to j crosses the timestep boundary and is j + num_ops - i.
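        // For example, with num_ops = 5 and rnn_op.order = 2, the
        // dependency set {0, 1, 3, 4} sorts to 3, 4, 0, 1: ops later in
        // the same timestep come first, followed by the ops that wrap
        // around into the next timestep.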
        std::sort(
            rnn_op.dependencies.begin(),
            rnn_op.dependencies.end(),
            [&](const int& a, const int& b) {
              if (a < rnn_op.order && b < rnn_op.order) {
                return a < b;
              }
              if (a >= rnn_op.order && b >= rnn_op.order) {
                return a < b;
              }
              if (a >= rnn_op.order && b < rnn_op.order) {
                return true;
              }
              return false;
            });
      }
    }

    // Update dependency counts
    for (auto& rnn_op : timestep_ops_template_) {
      for (int i : rnn_op.dependencies) {
        timestep_ops_template_[i].num_dynamic_inputs++;

        if (i > rnn_op.order) {
          timestep_ops_template_[i].frontier = false;
        } else {
          timestep_ops_template_[i].num_recurrent_inputs++;
        }
      }
    }
    // Find ops that have no recurrent inputs, and bind them
    // to the last op of the timestep. If there is only one op
    // in the step net, then it will depend on itself. Note that
    // we do not increase the dynamic input counter.
    for (auto& rnn_op : timestep_ops_template_) {
      if (rnn_op.num_dynamic_inputs == 0 && rnn_op.num_recurrent_inputs == 0) {
        if (rnn_op.link_op && this->ignoreLinkDependencies()) {
          continue;
        }
        timestep_ops_template_.back().dependencies.push_back(rnn_op.order);
      }
    }

    // Compute parents.
    for (auto& rnn_op : timestep_ops_template_) {
      for (int dep : rnn_op.dependencies) {
        timestep_ops_template_[dep].parents.push_back(rnn_op.order);
      }
    }
    AnalyzeOps();
  }

 protected:
  /**
   * For debug purposes, print the dependency structure.
   */
  void PrintInfo(int t) {
    auto& rnn_ops = timestep_ops_[t];

    LOG(INFO) << "Timestep: " << t;
    for (auto& rnn_op : rnn_ops) {
      auto& op = rnn_op.op;
      LOG(INFO) << "Operator " << rnn_op.order << ": " << op->type()
                << " dep inputs:" << rnn_op.num_dynamic_inputs
                << " rec inputs:" << rnn_op.num_recurrent_inputs
                << " frontier: " << rnn_op.frontier;
      for (auto& inp : rnn_op.op->debug_def().input()) {
        LOG(INFO) << " ---- input: " << inp;
      }
      for (auto& outp : rnn_op.op->debug_def().output()) {
        LOG(INFO) << " ---- output: " << outp;
      }
      for (auto j : rnn_op.dependencies) {
        LOG(INFO) << " dep: " << j << ": " << rnn_ops[j].op->type();
      }
      for (auto j : rnn_op.parents) {
        LOG(INFO) << " parent: " << j << ": " << rnn_ops[j].op->type();
      }
    }

    LOG(INFO) << "recurrent_inputs:" << recurrent_input_map_;

    for (auto& rnn_op : rnn_ops) {
      LOG(INFO) << "Operator " << rnn_op.order;
      LOG(INFO) << ProtoDebugString(rnn_op.op->debug_def());
    }
  }

  virtual void AnalyzeOps() {}

  virtual bool ignoreLinkDependencies() = 0;

  std::vector<std::vector<RNNNetOperator>> timestep_ops_;
  std::vector<OperatorBase*> op_ptrs_;

  std::vector<RNNNetOperator> timestep_ops_template_;

  NetDef step_net_def_;
  std::vector<std::vector<string>> op_deps_;
  std::vector<Workspace*> workspaces_;
  std::map<string, string> recurrent_input_map_;
  std::string timestep_blob_;

  int max_parallel_timesteps_ = -1;

 public:
  bool debug_ = false;
};

template <class Context>
std::unique_ptr<RecurrentNetworkExecutorBase> createRNNExecutor(
    const NetDef& step_net_def,
    std::map<string, string>& recurrent_input_map,
    std::string timestep_blob,
    ArgumentHelper rnn_args);

class ThreadedRecurrentNetworkExecutor : public RecurrentNetworkExecutorBase {
 public:
  ThreadedRecurrentNetworkExecutor(
      const NetDef& step_net_def,
      std::map<string, string>& recurrent_input_map,
      std::string timestep_blob)
      : RecurrentNetworkExecutorBase(
            step_net_def,
            recurrent_input_map,
            timestep_blob),
        failed_(false) {}

  ~ThreadedRecurrentNetworkExecutor() {
    task_queue_.NoMoreJobs();
    VLOG(1) << "Joining workers.";
    for (auto& worker : workers_) {
      worker.join();
    }
  }

  bool Run(int T) override;

  bool RunBackwards(int T) override;

  bool ignoreLinkDependencies() override {
    return false;
  }

  // Set the number of worker threads used to execute the ops.
  void setNumThreads(int n) {
    num_threads_ = n;
  }

 private:
  void _ExecRange(int from, int to);

  void _Exec();

  void WorkerFunction();

  void RunOp(OpTask job, int thread_id);

  SimpleQueue<OpTask> task_queue_;
  std::atomic<int> countdown_;
  std::atomic<bool> failed_;
  std::atomic<int> finished_timesteps_;
  int num_ops_;
  std::mutex countdown_mtx_;
  std::condition_variable cv_;
  std::vector<std::thread> workers_;
  int num_threads_ = 4;
};

} // namespace caffe2

#endif // CAFFE2_OPERATORS_RECURRENT_NETWORK_EXECUTOR_H_
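
Typical usage. The executor is created via createRNNExecutor<Context> and driven one timestep at a time: callers must invoke EnsureTimestepInitialized for every relevant timestep before starting execution, then call Run (or RunBackwards for the gradient pass). The sketch below shows the shape of that sequence; step_net_def, recurrent_input_map, operator_def, and the per-timestep workspace vector step_workspaces are hypothetical names standing in for state that, in Caffe2 itself, is prepared by recurrent_network_op.

// A minimal sketch, not the canonical call sequence. Assumes step_net_def
// (the per-timestep step net), recurrent_input_map, operator_def (the
// OperatorDef carrying the RNN arguments), and a vector of per-timestep
// Workspace pointers 'step_workspaces' have already been set up.
std::unique_ptr<RecurrentNetworkExecutorBase> executor =
    createRNNExecutor<CPUContext>(
        step_net_def,
        recurrent_input_map,
        "timestep",                    // name of the timestep blob
        ArgumentHelper(operator_def));

// Every timestep must be initialized before execution begins.
std::vector<std::unique_ptr<ObserverBase<OperatorBase>>> observers;
for (int t = 0; t < T; t++) {
  executor->EnsureTimestepInitialized(t, step_workspaces[t], observers);
}

// Run all T timesteps forward; RunBackwards(T) runs the gradient pass.
CAFFE_ENFORCE(executor->Run(T));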