doxygen-c/html/net__async__base_8h_source.html

 #ifndef CAFFE2_CORE_NET_ASYNC_BASE_H_
 #define CAFFE2_CORE_NET_ASYNC_BASE_H_

 #include "c10/core/thread_pool.h"
 #include "c10/util/Registry.h"
 #include "caffe2/core/common.h"
 #include "caffe2/core/net.h"
 #include "caffe2/core/net_dag_utils.h"
 #include "caffe2/core/prof_dag_counters.h"
 #include "caffe2/core/stats.h"
 #include "caffe2/core/timer.h"
 #include "caffe2/core/workspace.h"
 #include "caffe2/proto/caffe2_pb.h"
 #include "caffe2/proto/prof_dag.pb.h"
 #include "caffe2/utils/proto_utils.h"

 C10_DECLARE_int(caffe2_streams_per_gpu);
 C10_DECLARE_int(caffe2_net_async_max_gpus);
 C10_DECLARE_int(caffe2_net_async_max_numa_nodes);
 C10_DECLARE_int(caffe2_net_async_thread_pool_size);
 C10_DECLARE_bool(caffe2_net_async_check_stream_status);
 C10_DECLARE_bool(caffe2_net_async_use_single_pool);
 C10_DECLARE_bool(caffe2_net_async_use_per_net_pools);
 C10_DECLARE_bool(caffe2_net_async_run_root_tasks_inline);

 namespace caffe2 {

 class AsyncNetExecutorHelper;

 namespace tracing {
 class Tracer;
 }

 struct ExecutionOptions {
   explicit ExecutionOptions(const std::shared_ptr<const NetDef>& net_def);

   // number of gpu streams per gpu per cpu thread
   int streams_per_gpu_ = 1;
   // ops synchronization options
   bool finish_chain_ = false;
   bool always_schedule_child_ = false;
   // try to pick gpu stream that is not busy
   bool check_stream_status_ = false;
   // use single thread pool for all devices
   bool use_single_pool_ = false;
   // use per net instances thread pools instead of global ones
   bool use_per_net_pools_ = false;
   // whether RunAsync is blocking
   bool is_blocking_ = false;
   // prof_dag counters reporting
   bool report_stats_ = false;
   // immediately run children tasks inline whenever possible
   bool use_dfs_scheduling_ = false;
   // run net's root tasks in RunAsync thread instead of in thread pool
   bool run_root_tasks_inline_ = false;
 };

 class CAFFE2_API AsyncNetBase : public NetBase {
  public:
   AsyncNetBase(const std::shared_ptr<const NetDef>& net_def, Workspace* ws);
   ~AsyncNetBase() override;

   bool SupportsAsync() override {
     return true;
   }

   vector<OperatorBase*> GetOperators() const override {
     return operators_;
   }

   bool RunAsync() override;

   const dag_utils::ExecutionChains& TEST_execution_chains() const {
     return execution_chains_;
   }

   ProfDAGProtos GetOperatorStats() const;
   ProfDAGProtos GetPerOperatorCost() const;
   ProfDAGReport GetProfReport() const;

  protected:
   bool canSchedule(
       int chain_id,
       const std::vector<EventStatus>* status = nullptr,
       bool* parent_failed = nullptr);
   bool canSchedule(int parent_id, int child_id);

   int tasksNum() const;
   Event& event(int task_id) const;
   EventStatus query(int task_id) const;
   const std::vector<int>& children(int task_id) const;
   const std::vector<int>& parents(int task_id) const;
   int updateParentCount(int child_id);
   int getParentCount(int child_id);
   bool testAndSetScheduled(int task_id);
   int numOps(int task_id) const;

   int firstTaskOpId(int task_id) const;
   int lastTaskOpId(int task_id) const;
   const OperatorBase* firstTaskOp(int task_id) const;
   const OperatorBase* lastTaskOp(int task_id) const;
   OperatorBase* firstTaskOp(int task_id);
   OperatorBase* lastTaskOp(int task_id);

   void asyncWait(
       int task_id,
       int stream_id,
       const std::vector<int>& wait_task_ids) const;
   bool run(int task_id, int stream_id) noexcept;
   int stream(int task_id);
   TaskThreadPoolBase* pool(const DeviceOption& device_option);
   TaskThreadPoolBase* pool();

   void finishTasks(const std::unordered_set<int>& task_ids);
   void finalizeEvents();

   bool isStreamFree(int task_id, int stream_id) const;

   virtual void reset();

   bool handleRunError() override;

   // Operator/task graph
   std::vector<OperatorBase*> operators_;
   std::vector<dag_utils::OperatorNode> operator_nodes_;
   std::vector<std::vector<int>> chains_;
   std::vector<dag_utils::OpGraphNode> chain_nodes_; // chains' parents/children
   dag_utils::ExecutionChains execution_chains_; // for testing

   // Pools and streams
   std::mutex pools_mutex_;
   // first int key - device id, second - pool size, one pool per (device, size)
   typedef std::unordered_map<
       int,
       std::unordered_map<int, std::shared_ptr<TaskThreadPoolBase>>>
       PoolsMap;
   PoolsMap cpu_pools_;
   PoolsMap gpu_pools_;
   static std::vector<int>& getStreamCounters();
   int num_workers_;

   // Exception/error handling
   void handleChainError(
       int task_id,
       OperatorBase* op,
       const char* err_msg,
       bool save_exception = false) noexcept;
   std::atomic<bool> success_;

   // Tracing
   std::shared_ptr<tracing::Tracer> tracer_;

   // execution mode flags
   ExecutionOptions options_;

   ProfDAGCounters counters_;

   C10_DISABLE_COPY_AND_ASSIGN(AsyncNetBase);

  private:
   TaskThreadPoolBase*
   poolGetter(PoolsMap& pools, int device_type, int device_id, int pool_size);

   std::unique_ptr<AsyncNetExecutorHelper> helper_;

   friend class AsyncNetExecutorHelper;
   friend class tracing::Tracer;
 };

 class AsyncNetExecutorHelper : public ExecutorHelper {
  public:
   explicit AsyncNetExecutorHelper(AsyncNetBase* net) : net_(net) {}
   TaskThreadPoolBase* GetPool(const DeviceOption& option) const override {
     return net_->pool(option);
   }

  private:
   AsyncNetBase* net_;
 };

 template <class TaskThreadPoolImpl, int device_type>
 std::shared_ptr<TaskThreadPoolBase>
 GetAsyncNetThreadPool(int device_id, int pool_size, bool create_new) {
   static std::unordered_map<
       int,
       std::unordered_map<int, std::weak_ptr<TaskThreadPoolBase>>>
       pools;
   static std::mutex pool_mutex;

   const auto& device_type_name = DeviceTypeName(device_type);

   if (pool_size <= 0) {
     if (FLAGS_caffe2_net_async_thread_pool_size > 0) {
       pool_size = FLAGS_caffe2_net_async_thread_pool_size;
       LOG(INFO) << "Using default " << device_type_name
                 << " pool size: " << pool_size << "; device id: " << device_id;
     } else {
       auto num_cores = std::thread::hardware_concurrency();
       CAFFE_ENFORCE(num_cores > 0, "Failed to get number of CPU cores");
       LOG(INFO) << "Using estimated " << device_type_name
                 << " pool size: " << num_cores << "; device id: " << device_id;
       pool_size = num_cores;
     }
   } else {
     LOG(INFO) << "Using specified " << device_type_name
               << " pool size: " << pool_size << "; device id: " << device_id;
   }

   if (create_new) {
     LOG(INFO) << "Created new " << device_type_name
               << " pool, size: " << pool_size << "; device id: " << device_id;
     return std::make_shared<TaskThreadPoolImpl>(pool_size, device_id);
   } else {
     std::lock_guard<std::mutex> lock(pool_mutex);

     auto shared_pool = pools[device_id][pool_size].lock();
     if (!shared_pool) {
       LOG(INFO) << "Created shared " << device_type_name
                 << " pool, size: " << pool_size << "; device id: " << device_id;
       shared_pool = std::make_shared<TaskThreadPoolImpl>(pool_size, device_id);
       pools[device_id][pool_size] = shared_pool;
     }
     return shared_pool;
   }
 }

 } // namespace caffe2

 #endif // CAFFE2_CORE_NET_ASYNC_BASE_H_
caffe2::ProfDAGCounters
A simple wrapper around prof_dag&#39;s counters.
Definition: prof_dag_counters.h:94

caffe2::AsyncNetExecutorHelper
Definition: net_async_base.h:170

caffe2::ExecutorHelper
Definition: net.h:130

caffe2::AsyncNetBase
Definition: net_async_base.h:58

caffe2::tracing::Tracer
Definition: net_async_tracing.h:71

caffe2::Workspace
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:47

caffe2
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13

c10::TaskThreadPoolBase
Definition: thread_pool.h:22

caffe2::NetBase
Definition: net.h:38

caffe2::OperatorBase
Definition: operator.h:38

caffe2::ProfDAGReport
Definition: prof_dag_counters.h:52

caffe2::Event
Definition: event.h:58

caffe2::ExecutionOptions
Definition: net_async_base.h:34