Caffe2 - C++ API
A deep learning, cross-platform ML framework
ddp.h
1 #pragma once
2 
3 #include <c10d/ProcessGroup.hpp>
4 
5 #include <ATen/ATen.h>
6 #include <c10/util/Optional.h>
7 
8 #include <cstddef>
9 #include <memory>
10 #include <tuple>
11 #include <vector>
12 
13 namespace c10d {
14 
// bucketTensors: declaration only; no body is present in this header.
//   tensors     - vector of at::Tensor, taken by non-const reference.
//   bucketSize  - int64_t. NOTE(review): the unit (bytes vs. element count) is
//                 not visible from this declaration; confirm in the definition.
//   fineGrained - optional flag, defaults to false. Its effect is not visible
//                 here; TODO confirm in the definition.
// Returns a vector of tensor vectors; presumably one inner vector per bucket
// (TODO confirm against the definition).
// NOTE(review): int64_t is used although <cstdint> is not among the visible
// includes; presumably it arrives transitively via the ATen/c10d headers.
15 std::vector<std::vector<at::Tensor>> bucketTensors(
16  std::vector<at::Tensor>& tensors,
17  int64_t bucketSize,
18  bool fineGrained = false);
19 
// distBroadcastCoalesced: declaration only; no body is present in this header.
//   processGroup - ProcessGroup taken by non-const reference.
//   tensors      - vector of at::Tensor, taken by non-const reference.
//                  NOTE(review): presumably updated in place with broadcast
//                  values; confirm in the definition.
//   bufferSize   - int64_t. NOTE(review): unit not visible here; presumably the
//                  size limit used when coalescing tensors. TODO confirm.
//   fineGrained  - optional flag, defaults to false; effect not visible here.
// Returns nothing.
20 void distBroadcastCoalesced(
21  ProcessGroup& processGroup,
22  std::vector<at::Tensor>& tensors,
23  int64_t bufferSize,
24  bool fineGrained = false);
25 
// syncParams: declaration only; no body is present in this header.
//   processGroup        - ProcessGroup taken by non-const reference.
//   parameterData       - nested vector of at::Tensor, non-const reference.
//   bufferData          - nested vector of at::Tensor, non-const reference.
//                         NOTE(review): the outer index of both nested vectors
//                         presumably lines up with `devices`; TODO confirm.
//   devices             - read-only vector of int64_t; presumably device
//                         indices (verify against callers).
//   broadcastBucketSize - int64_t; unit not visible from this declaration.
//   broadcastBuffers    - bool with no default; presumably controls whether
//                         `bufferData` is synchronized as well. TODO confirm.
// Returns nothing.
26 void syncParams(
27  ProcessGroup& processGroup,
28  std::vector<std::vector<at::Tensor>>& parameterData,
29  std::vector<std::vector<at::Tensor>>& bufferData,
30  const std::vector<int64_t>& devices,
31  int64_t broadcastBucketSize,
32  bool broadcastBuffers);
33 
// queueReduction: declaration only; no body is present in this header.
//   processGroup - ProcessGroup taken by non-const reference.
//   gradsBatch   - nested vector of at::Tensor, non-const reference.
//                  NOTE(review): presumably one inner vector per entry of
//                  `devices`; TODO confirm in the definition.
//   devices      - read-only vector of int64_t; presumably device indices.
// Returns a tuple of (shared_ptr to ProcessGroup::Work, at::Tensor).
// NOTE(review): these two types match the first and third parameters of
// syncReduction, so the result looks intended to be handed to it; confirm
// against callers.
34 std::tuple<std::shared_ptr<ProcessGroup::Work>, at::Tensor> queueReduction(
35  ProcessGroup& processGroup,
36  std::vector<std::vector<at::Tensor>>& gradsBatch,
37  const std::vector<int64_t>& devices);
38 
// syncReduction: declaration only; no body is present in this header.
//   reductionWork       - shared_ptr to ProcessGroup::Work, taken by non-const
//                         reference. NOTE(review): presumably waited on before
//                         results are read; TODO confirm in the definition.
//   gradsBatch          - single-level vector of at::Tensor (unlike the nested
//                         `gradsBatch` accepted by queueReduction), non-const
//                         reference.
//   gradsBatchCoalesced - at::Tensor taken by non-const reference; presumably
//                         the coalesced reduction result to be copied back into
//                         `gradsBatch` (verify against the definition).
// Returns nothing.
39 void syncReduction(
40  std::shared_ptr<ProcessGroup::Work>& reductionWork,
41  std::vector<at::Tensor>& gradsBatch,
42  at::Tensor& gradsBatchCoalesced);
43 
44 } // namespace c10d
Definition: ddp.cpp:21