// Declaration of bucketTensors (preceded on the same line by two #include
// directives). As visible here it takes a mutable reference to a vector of
// tensors plus a boolean flag, and returns a vector of tensor vectors.
// NOTE(review): the leading digits on these lines (3, 6, 15, 16, 18) look like
// line-number residue from extraction rather than real tokens — confirm.
// NOTE(review): that numbering skips 17, so a parameter between `tensors` and
// `fineGrained` may be missing from this view — verify against the original
// header before relying on this signature.
3 #include <c10d/ProcessGroup.hpp> 6 #include <c10/util/Optional.h> 15 std::vector<std::vector<at::Tensor>> bucketTensors(
16 std::vector<at::Tensor>& tensors,
// `fineGrained` is optional for callers; its default is false.
18 bool fineGrained =
false);
// Declaration of distBroadcastCoalesced. As visible here it takes a process
// group and a vector of tensors (both by mutable reference) plus a boolean
// flag, and returns nothing.
// NOTE(review): presumably broadcasts `tensors` across the process group in
// coalesced chunks — the implementation is not visible here; confirm.
// NOTE(review): the embedded numbering skips 23, so a parameter between
// `tensors` and `fineGrained` may be missing from this view — verify against
// the original header.
20 void distBroadcastCoalesced(
21 ProcessGroup& processGroup,
22 std::vector<at::Tensor>& tensors,
// `fineGrained` is optional for callers; its default is false.
24 bool fineGrained =
false);
27 ProcessGroup& processGroup,
28 std::vector<std::vector<at::Tensor>>& parameterData,
29 std::vector<std::vector<at::Tensor>>& bufferData,
30 const std::vector<int64_t>& devices,
31 int64_t broadcastBucketSize,
32 bool broadcastBuffers);
// Declaration of queueReduction.
// Parameters (as declared):
//   processGroup - process group, passed by mutable reference.
//   gradsBatch   - vector of tensor vectors, passed by mutable reference.
//   devices      - read-only list of int64_t values.
// Returns a tuple holding a shared pointer to a ProcessGroup::Work object and
// an at::Tensor.
// NOTE(review): presumably the Work handle tracks an asynchronous reduction
// of the gradients and the tensor is its result buffer, with `devices` being
// device indices — none of this is visible in this declaration; confirm
// against the implementation.
// NOTE(review): the leading digits (34-37) look like line-number residue from
// extraction rather than real tokens — confirm.
34 std::tuple<std::shared_ptr<ProcessGroup::Work>,
at::Tensor> queueReduction(
35 ProcessGroup& processGroup,
36 std::vector<std::vector<at::Tensor>>& gradsBatch,
37 const std::vector<int64_t>& devices);
40 std::shared_ptr<ProcessGroup::Work>& reductionWork,
41 std::vector<at::Tensor>& gradsBatch,