// This line carries two header includes (c10d/ProcessGroup.hpp and
// c10/util/Optional.h) followed by the start of the bucketTensors declaration.
//
// bucketTensors: declaration only; the definition is not in this file section.
//   tensors     - vector of at::Tensor, taken by non-const reference.
//   fineGrained - boolean flag, defaults to false.
//   returns     - a vector of vectors of at::Tensor.
// NOTE(review): presumably partitions `tensors` into groups ("buckets"); this
// is inferred from the name and return type only - confirm against the
// definition.
// NOTE(review): the numbering embedded in this extract skips from 16 to 18, so
// a parameter line between `tensors` and `fineGrained` may be missing here -
// TODO confirm against the original header.
3 #include <c10d/ProcessGroup.hpp>     6 #include <c10/util/Optional.h>    15 std::vector<std::vector<at::Tensor>> bucketTensors(
    16     std::vector<at::Tensor>& tensors,
    18     bool fineGrained = 
false);
    // distBroadcastCoalesced: declaration only; the definition is not in this
    // file section. Returns nothing (void).
    //   processGroup - ProcessGroup taken by non-const reference.
    //   tensors      - vector of at::Tensor, taken by non-const reference.
    //   fineGrained  - boolean flag, defaults to false.
    // NOTE(review): presumably broadcasts `tensors` across `processGroup` in
    // coalesced batches; inferred from the name only - confirm against the
    // definition.
    // NOTE(review): the numbering embedded in this extract skips from 22 to 24,
    // so a parameter line between `tensors` and `fineGrained` may be missing
    // here - TODO confirm against the original header.
    20 void distBroadcastCoalesced(
    21     ProcessGroup& processGroup,
    22     std::vector<at::Tensor>& tensors,
    24     bool fineGrained = 
false);
    27     ProcessGroup& processGroup,
    28     std::vector<std::vector<at::Tensor>>& parameterData,
    29     std::vector<std::vector<at::Tensor>>& bufferData,
    30     const std::vector<int64_t>& devices,
    31     int64_t broadcastBucketSize,
    32     bool broadcastBuffers);
    // queueReduction: declaration only; the definition is not in this file
    // section.
    //   processGroup - ProcessGroup taken by non-const reference.
    //   gradsBatch   - vector of vectors of at::Tensor, taken by non-const
    //                  reference.
    //   devices      - read-only vector of int64_t values.
    //   returns      - a tuple pairing a shared pointer to a ProcessGroup::Work
    //                  with an at::Tensor.
    // NOTE(review): presumably starts a reduction of the gradients in
    // `gradsBatch` and hands back the pending work handle together with the
    // tensor being reduced; inferred from the names only - confirm against the
    // definition.
    34 std::tuple<std::shared_ptr<ProcessGroup::Work>, 
at::Tensor> queueReduction(
    35     ProcessGroup& processGroup,
    36     std::vector<std::vector<at::Tensor>>& gradsBatch,
    37     const std::vector<int64_t>& devices);
    40     std::shared_ptr<ProcessGroup::Work>& reductionWork,
    41     std::vector<at::Tensor>& gradsBatch,