// Forked from jiuyuan/InfiniTensor
#pragma once
#include "core/operator.h"
namespace infini {
/**
|
||
|
* @brief The Broadcast operation copies an N-element buffer on the root rank to
|
||
|
* all ranks.
|
||
|
*
|
||
|
* For more details:
|
||
|
* https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/usage/collectives.html#broadcast
|
||
|
*/
|
||
|
class BroadcastObj : public OperatorObj {
|
||
|
public:
|
||
|
/**
|
||
|
* @brief Construct a new Broadcast object.
|
||
|
*
|
||
|
* @param graph The computation graph that this operator belongs to.
|
||
|
* @param input The input tensor. Only root needs to initialize it with
|
||
|
* data.
|
||
|
* @param output The output tensor, same size as input.
|
||
|
* @param root The root rank who performs the broadcast.
|
||
|
*/
|
||
|
BroadcastObj(GraphObj *graph, Tensor input, Tensor output, int root);
|
||
|
OP_CLONE(BroadcastObj);
|
||
|
|
||
|
int numInputs() const override { return 1; }
|
||
|
int numOutputs() const override { return 1; }
|
||
|
|
||
|
optional<vector<Shape>> inferShape(const TensorVec &inputs) const override {
|
||
|
return {{inputs[0]->getDims()}};
|
||
|
};
|
||
|
|
||
|
std::string toString() const override;
|
||
|
|
||
|
int getRoot() const { return root; }
|
||
|
|
||
|
private:
|
||
|
vector<int> getWorkloadVector() const override;
|
||
|
vector<int> getOpAttrVector() const override;
|
||
|
vector<DataType> inferDataType(const TensorVec &inputs) const override {
|
||
|
return {inputs[0]->getDType()};
|
||
|
};
|
||
|
|
||
|
protected:
|
||
|
// The rank who broadcasts data among this communication group
|
||
|
int root;
|
||
|
};
} // namespace infini