diff --git a/include/core/graph.h b/include/core/graph.h index f5ae6fa7..ef925aea 100644 --- a/include/core/graph.h +++ b/include/core/graph.h @@ -30,7 +30,7 @@ class GraphObj : public Object { */ template Ref addOp(Args &&...args) { Ref op = infini::make_ref(this, std::forward(args)...); - ops.push_back(op); + addOperatorAndConnect(op); return op; } @@ -40,7 +40,7 @@ class GraphObj : public Object { template Ref addOpWithOutputs(Args &&...args) { Ref op = infini::make_ref(nullptr, std::forward(args)...); - ops.push_back(op); + addOperatorAndConnect(op); return op; } @@ -55,11 +55,10 @@ class GraphObj : public Object { void dataMalloc(); private: - // TODO: updateConnection /** * @brief Add reverse connections and Op relationship in ctor. */ - void updateConnection(); + void addOperatorAndConnect(const Operator &op); // TODO: move to another class // bool exportOnnx(const char *path); diff --git a/include/core/object.h b/include/core/object.h index 0faec9f5..64cab597 100644 --- a/include/core/object.h +++ b/include/core/object.h @@ -35,7 +35,7 @@ class Object { virtual ~Object(){}; virtual string toString() const = 0; void print() { std::cout << toString() << std::endl; } - Guid getGuid() const { return guid; } + GuidBaseType getGuid() const { return guid; } }; inline std::ostream &operator<<(std::ostream &os, const Object &obj) { diff --git a/include/core/operator.h b/include/core/operator.h index a40de331..f5efc8e7 100644 --- a/include/core/operator.h +++ b/include/core/operator.h @@ -142,12 +142,11 @@ class OperatorObj : public Object { OpType type; TensorVec inputs; TensorVec outputs; - // vector> predecessors; - // vector> successors; + vector> predecessors; + vector> successors; public: - OperatorObj(OpType opType, TensorVec inputs, TensorVec outputs) - : type(opType), inputs(inputs), outputs(outputs) {} + OperatorObj(OpType opType, TensorVec inputs, TensorVec outputs); virtual optional> inferShape(const TensorVec &inputs) const = 0; virtual vector inferDataType(const TensorVec &inputs) const; @@ -177,9 +176,7 @@ class OperatorObj : public Object { bool isMemBoundOp() const; public: // getter and setter - // TensorVec getInputs() { return inputs; } const TensorVec &getInputs() const { return inputs; } - // TensorVec getOutputs() { return outputs; } const TensorVec &getOutputs() const { return outputs; } Tensor getInputs(size_t i) const { return inputs.at(i); } Tensor getOutput() const { @@ -190,6 +187,10 @@ class OperatorObj : public Object { IT_ASSERT(i < outputs.size(), "Index exceeded"); return outputs.at(i); } + void addPredecessors(const Operator &op) { predecessors.emplace_back(op); } + void addSuccessors(const Operator &op) { successors.emplace_back(op); } + OpVec getPredecessors() const { return wrefs_to_refs(predecessors); } + OpVec getSuccessors() const { return wrefs_to_refs(successors); } OpType getOpType() const { return type; } // HACK: set correct data type DataType getDType() const { return getInputs(0)->getDType(); } diff --git a/include/core/ref.h b/include/core/ref.h index 76357818..b88bca99 100644 --- a/include/core/ref.h +++ b/include/core/ref.h @@ -25,12 +25,19 @@ Ref as(const Ref &ref) { } template -std::vector> get_wref_vec(const std::vector> &vec) { - std::vector> wref_vec; - wref_vec.reserve(vec.size()); - for (const auto &ref : vec) - wref_vec.emplace_back(ref); - return wref_vec; +std::vector> refs_to_wrefs(const std::vector> &refs) { + std::vector> wrefs; + for (const auto &ref : refs) + wrefs.emplace_back(ref); + return wrefs; +} + +template +std::vector> wrefs_to_refs(const std::vector> &wrefs) { + std::vector> refs; + for (const auto &wref : wrefs) + refs.emplace_back(wref); + return refs; } } // namespace infini \ No newline at end of file diff --git a/include/core/tensor_base.h b/include/core/tensor_base.h index b6118477..5f2a69cf 100644 --- a/include/core/tensor_base.h +++ b/include/core/tensor_base.h @@ -19,8 +19,8 @@ class TensorBaseObj : public Object { int dim; DataType dtype; - vector> inputOf; - WRef outputOf; + vector> inputOf; + WRef outputOf; Blob data; Runtime runtime; @@ -44,41 +44,12 @@ class TensorBaseObj : public Object { DataType getDType() const { return dtype; } Runtime getRuntime() const { return runtime; } - // uint64_t getHash() const { return hash; } - - // void setInputOf(const OpVec &ops) { - // inputOf.clear(); - // for (const auto &op : ops) - // inputOf.emplace_back(op); - // } - // void addInputOf(Operator op) { inputOf.emplace_back(op); } - // void setOutputOf(Operator op) { outputOf = op; } - - // const OpVec &getInputOf() { return inputOf; } - // Operator *getOutputOf() { return outputOf; } + void addInputOf(const Operator &op) { inputOf.emplace_back(op); } + void setOutputOf(const Operator &op) { outputOf = op; } + OpVec getInputOf() { return wrefs_to_refs(inputOf); } + Operator getOutputOf() { return outputOf.lock(); } // std::pair getOutputOfWithIndex(); - // const Dim &getDims() const { return dims; } - // void setDims(const Dim &dms) { dims = dms; } - - // bool dataRand(int seed = 0) { - // if (data == nullptr) - // data = new VType[size()]; - // if (!random_inited) - // initFastrand(); - // // srand(seed); - // // faster rand generator; parallel - // size_t iEnd = size(); - // // std::cerr << "Init beginned " << std::endl; - // #pragma omp parallel for - // for (size_t i = 0; i < iEnd; ++i) - // data[i] = fastrand(random_seed[omp_get_thread_num() * 16]) % - // 10000; - // // std::cerr << "Init finished" << std::endl; - // computed = ComputedFull; - // return true; - // } - // bool setScalar(VType val) { // if (data == nullptr || !dims.empty()) // return false; @@ -102,35 +73,6 @@ class TensorBaseObj : public Object { // VType getScalar() { return data == nullptr ? 0 : data[0]; } - // VType getData(const Dim &ds) { - // assert(data != nullptr); - // auto offset = getOffset(ds); - // return offset == (size_t)-1 ? 0 : data[getOffset(ds)]; - // } - - // VType getData(size_t pos) { - // assert(data != nullptr); - // assert(pos < size()); - // return data[pos]; - // } - - // VType *getDataPtr() const { return data; } - - // size_t getOffset(const Dim &ds) { - // auto nDim = ds.size(); - // assert(dims.size() == nDim); - // if (ds.empty()) - // return 0; - // for (size_t i = 0; i < nDim; ++i) - // if (ds[i] < 0 || ds[i] >= dims[i]) - // return (size_t)-1; - // size_t idx = ds[0]; - // size_t dm = 0; - // while (++dm < nDim) - // idx = idx * dims[dm] + ds[dm]; - // return idx; - // } - // VType getBroadcastData(const Dim &ds) { // assert(data != nullptr); // auto offset = getBroadcastOffset(ds); @@ -155,96 +97,6 @@ class TensorBaseObj : public Object { // idx = idx * dims[i] + ds[nBroadcastDim + i]; // return idx; // } - - // void itInit() { it = Dim(dims.size(), 0); } - - // void itReset() { - // itInit(); - // for (size_t i = 0, iEnd = it.size(); i < iEnd; ++i) - // it[i] = 0; - // } - - // bool itValid() { - // if (it.size() != dims.size()) - // return false; - // for (size_t i = 0, iEnd = it.size(); i < iEnd; ++i) - // if (it[i] >= dims[i]) - // return false; - // return true; - // } - - // const Dim &itGet() { return it; } - - // void itNext() { - // auto p = it.size() - 1; - // it[p] += 1; - // while (p >= 1) { - // if (it[p] == dims[p]) { - // it[p] = 0; - // it[--p] += 1; - // } else - // break; - // } - // } - - // size_t size() const { - // size_t sz = 1; - // auto dm = dims.size(); - // while (dm > 0) - // sz *= dims[--dm]; - // return sz; - // } - - // TensorType getType() const { return type; } - // void setType(TensorType ty) { type = ty; } - - // static inline void initFastrand() { - // assert(omp_get_max_threads() <= 256); - // // srand(0); // constant seed for test - // // align random_seed to avoid false sharing - // for (int i = 0; i < 256 * 16; ++i) { - // // random_seed[i] = rand(); - // // constant random seed for test - // random_seed[i] = i; - // } - // random_inited = true; - // } - - // static inline int fastrand(int &g_seed) { - // g_seed = (214013 * g_seed + 2531011); - // return (g_seed >> 16) & 0x7FFF; - // } - - // std::vector> const *getSplittingPoints() const { - // assert(!splittingPoints.empty()); - // return &splittingPoints; - // } - - // bool setSplittingPoints(std::vector> value) { - // assert(!value.empty()); - // splittingPoints = value; - // return true; - // } - - // void printSplittingPoints() { - // if (splittingPoints.empty()) - // printf("Empty SplittingPoints"); - // else { - // printf("["); - // for (auto &vs : splittingPoints) { - // printf("["); - // for (auto v : vs) - // printf("%2d,", v); - // printf("],"); - // } - // printf("]"); - // } - // } - - // void initSplittingPoints() { - // splittingPoints.resize(getDims().size()); } - - // void printShape(); }; } // namespace infini diff --git a/src/core/graph.cc b/src/core/graph.cc index 8659d056..9b40d58d 100644 --- a/src/core/graph.cc +++ b/src/core/graph.cc @@ -2,7 +2,23 @@ namespace infini { -void GraphObj::updateConnection() { IT_TODO_HALT(); } +void GraphObj::addOperatorAndConnect(const Operator &op) { + ops.push_back(op); + for (auto &input : op->getInputs()) { + input->addInputOf(op); + if (auto pred = input->getOutputOf()) { + pred->addSuccessors(op); + op->addPredecessors(pred); + } + } + for (auto &output : op->getOutputs()) { + output->setOutputOf(op); + for (auto &succ : output->getInputOf()) { + succ->addPredecessors(op); + op->addSuccessors(succ); + } + } +} string GraphObj::toString() const { std::ostringstream oss; @@ -11,8 +27,17 @@ string GraphObj::toString() const { oss << tensor << "\n"; oss << "Graph operators:\n"; - for (const auto &op : ops) - oss << op << "\n"; + for (const auto &op : ops) { + vector preds, succs; + for (auto &o : op->getPredecessors()) + preds.emplace_back(o->getGuid()); + for (auto &o : op->getSuccessors()) + succs.emplace_back(o->getGuid()); + oss << "OP " << op->getGuid(); + oss << ", pred " << vecToString(preds); + oss << ", succ " << vecToString(succs); + oss << ", " << op << "\n"; + } return oss.str(); } diff --git a/src/core/operator.cc b/src/core/operator.cc index f37009a2..b8e69af8 100644 --- a/src/core/operator.cc +++ b/src/core/operator.cc @@ -4,6 +4,12 @@ namespace infini { +OperatorObj::OperatorObj(OpType opType, TensorVec inputs, TensorVec outputs) + : type(opType), inputs(inputs), outputs(outputs) { + for (auto &t : inputs) + IT_ASSERT(t != nullptr); +} + bool OperatorObj::isLinearOp() const { return enum_to_underlying(type) >= 100 && enum_to_underlying(type) < 200; } diff --git a/src/core/tensor.cc b/src/core/tensor.cc index 0eaad451..6ea7e7bb 100644 --- a/src/core/tensor.cc +++ b/src/core/tensor.cc @@ -1,5 +1,6 @@ #include "core/tensor.h" #include "core/blob.h" +#include "core/operator.h" #include "core/runtime.h" #include "utils/dataloader.h" @@ -13,7 +14,17 @@ VType TensorObj::getData(const Shape &pos) const { } string TensorObj::toString() const { - return "Tensor " + std::to_string(guid) + " shape " + vecToString(shape); + string ret = "Tensor " + std::to_string(guid) + ", shape " + + vecToString(shape) + ", dtype " + dtype.toString(); + vector inputOfGuid; + for (const auto &op : inputOf) + inputOfGuid.emplace_back(op.lock()->getGuid()); + if (auto o = outputOf.lock()) + ret += ", outputOf " + std::to_string(o->getGuid()); + else + ret += ", outputOf None"; + ret += ", inputOf " + vecToString(inputOfGuid); + return ret; } size_t TensorObj::getOffset(const Shape &pos) const { diff --git a/test/core/test_graph.cc b/test/core/test_graph.cc index 96240fe6..28ed0e4f 100644 --- a/test/core/test_graph.cc +++ b/test/core/test_graph.cc @@ -15,9 +15,20 @@ TEST(Graph, build_and_run) { g->dataMalloc(); i0->copyData(vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); w0->copyData(vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); - g->addOpWithOutputs(i0, w0, o0); + auto matmul = g->addOpWithOutputs(i0, w0, o0); + g->print(); + // check inputOf and outputsOf for tensor + EXPECT_EQ(i0->getInputOf().size(), 1); + EXPECT_EQ(w0->getInputOf().size(), 1); + EXPECT_EQ(o0->getInputOf().size(), 0); + EXPECT_EQ(i0->getOutputOf(), nullptr); + EXPECT_EQ(w0->getOutputOf(), nullptr); + EXPECT_NE(o0->getOutputOf(), nullptr); + EXPECT_EQ(matmul->getPredecessors().size(), 0); + EXPECT_EQ(matmul->getSuccessors().size(), 0); + runtime->run(g); - // check answer + // check execution results auto ans = make_ref(Shape{1, 2, 4}, DataType::UInt32, runtime); ans->dataMalloc(); ans->copyData(vector{38, 44, 50, 56, 83, 98, 113, 128}); diff --git a/test/kernels/cuda/test_cuda_inception.cc b/test/kernels/cuda/test_cuda_inception.cc new file mode 100644 index 00000000..1e691576 --- /dev/null +++ b/test/kernels/cuda/test_cuda_inception.cc @@ -0,0 +1,84 @@ +#include "core/graph.h" +#include "core/kernel.h" +#include "core/runtime.h" +#include "cuda/cuda_runtime.h" +#include "cuda/cuda_utility.h" +#include "operators/concat.h" +#include "operators/conv.h" +#include "operators/pooling.h" +#include "operators/unary.h" + +#include "test.h" + +namespace infini { + +TEST(CUDA_Inception_v3_block, run) { + const int bs = 1, initialChannels = 192, h = 32; + + auto cudaRuntime = make_ref(); + auto g = make_ref(cudaRuntime); + auto blockInput = g->addTensor({bs, initialChannels, h, h}); + vector>> configs = + // + { + {{true, 64, 1}}, // a chain with one Conv + {{true, 48, 1}, {true, 64, 5}}, + {{true, 64, 1}, {true, 96, 3}, {true, 96, 3}}, + {{false, 192, 3}, {true, 32, 3}}, + }; + TensorVec outputs; + vector ops; + auto maxpool = + g->addOp(blockInput, nullptr, 3, 3, 1, 1, 1, 1, 1, 1); + auto chainInput = maxpool->getOutput(); + for (auto &pathConfig : configs) { + int inputChannels = initialChannels; + auto input = chainInput; + ops.emplace_back(); + for (auto &[isConv, f, r] : pathConfig) { // OpConfig + if (isConv) { + { // Add Conv + auto w = g->addTensor({f, inputChannels, r, r}); + auto conv = + g->addOp(input, w, nullptr, r / 2, r / 2); + input = conv->getOutput(); + ops.back().emplace_back(conv); + } + { // Add Relu + auto relu = g->addOp(input, nullptr); + input = relu->getOutput(); + ops.back().emplace_back(relu); + } + inputChannels = f; + } else { // Add AveragePool + auto pool = g->addOp(input, nullptr, r, r, 1, 1, + r / 2, r / 2, 1, 1); + input = pool->getOutput(); + ops.back().emplace_back(pool); + } + } + outputs.emplace_back(input); + } + auto concat = g->addOp(outputs, nullptr, 1); + g->print(); + + // check connection + EXPECT_EQ(maxpool->getSuccessors().size(), 4u); + EXPECT_EQ(chainInput->getInputOf().size(), 4u); + for (const auto &chainOps : ops) { + for (size_t i = 1; i < chainOps.size(); i++) { + auto prev = chainOps[i - 1]; + auto cur = chainOps[i]; + EXPECT_EQ(prev->getSuccessors().size(), 1u); + EXPECT_EQ(cur->getPredecessors().size(), 1u); + EXPECT_EQ(prev->getSuccessors()[0], cur); + EXPECT_EQ(prev, cur->getPredecessors()[0]); + } + } + EXPECT_EQ(concat->getPredecessors().size(), 4u); + + // TODO: check outputs + g->dataMalloc(); + cudaRuntime->run(g); +} +}; // namespace infini