diff --git a/include/core/operator.h b/include/core/operator.h
index f65aeaa7..8e1dd593 100644
--- a/include/core/operator.h
+++ b/include/core/operator.h
@@ -27,6 +27,7 @@ enum class OpType {
     Gather,
     ReduceMean,
     Reshape,
+    Flatten,
     Identity,
     // element wise
     BatchNorm = 200,
diff --git a/include/operators/reshape.h b/include/operators/reshape.h
new file mode 100644
index 00000000..39a909b5
--- /dev/null
+++ b/include/operators/reshape.h
@@ -0,0 +1,55 @@
+#pragma once
+
+#include "core/operator.h"
+
+namespace infini {
+class ReshapeObj : public OperatorObj {
+    Shape dims;
+
+  public:
+    ReshapeObj(GraphObj *graph, Tensor input, Tensor output, const Shape &dims);
+
+    optional<vector<Shape>> inferShape(const TensorVec &inputs) const override;
+
+    std::string toString() const override;
+    int numInputs() const override { return 1; }
+    int numOutputs() const override { return 1; }
+
+  private:
+    vector<int> getWorkloadVector() const override;
+    vector<int> getOpAttrVector() const override;
+};
+
+class FlattenObj : public OperatorObj {
+
+  public:
+    FlattenObj(GraphObj *graph, Tensor input, Tensor output);
+
+    optional<vector<Shape>> inferShape(const TensorVec &inputs) const override;
+
+    std::string toString() const override;
+    int numInputs() const override { return 1; }
+    int numOutputs() const override { return 1; }
+
+  private:
+    vector<int> getWorkloadVector() const override;
+    vector<int> getOpAttrVector() const override;
+};
+
+class IdentityObj : public OperatorObj {
+
+  public:
+    IdentityObj(GraphObj *graph, Tensor input, Tensor output);
+
+    optional<vector<Shape>> inferShape(const TensorVec &inputs) const override;
+
+    std::string toString() const override;
+    int numInputs() const override { return 1; }
+    int numOutputs() const override { return 1; }
+
+  private:
+    vector<int> getWorkloadVector() const override;
+    vector<int> getOpAttrVector() const override;
+};
+
+} // namespace infini
diff --git a/src/core/tensor.cc b/src/core/tensor.cc
index ae02ce3d..7fe67207 100644
--- a/src/core/tensor.cc
+++ b/src/core/tensor.cc
@@ -112,7 +112,7 @@ bool TensorObj::equalData(const Tensor &rhs) const {
     IT_ASSERT(getDType() == rhs->getDType());
     IT_ASSERT(runtime->isCpu());
     IT_ASSERT(rhs->getRuntime()->isCpu());
-    if (shape != rhs->getDims())
+    if (size() != rhs->size())
         return false;
     if (getDType() == DataType::UInt32)
         return equalDataImpl(getRawDataPtr<uint32_t *>(),
diff --git a/src/kernels/cuda/element_wise.cc b/src/kernels/cuda/element_wise.cc
index 69cefcd1..6e4130e0 100644
--- a/src/kernels/cuda/element_wise.cc
+++ b/src/kernels/cuda/element_wise.cc
@@ -4,12 +4,12 @@
 #include "cuda/cuda_runtime.h"
 
 namespace infini {
-class ElementWiseCudnn : public Kernel {
+class ElementWiseCudnn : public CudaKernelWithoutConfig {
     virtual cudnnOpTensorOp_t getOpType() const = 0;
     virtual tuple<float, float, float> getAlphBeta() const {
         return {1.f, 1.f, 0.f};
     }
-    void compute(const Operator &_op, const PerfRecord &record,
+    void compute(const Operator &_op,
                  const RuntimeObj *_context) const override {
         auto op = as<ElementWiseObj>(_op);
         auto context = dynamic_cast<const CudaRuntimeObj *>(_context);
@@ -58,18 +58,6 @@ class ElementWiseCudnn : public Kernel {
         checkCudnnError(cudnnDestroyTensorDescriptor(cDesc));
         checkCudnnError(cudnnDestroyOpTensorDescriptor(opDesc));
     }
-
-    void compute(const Operator &_op,
-                 const RuntimeObj *_context) const override {
-        compute(_op, {}, _context);
-    }
-    // Premise: op is idempotent since it is called multiple times.
-    PerfRecord tune(const Operator &_op,
-                    const RuntimeObj *_context) const override {
-        auto context = dynamic_cast<const CudaRuntimeObj *>(_context);
-        return make_ref<PerfRecordObj>(timeit([&]() { compute(_op, _context); },
-                                              [&]() { context->sync(); }));
-    }
 };
 
 class AddCudnn : public ElementWiseCudnn {
diff --git a/src/kernels/cuda/reshape.cc b/src/kernels/cuda/reshape.cc
new file mode 100644
index 00000000..7beb4798
--- /dev/null
+++ b/src/kernels/cuda/reshape.cc
@@ -0,0 +1,21 @@
+#include "cuda/cuda_kernel_wihtout_config.h"
+
+namespace infini {
+class CopyCuda : public CudaKernelWithoutConfig {
+    void compute(const Operator &op,
+                 const RuntimeObj *_context) const override {
+        auto inData = op->getInputs(0)->getRawDataPtr<void *>();
+        auto outData = op->getOutputs()[0]->getRawDataPtr<void *>();
+        cudaMemcpyAsync(outData, inData, op->getInputs(0)->getBytes(),
+                        cudaMemcpyDeviceToDevice);
+    }
+};
+// reshape/flatten/identity all act as copying from input to output.
+REGISTER_KERNEL(Device::CUDA, OpType::Reshape, DataType::Float32, CopyCuda,
+                "Reshape_CUDA_Float32");
+REGISTER_KERNEL(Device::CUDA, OpType::Flatten, DataType::Float32, CopyCuda,
+                "Flatten_CUDA_Float32");
+REGISTER_KERNEL(Device::CUDA, OpType::Identity, DataType::Float32, CopyCuda,
+                "Identity_CUDA_Float32");
+
+} // namespace infini
\ No newline at end of file
diff --git a/src/operators/reshape.cc b/src/operators/reshape.cc
new file mode 100644
index 00000000..c4f1b710
--- /dev/null
+++ b/src/operators/reshape.cc
@@ -0,0 +1,104 @@
+#include "operators/reshape.h"
+
+namespace infini {
+ReshapeObj::ReshapeObj(GraphObj *graph, Tensor input, Tensor output,
+                       const Shape &dims)
+    : OperatorObj(OpType::Reshape, {input}, {output}), dims(dims) {
+    IT_ASSERT(checkValid(graph));
+}
+
+optional<vector<Shape>> ReshapeObj::inferShape(const TensorVec &inputs) const {
+    size_t size = 1;
+    for (size_t i = 0; i < dims.size(); ++i)
+        size *= dims.at(i);
+    if (size != inputs[0]->size())
+        return {};
+
+    return {{dims}};
+}
+
+std::string ReshapeObj::toString() const {
+    std::ostringstream os;
+    os << "Reshape[" << getGuid() << "]";
+    os << "(";
+    os << vecToString(inputs[0]->getDims()) << ",";
+    os << "dims=" << vecToString(dims) << ",";
+    os << "input=" << inputs[0]->getGuid() << ",";
+    os << "output=" << outputs[0]->getGuid() << ")";
+    return os.str();
+}
+
+vector<int> ReshapeObj::getWorkloadVector() const {
+    vector<int> ret = inputs[0]->getDims();
+    ret.insert(ret.end(), dims.begin(), dims.end());
+    ret.emplace(ret.begin(), enum_to_underlying(type));
+    return ret;
+}
+vector<int> ReshapeObj::getOpAttrVector() const {
+    vector<int> ret = dims;
+    ret.emplace(ret.begin(), enum_to_underlying(type));
+    return ret;
+}
+
+FlattenObj::FlattenObj(GraphObj *graph, Tensor input, Tensor output)
+    : OperatorObj(OpType::Flatten, {input}, {output}) {
+    IT_ASSERT(checkValid(graph));
+}
+
+optional<vector<Shape>> FlattenObj::inferShape(const TensorVec &inputs) const {
+    int size = 1;
+    auto dims = getInputs(0)->getDims();
+    for (size_t i = 0; i < dims.size(); ++i)
+        size *= dims.at(i);
+
+    return {{{size}}};
+}
+
+std::string FlattenObj::toString() const {
+    std::ostringstream os;
+    os << "Flatten[" << getGuid() << "]";
+    os << "(";
+    os << vecToString(inputs[0]->getDims()) << ",";
+    os << "input=" << inputs[0]->getGuid() << ",";
+    os << "output=" << outputs[0]->getGuid() << ")";
+    return os.str();
+}
+
+vector<int> FlattenObj::getWorkloadVector() const {
+    vector<int> ret = inputs[0]->getDims();
+    ret.emplace(ret.begin(), enum_to_underlying(type));
+    return ret;
+}
+
+vector<int> FlattenObj::getOpAttrVector() const {
+    return {enum_to_underlying(type)};
+}
+
+IdentityObj::IdentityObj(GraphObj *graph, Tensor input, Tensor output)
+    : OperatorObj(OpType::Identity, {input}, {output}) {
+    IT_ASSERT(checkValid(graph));
+}
+
+optional<vector<Shape>> IdentityObj::inferShape(const TensorVec &inputs) const {
+    return {{getInputs(0)->getDims()}};
+}
+
+std::string IdentityObj::toString() const {
+    std::ostringstream os;
+    os << "Identity[" << getGuid() << "]";
+    os << "(";
+    os << vecToString(inputs[0]->getDims()) << ",";
+    os << "input=" << inputs[0]->getGuid() << ",";
+    os << "output=" << outputs[0]->getGuid() << ")";
+    return os.str();
+}
+
+vector<int> IdentityObj::getWorkloadVector() const {
+    vector<int> ret = inputs[0]->getDims();
+    ret.emplace(ret.begin(), enum_to_underlying(type));
+    return ret;
+}
+vector<int> IdentityObj::getOpAttrVector() const {
+    return {enum_to_underlying(type)};
+}
+} // namespace infini
\ No newline at end of file
diff --git a/test/kernels/cuda/test_cuda_reshape.cc b/test/kernels/cuda/test_cuda_reshape.cc
new file mode 100644
index 00000000..6ce25a97
--- /dev/null
+++ b/test/kernels/cuda/test_cuda_reshape.cc
@@ -0,0 +1,98 @@
+#include "core/graph.h"
+#include "core/kernel.h"
+#include "core/runtime.h"
+#include "cuda/cuda_runtime.h"
+#include "cuda/cuda_utility.h"
+#include "operators/reshape.h"
+
+#include "test.h"
+
+namespace infini {
+
+TEST(CUDA_Reshape, run) {
+    Runtime cpuRuntime = CpuRuntimeObj::getInstance();
+    auto cudaRuntime = make_ref<CudaRuntimeObj>();
+
+    // Build input data on CPU
+    Tensor icpu =
+        make_ref<TensorObj>(Shape{2, 3, 3, 4}, DataType::Float32, cpuRuntime);
+    icpu->dataMalloc();
+    icpu->setData(IncrementalGenerator());
+
+    // Build CUDA graph
+    Graph g = make_ref<GraphObj>(cudaRuntime);
+    auto i = g->cloneTensor(icpu);
+    auto op = g->addOp<ReshapeObj>(i, nullptr, Shape{3, 2, 4, 3});
+
+    // allocate CUDA memory
+    g->dataMalloc();
+
+    // Execute on CUDA
+    cudaRuntime->run(g);
+
+    // clone CUDA output to CPU
+    auto o = op->getOutput();
+    auto ocpu = o->clone(cpuRuntime);
+
+    // check results on CPU
+    EXPECT_TRUE(ocpu->equalData(icpu));
+}
+
+TEST(CUDA_Flatten, run) {
+    Runtime cpuRuntime = CpuRuntimeObj::getInstance();
+    auto cudaRuntime = make_ref<CudaRuntimeObj>();
+
+    // Build input data on CPU
+    Tensor icpu =
+        make_ref<TensorObj>(Shape{2, 3, 3, 4}, DataType::Float32, cpuRuntime);
+    icpu->dataMalloc();
+    icpu->setData(IncrementalGenerator());
+
+    // Build CUDA graph
+    Graph g = make_ref<GraphObj>(cudaRuntime);
+    auto i = g->cloneTensor(icpu);
+    auto op = g->addOp<FlattenObj>(i, nullptr);
+
+    // allocate CUDA memory
+    g->dataMalloc();
+
+    // Execute on CUDA
+    cudaRuntime->run(g);
+
+    // clone CUDA output to CPU
+    auto o = op->getOutput();
+    auto ocpu = o->clone(cpuRuntime);
+
+    // check results on CPU
+    EXPECT_TRUE(ocpu->equalData(icpu));
+}
+
+TEST(CUDA_Identity, run) {
+    Runtime cpuRuntime = CpuRuntimeObj::getInstance();
+    auto cudaRuntime = make_ref<CudaRuntimeObj>();
+
+    // Build input data on CPU
+    Tensor icpu =
+        make_ref<TensorObj>(Shape{2, 3, 3, 4}, DataType::Float32, cpuRuntime);
+    icpu->dataMalloc();
+    icpu->setData(IncrementalGenerator());
+
+    // Build CUDA graph
+    Graph g = make_ref<GraphObj>(cudaRuntime);
+    auto i = g->cloneTensor(icpu);
+    auto op = g->addOp<IdentityObj>(i, nullptr);
+
+    // allocate CUDA memory
+    g->dataMalloc();
+
+    // Execute on CUDA
+    cudaRuntime->run(g);
+
+    // clone CUDA output to CPU
+    auto o = op->getOutput();
+    auto ocpu = o->clone(cpuRuntime);
+
+    // check results on CPU
+    EXPECT_TRUE(ocpu->equalData(icpu));
+}
+} // namespace infini
\ No newline at end of file
diff --git a/test/operators/test_reshape.cc b/test/operators/test_reshape.cc
new file mode 100644
index 00000000..a944bbae
--- /dev/null
+++ b/test/operators/test_reshape.cc
@@ -0,0 +1,39 @@
+#include "core/graph.h"
+#include "core/kernel.h"
+#include "core/runtime.h"
+#include "operators/reshape.h"
+
+#include "test.h"
+
+namespace infini {
+
+TEST(Reshape, ShapeInference) {
+    Runtime runtime = CpuRuntimeObj::getInstance();
+    {
+        Graph g = make_ref<GraphObj>(runtime);
+        Tensor i = g->addTensor({2, 3, 3, 4}, DataType::Float32);
+        auto op = g->addOp<ReshapeObj>(i, nullptr, Shape{3, 2, 4, 3});
+        EXPECT_EQ(op->getOutput()->getDims(), (Shape{3, 2, 4, 3}));
+    }
+}
+TEST(Flatten, ShapeInference) {
+    Runtime runtime = CpuRuntimeObj::getInstance();
+    {
+        Graph g = make_ref<GraphObj>(runtime);
+        Tensor i = g->addTensor({2, 3, 3, 4}, DataType::Float32);
+        auto op = g->addOp<FlattenObj>(i, nullptr);
+        EXPECT_EQ(op->getOutput()->getDims(), (Shape{72}));
+    }
+}
+
+TEST(Identity, ShapeInference) {
+    Runtime runtime = CpuRuntimeObj::getInstance();
+    {
+        Graph g = make_ref<GraphObj>(runtime);
+        Tensor i = g->addTensor({2, 3, 3, 4}, DataType::Float32);
+        auto op = g->addOp<IdentityObj>(i, nullptr);
+        EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 3, 3, 4}));
+    }
+}
+
+} // namespace infini
\ No newline at end of file