feat: 添加拷贝数据的必要代码
Signed-off-by: YdrMaster <ydrml@hotmail.com>
This commit is contained in:
YdrMaster 2023-09-18 16:25:42 +08:00
parent a8f8d504f4
commit 030fdc0bd5
12 changed files with 328 additions and 256 deletions

View File

@ -1,8 +1,8 @@
#pragma once
#include "computation/graph.h"
#include "core/lazy_allocator.h"
#include "core/operator.h"
#include "core/tensor.h"
#include "computation/graph.h"
namespace infini {
@ -114,7 +114,8 @@ class GraphObj : public Object {
bool checkValid() const;
void transformFromGraphTopo(refactor::computation::Graph &graph, Runtime runtime);
/**
 * @brief Rebuild this graph from a RefactorGraph.
 *
 * The definition clears the current operators and tensors and then creates
 * new ones by walking the topology of `graph`.
 *
 * @param graph RefactorGraph whose nodes, edges and topology are read.
 * @param runtime Runtime forwarded to the tensor-creation helper.
 */
void transformFromGraphTopo(refactor::computation::Graph &graph,
Runtime runtime);
private:
/**

View File

@ -10,7 +10,7 @@ typedef struct {
int wholeNDim[MAX_DIM]; // dim size after padding or before slicing
int partNDim[MAX_DIM]; // dim size before padding or after slicing
int partStride[MAX_DIM]; // stride before padding or after slicing
int DType;
int DType;
} TransMetaData;
namespace infini {

View File

@ -2,9 +2,9 @@
#ifndef OPERATOR_UTIL_H
#define OPERATOR_UTIL_H
#include "core/tensor.h"
#include "core/graph.h"
#include "computation/graph.h"
#include "core/graph.h"
#include "core/tensor.h"
namespace infini {
@ -14,16 +14,16 @@ Shape infer_broadcast(const Shape &A, const Shape &B);
int get_real_axis(const int &axis, const int &rank);
// transform RefactorGraph node to InfiniTensorGraph operator
void addOperatorFromGraphTopo(GraphObj &g,
std::shared_ptr<refactor::computation::Operator> nodeInfo,
std::vector<size_t> input, std::vector<size_t> output,
std::unordered_map<size_t, Tensor> &edgeToTensor,
std::vector<refactor::computation::Edge> edges);
/**
 * @brief Add to `g` the InfiniTensor operator that corresponds to one
 * RefactorGraph node.
 *
 * @param g Graph receiving the new operator.
 * @param nodeInfo RefactorGraph operator (type name and attributes).
 * @param input Edge indices of the node's inputs.
 * @param output Edge indices of the node's outputs.
 * @param edgeToTensor Map from edge index to the InfiniTensor tensor
 * created for that edge.
 * @param edges RefactorGraph edges, indexed by edge index.
 */
void addOperatorFromGraphTopo(
GraphObj &g, std::shared_ptr<refactor::computation::Operator> nodeInfo,
std::vector<size_t> input, std::vector<size_t> output,
std::unordered_map<size_t, Tensor> &edgeToTensor,
std::vector<refactor::computation::Edge> edges);
void addEdgeToTensor(GraphObj &g, size_t index,
std::shared_ptr<refactor::computation::Tensor> tensor,
std::unordered_map<size_t, Tensor> &edgeToTensor,
Runtime runtime);
std::shared_ptr<refactor::computation::Tensor> tensor,
std::unordered_map<size_t, Tensor> &edgeToTensor,
Runtime runtime);
} // namespace infini
#endif

View File

@ -350,37 +350,63 @@ bool GraphObj::checkValid() const {
return true;
}
void GraphObj::transformFromGraphTopo(refactor::computation::Graph &graph, Runtime runtime) {
// create ops and tensors
ops.clear();
tensors.clear();
auto const& nodes = graph.internal().nodes;
auto const& edges = graph.internal().edges;
std::unordered_map<size_t, Tensor> edgeToTensor;
for (auto [nodeIdx, inputs, outputs] : graph.internal().topology) {
// not dynamic_node
if (!std::all_of(outputs.begin(), outputs.end(), [&](auto e) { return edges[e].tensor->hasData(); })) {
auto nodeInfo = nodes[nodeIdx];
IT_ASSERT(refactor::computation::OpType::tryParse(nodeInfo.op->opType.name().data()));
std::vector<size_t> in, out;
for (auto i : inputs) {
if (edgeToTensor.find(i) == edgeToTensor.end()) {
addEdgeToTensor(*this, i, edges[i].tensor, edgeToTensor, runtime);
}
in.emplace_back(i);
}
for (auto i : outputs) {
if (edgeToTensor.find(i) == edgeToTensor.end()) {
addEdgeToTensor(*this, i, edges[i].tensor, edgeToTensor, runtime);
}
out.emplace_back(i);
}
IT_ASSERT(out.size() == outputs.size());
IT_ASSERT(in.size() == inputs.size());
addOperatorFromGraphTopo(*this, nodeInfo.op, in, out, edgeToTensor, edges);
}
}
/**
 * @brief Rebuild this graph from a RefactorGraph.
 *
 * Existing operators and tensors are dropped. The topology of `graph` is then
 * walked in order; every node that has at least one output edge without data
 * gets its input/output tensors created (once per edge) and is translated
 * into an InfiniTensor operator. Nodes whose outputs all already carry data
 * are skipped. Finally memory is allocated via dataMalloc() and the global
 * output edges are collected.
 *
 * @param graph RefactorGraph whose nodes, edges and topology are read.
 * @param runtime Runtime forwarded to addEdgeToTensor for tensor creation.
 */
void GraphObj::transformFromGraphTopo(refactor::computation::Graph &graph,
                                      Runtime runtime) {
    // create ops and tensors
    ops.clear();
    tensors.clear();
    auto const &nodes = graph.internal().nodes;
    auto const &edges = graph.internal().edges;
    std::unordered_map<size_t, Tensor> edgeToTensor;

    // Create the tensor of an edge the first time that edge is encountered.
    auto ensureTensor = [&](size_t edgeIdx) {
        if (edgeToTensor.find(edgeIdx) == edgeToTensor.end()) {
            addEdgeToTensor(*this, edgeIdx, edges[edgeIdx].tensor,
                            edgeToTensor, runtime);
        }
    };

    auto it = graph.internal().topology.begin();
    auto end = graph.internal().topology.end();
    while (it != end) {
        auto [nodeIdx, inputs, outputs] = *it++;
        // not dynamic_node: every output already has data, nothing to build
        if (std::all_of(outputs.begin(), outputs.end(),
                        [&](auto e) { return edges[e].tensor->hasData(); })) {
            continue;
        }
        // Bind by reference: the node is only read here.
        auto const &nodeInfo = nodes[nodeIdx];
        IT_ASSERT(refactor::computation::OpType::tryParse(
            nodeInfo.op->opType.name().data()));
        std::vector<size_t> in, out;
        for (auto i : inputs) {
            ensureTensor(i);
            in.emplace_back(i);
        }
        for (auto i : outputs) {
            ensureTensor(i);
            out.emplace_back(i);
        }
        IT_ASSERT(out.size() == outputs.size());
        IT_ASSERT(in.size() == inputs.size());
        addOperatorFromGraphTopo(*this, nodeInfo.op, in, out, edgeToTensor,
                                 edges);
    }
    dataMalloc();

    // The topology iterator is queried for the global outputs only after the
    // walk above has consumed it.
    std::unordered_set<size_t> globalOutputs;
    for (auto edgeIdx : it.globalOutputs()) {
        globalOutputs.insert(edgeIdx);
    }
    // Placeholder for copying constant data: edges that already carry data
    // are visited, but the actual tensor handling is still commented out.
    size_t i = 0;
    for (auto const &e : edges) {
        if (e.tensor->hasData()) {
            // auto tensor = edgeToTensor[e.id];
            if (globalOutputs.erase(i)) {
                // tensor->setIsOutput(true);
            }
            // addTensor(tensor);
        }
        ++i;
    }
}
} // namespace infini

View File

@ -71,10 +71,10 @@ bool OperatorObj::checkValid(GraphObj *graph) {
} else { // if outputs have been created, check their shapes
for (size_t i = 0; i < shapes.size(); ++i) {
if (shapes[i] != outputs[i]->getDims()) {
std::cout<<"shapes"<<vecToString(shapes[i])<<std::endl;
std::cout<<vecToString(outputs[i]->getDims())<<std::endl;
IT_ASSERT(false);
}
std::cout << "shapes" << vecToString(shapes[i]) << std::endl;
std::cout << vecToString(outputs[i]->getDims()) << std::endl;
IT_ASSERT(false);
}
}
}
return true;

View File

@ -1,7 +1,7 @@
#include "common/error_handler.h"
#include "communication/operators.h"
#include "core/graph.h"
#include "computation/graph.h"
#include "core/graph.h"
#include "onnx/operators.h"
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
@ -40,17 +40,17 @@ class Handler {
fmt::format("Variable {} not exist", name));
}
auto const &graph() const { return _g.internal(); }
void runCuda() {
using namespace infini;
#ifdef USE_CUDA
void runCuda() {
using namespace infini;
auto cudaRuntime = make_ref<CudaRuntimeObj>(0);
auto graph = make_ref<GraphObj>(std::move(cudaRuntime));
graph->transformFromGraphTopo(_g, cudaRuntime);
//graph->print();
graph->dataMalloc();
graph->getRuntime()->run(graph);
}
auto cudaRuntime = make_ref<CudaRuntimeObj>();
auto graph = make_ref<GraphObj>(cudaRuntime);
graph->transformFromGraphTopo(_g, cudaRuntime);
graph->dataMalloc();
graph->getRuntime()->run(graph);
#endif
}
};
using TExport = std::tuple<Name, int, std::vector<std::variant<Name, int>>>;
@ -218,9 +218,7 @@ void register_refactor(py::module &m) {
py::class_<Handler, std::shared_ptr<Handler>>(m, "Graph")
.def("fill_edge_info", &Handler::fillEdgeInfo)
.def("substitute", &Handler::substitute)
#ifdef USE_CUDA
.def("run_cuda", &Handler::runCuda)
#endif
.def("set_input", &Handler::setInput);
py::class_<NodeExport>(m, "NodeExport")
.def(py::init<std::shared_ptr<Handler>>())
@ -236,6 +234,4 @@ void register_refactor(py::module &m) {
}
} // namespace
PYBIND11_MODULE(backend, m) {
register_refactor(m);
}
// Entry point of the Python extension module named `backend`; all bindings
// are registered through register_refactor().
PYBIND11_MODULE(backend, m) { register_refactor(m); }

View File

@ -133,11 +133,10 @@ class ElementWiseCuda : public CudaKernelWithoutConfig {
else if (op->getOpType() == OpType::Pow)
pow_kernel(aData, bData, cData, a[0], a[1], a[2], a[3], b[0], b[1],
b[2], b[3], c[0], c[1], c[2], c[3]);
else if (op->getOpType() == OpType::Add) {
else if (op->getOpType() == OpType::Add) {
add_kernel(aData, bData, cData, a[0], a[1], a[2], a[3], b[0], b[1],
b[2], b[3], c[0], c[1], c[2], c[3]);
}
else
} else
IT_TODO_HALT();
}
};

View File

@ -5,9 +5,9 @@ constexpr unsigned int num_threads() { return 32 * 4; }
constexpr int thread_work_size() { return 4; }
constexpr int block_work_size() { return thread_work_size() * num_threads(); }
__global__ void _div_kernel(void *x, void *y, void *z, int a0, int a1,
int a2, int a3, int b0, int b1, int b2, int b3,
int c0, int c1, int c2, int c3) {
__global__ void _div_kernel(void *x, void *y, void *z, int a0, int a1, int a2,
int a3, int b0, int b1, int b2, int b3, int c0,
int c1, int c2, int c3) {
int index = threadIdx.x + blockIdx.x * blockDim.x;
int stride = blockDim.x * gridDim.x;
int n = c0 * c1 * c2 * c3;
@ -27,17 +27,18 @@ __global__ void _div_kernel(void *x, void *y, void *z, int a0, int a1,
int b1_index = c1_index % b1;
int b2_index = c2_index % b2;
int b3_index = c3_index % b3;
((float *)z)[i] = ((float *)x)[a0_index * a1 * a2 * a3 + a1_index * a2 * a3 + a2_index * a3 +
a3_index] /
((float *)y)[b0_index * b1 * b2 * b3 + b1_index * b2 * b3 + b2_index * b3 +
b3_index];
((float *)z)[i] =
((float *)x)[a0_index * a1 * a2 * a3 + a1_index * a2 * a3 +
a2_index * a3 + a3_index] /
((float *)y)[b0_index * b1 * b2 * b3 + b1_index * b2 * b3 +
b2_index * b3 + b3_index];
}
}
template <class T>
__global__ void _add_kernel(void *x, void *y, void *z, int a0, int a1,
int a2, int a3, int b0, int b1, int b2, int b3,
int c0, int c1, int c2, int c3) {
__global__ void _add_kernel(void *x, void *y, void *z, int a0, int a1, int a2,
int a3, int b0, int b1, int b2, int b3, int c0,
int c1, int c2, int c3) {
int index = threadIdx.x + blockIdx.x * blockDim.x;
int stride = blockDim.x * gridDim.x;
int n = c0 * c1 * c2 * c3;
@ -57,15 +58,15 @@ __global__ void _add_kernel(void *x, void *y, void *z, int a0, int a1,
int b1_index = c1_index % b1;
int b2_index = c2_index % b2;
int b3_index = c3_index % b3;
((T *)z)[i] = ((T *)x)[a0_index * a1 * a2 * a3 + a1_index * a2 * a3 + a2_index * a3 +
a3_index] +
((T *)y)[b0_index * b1 * b2 * b3 + b1_index * b2 * b3 + b2_index * b3 +
b3_index];
((T *)z)[i] = ((T *)x)[a0_index * a1 * a2 * a3 + a1_index * a2 * a3 +
a2_index * a3 + a3_index] +
((T *)y)[b0_index * b1 * b2 * b3 + b1_index * b2 * b3 +
b2_index * b3 + b3_index];
}
}
__global__ void _pow_kernel(void *x, void *y, void *z, int a0, int a1,
int a2, int a3, int b0, int b1, int b2, int b3,
int c0, int c1, int c2, int c3) {
__global__ void _pow_kernel(void *x, void *y, void *z, int a0, int a1, int a2,
int a3, int b0, int b1, int b2, int b3, int c0,
int c1, int c2, int c3) {
int index = threadIdx.x + blockIdx.x * blockDim.x;
int stride = blockDim.x * gridDim.x;
int n = c0 * c1 * c2 * c3;
@ -85,10 +86,11 @@ __global__ void _pow_kernel(void *x, void *y, void *z, int a0, int a1,
int b1_index = c1_index % b1;
int b2_index = c2_index % b2;
int b3_index = c3_index % b3;
((float *)z)[i] = pow(((float *)x)[a0_index * a1 * a2 * a3 + a1_index * a2 * a3 +
a2_index * a3 + a3_index],
((float *)y)[b0_index * b1 * b2 * b3 + b1_index * b2 * b3 +
b2_index * b3 + b3_index]);
((float *)z)[i] =
pow(((float *)x)[a0_index * a1 * a2 * a3 + a1_index * a2 * a3 +
a2_index * a3 + a3_index],
((float *)y)[b0_index * b1 * b2 * b3 + b1_index * b2 * b3 +
b2_index * b3 + b3_index]);
}
}
@ -110,8 +112,8 @@ void add_kernel(void *a, void *b, void *c, int a0, int a1, int a2, int a3,
int blocksize = block_work_size();
int num = c0 * c1 * c2 * c3;
int gridsize = (num + block_work_size() - 1) / block_work_size();
_add_kernel<int64_t><<<gridsize, blocksize>>>(a, b, c, a0, a1, a2, a3, b0, b1, b2,
b3, c0, c1, c2, c3);
_add_kernel<int64_t><<<gridsize, blocksize>>>(a, b, c, a0, a1, a2, a3, b0,
b1, b2, b3, c0, c1, c2, c3);
}
void pow_kernel(void *a, void *b, void *c, int a0, int a1, int a2, int a3,
int b0, int b1, int b2, int b3, int c0, int c1, int c2,

View File

@ -16,7 +16,7 @@ class PadSliceCudaCompute {
metadata.partNDim[i] = partTensor->getDims()[i];
metadata.partStride[i] = partTensor->getStride()[i];
}
metadata.DType = partTensor->getDType().getIndex();
metadata.DType = partTensor->getDType().getIndex();
pad_slice_kernel(partTensor->getRawDataPtr<void *>(),
wholeTensor->getRawDataPtr<void *>(), metadata, nDims,
wholeTensor->size(), isPad);

View File

@ -1,6 +1,6 @@
#include "core/data_type.h"
#include "cuda/cuda_common.h"
#include "cuda/cuda_pad_slice.h"
#include "core/data_type.h"
__device__ int WholeTensorOffset2PartTensorOffset(int wholeOffset,
TransMetaData metaData,
@ -21,9 +21,8 @@ __device__ int WholeTensorOffset2PartTensorOffset(int wholeOffset,
}
template <typename T>
__global__ void _pad_slice_kernel(T *part, T *whole,
TransMetaData metaData, int nDims, int num,
bool isPad) {
__global__ void _pad_slice_kernel(T *part, T *whole, TransMetaData metaData,
int nDims, int num, bool isPad) {
int tid = threadIdx.x + blockIdx.x * blockDim.x;
if (tid >= num)
return;
@ -48,12 +47,13 @@ void pad_slice_kernel(void *partData, void *wholeData,
bool isPad) {
int blockSize = 32 * 16;
int gridSize = (num + blockSize - 1) / blockSize;
if (metadata.DType == DataType::Int64.getIndex()) {
_pad_slice_kernel<int64_t><<<gridSize, blockSize>>>((int64_t *)partData, (int64_t *)wholeData, metadata,
nDims, num, isPad);
} else if (metadata.DType == DataType::Float32.getIndex()) {
_pad_slice_kernel<float><<<gridSize, blockSize>>>((float*)partData, (float*)wholeData, metadata,
nDims, num, isPad);
}
if (metadata.DType == DataType::Int64.getIndex()) {
_pad_slice_kernel<int64_t>
<<<gridSize, blockSize>>>((int64_t *)partData, (int64_t *)wholeData,
metadata, nDims, num, isPad);
} else if (metadata.DType == DataType::Float32.getIndex()) {
_pad_slice_kernel<float><<<gridSize, blockSize>>>(
(float *)partData, (float *)wholeData, metadata, nDims, num, isPad);
}
}
} // namespace infini

View File

@ -46,11 +46,10 @@ SliceObj::SliceObj(GraphObj *graph, Tensor input, Tensor output,
for (size_t i = 0; i < size; ++i)
if (auto _i = axes.find(i); _i != axes.end()) {
auto __i = _i->second;
auto start = starts[__i] >= 0 ? starts[__i] : starts[__i] + shape[i];
auto end = ends[__i] >= 0 ? ends[__i] : ends[__i] + shape[i];
this->axes.push_back({start,
end,
steps[__i]});
auto start =
starts[__i] >= 0 ? starts[__i] : starts[__i] + shape[i];
auto end = ends[__i] >= 0 ? ends[__i] : ends[__i] + shape[i];
this->axes.push_back({start, end, steps[__i]});
} else {
this->axes.push_back({0, shape[i], 1});
}

View File

@ -1,3 +1,4 @@
#include "utils/operator_utils.h"
#include "operators/batch_norm.h"
#include "operators/concat.h"
#include "operators/conv.h"
@ -13,7 +14,6 @@
#include "operators/split.h"
#include "operators/transpose.h"
#include "operators/unary.h"
#include "utils/operator_utils.h"
namespace infini {
@ -57,159 +57,208 @@ int get_real_axis(const int &axis, const int &rank) {
return newAxis;
}
void addOperatorFromGraphTopo(GraphObj &g,
std::shared_ptr<refactor::computation::Operator> nodeInfo,
std::vector<size_t> input, std::vector<size_t> output,
std::unordered_map<size_t, Tensor> &edgeToTensor,
std::vector<refactor::computation::Edge> edges) {
std::string name(nodeInfo->opType.name());
auto attr = nodeInfo->attributes;
#define ELSE_IF(op) \
else if (name == "onnx::op") { \
g.addOpWithOutputs<op##Obj>(edgeToTensor[input[0]], edgeToTensor[output[0]]); \
}
if (name == "onnx::Conv") {
// auto p = attr["pads"].ints();
// auto s = attr["strides"].ints();
// auto d = attr["dilations"].ints();
// g.addOpWithOutputs<ConvObj>(edgeToTensor[input[0]], edgeToTensor[input[1]], edgeToTensor[output[0]], p[0], p[1], s[0], s[1], d[0], d[1]);
} else if (name == "onnx::Add") {
g.addOpWithOutputs<AddObj>(edgeToTensor[input[0]], edgeToTensor[input[1]], edgeToTensor[output[0]]);
} else if (name == "onnx::AveragePool") {
// auto p = attr["pads"].ints();
// auto s = attr["strides"].ints();
// auto d = attr["dilations"].ints();
// int h = edgeToTensor[input[0]]->getDims()[2];
// int w = edgeToTensor[input[0]]->getDims()[3];
// g.addOpWithOutputs<AvgPoolObj>(edgeToTensor[input[0]], edgeToTensor[output[0]], h, w,
// d[0], d[1], p[0], p[1], s[0], s[1]);
} else if (name == "onnx::Reshape") {
IT_ASSERT(input.size() == 2);
auto shapeValue = reinterpret_cast<int64_t *>(edges[input[1]].tensor->data->ptr);
auto rank = edgeToTensor[input[1]]->getDims()[0];
Shape shape(rank);
for (size_t i = 0; i < (size_t)rank; ++i) {
shape[i] = static_cast<int>(*(shapeValue + i));
}
g.addOpWithOutputs<ReshapeObj>(edgeToTensor[input[0]], edgeToTensor[output[0]], shape);
} else if (name == "onnx::Gemm") {
auto alpha = attr.find("alpha") != attr.end() ? attr["alpha"].float_() : 1.0;
auto beta = attr.find("beta") != attr.end() ? attr["beta"].float_() : 1.0;
auto transA = attr.find("transA") != attr.end() ? attr["transA"].int_() : 0;
auto transB = attr.find("transB") != attr.end() ? attr["transB"].int_() : 0;
IT_ASSERT(alpha == 1.0);
IT_ASSERT(beta == 1.0);
g.addOpWithOutputs<MatmulObj>(edgeToTensor[input[0]], edgeToTensor[input[1]], edgeToTensor[output[0]], transA, transB,
input.size() > 2 ? edgeToTensor[input[2]] : nullptr, ActType::None);
} else if (name == "onnx::Pow") {
g.addOpWithOutputs<PowerObj>(edgeToTensor[input[0]], edgeToTensor[input[1]], edgeToTensor[output[0]]);
} else if (name == "onnx::Gather") {
auto axis = attr.find("axis") != attr.end() ? attr["axis"].int_() : 0;
g.addOpWithOutputs<GatherObj>(edgeToTensor[input[0]], edgeToTensor[input[1]], edgeToTensor[output[0]], axis);
} else if (name == "onnx::Max") {
g.addOpWithOutputs<MaximumObj>(edgeToTensor[input[0]], edgeToTensor[input[1]], edgeToTensor[output[0]]);
} else if (name == "onnx::Div") {
g.addOpWithOutputs<DivObj>(edgeToTensor[input[0]], edgeToTensor[input[1]], edgeToTensor[output[0]]);
} else if (name == "onnx::Mul") {
g.addOpWithOutputs<MulObj>(edgeToTensor[input[0]], edgeToTensor[input[1]], edgeToTensor[output[0]]);
} else if (name == "onnx::Sub") {
g.addOpWithOutputs<SubObj>(edgeToTensor[input[0]], edgeToTensor[input[1]], edgeToTensor[output[0]]);
} else if (name == "onnx::Slice") {
auto startValue = reinterpret_cast<int64_t *>(edges[input[1]].tensor->data->ptr);
auto startRank = edgeToTensor[input[1]]->getRank();
auto endValue = reinterpret_cast<int64_t *>(edges[input[2]].tensor->data->ptr);
auto endRank = edgeToTensor[input[2]]->getRank();
std::vector<int> start, end, axesVal, stepsVal;
std::optional<std::vector<int>> axes, steps;
if (input.size() > 3) {
auto axesValue = reinterpret_cast<int64_t *>(edges[input[3]].tensor->data->ptr);
auto axesRank = edgeToTensor[input[3]]->getRank();
for (size_t i = 0; i < axesRank; ++i) {
axesVal.emplace_back(static_cast<int>(*(axesValue + i)));
}
axes = axesVal;
}
if (input.size() > 4) {
auto stepsValue = reinterpret_cast<int64_t *>(edges[input[4]].tensor->data->ptr);
auto stepsRank = edgeToTensor[input[4]]->getRank();
for (size_t i = 0; i < stepsRank; ++i) {
stepsVal.emplace_back(static_cast<int>(*(stepsValue + i)));
}
steps = stepsVal;
}
for (size_t i = 0; i < startRank; ++i) {
int64_t startVal = *(startValue + i);
if (axes.has_value()) {
startVal = std::min(startVal, static_cast<int64_t>(edgeToTensor[input[0]]->getDims()[axes.value()[i]]));
} else {
startVal = std::min(startVal, static_cast<int64_t>(edgeToTensor[input[0]]->getDims()[i]));
}
start.emplace_back(static_cast<int>(startVal));
}
for (size_t i = 0; i < endRank; ++i) {
int64_t endVal = *(endValue + i);
if (axes.has_value()) {
endVal = std::min(endVal, static_cast<int64_t>(edgeToTensor[input[0]]->getDims()[axes.value()[i]]));
} else {
endVal = std::min(endVal, static_cast<int64_t>(edgeToTensor[input[0]]->getDims()[i]));
}
end.emplace_back(static_cast<int>(endVal));
}
g.addOpWithOutputs<SliceObj>(edgeToTensor[input[0]], edgeToTensor[output[0]], start, end,
axes, steps);
} else if (name == "onnx::Softmax") {
auto axis = attr.find("axis") != attr.end() ? attr["axis"].int_() : -1;
g.addOpWithOutputs<SoftmaxObj>(edgeToTensor[input[0]], edgeToTensor[output[0]], axis);
} else if (name == "onnx::ReduceMean") {
auto keepdims = attr.find("keepdims") != attr.end() ? attr["keepdims"].int_() : 1;
std::vector<int> axesVal;
std::optional<std::vector<int>> axes;
if (input.size() > 1) {
auto axesValue = reinterpret_cast<int64_t *>(edges[input[1]].tensor->data->ptr);
auto axesRank = edgeToTensor[input[1]]->getRank();
for (size_t i = 0; i < axesRank; ++i) {
axesVal.emplace_back(static_cast<int>(*(axesValue + i)));
}
axes = axesVal;
}
g.addOpWithOutputs<ReduceMeanObj>(edgeToTensor[input[0]], edgeToTensor[output[0]], axes, keepdims);
} else if (name == "onnx::Concat") {
auto axis = attr["axis"].int_();
std::vector<Tensor> inputs;
for (auto i : input) {
inputs.emplace_back(edgeToTensor[i]);
}
g.addOpWithOutputs<ConcatObj>(inputs, edgeToTensor[output[0]], axis);
} else if (name == "onnx::MatMul") {
g.addOpWithOutputs<MatmulObj>(edgeToTensor[input[0]], edgeToTensor[input[1]], edgeToTensor[output[0]], false, false, nullptr, ActType::None);
} else if (name == "onnx::Transpose") {
int rank = edgeToTensor[input[0]]->getRank();
std::vector<int> permDefault;
for (int i = rank - 1; i >= 0; --i) {
permDefault.emplace_back(i);
}
std::vector<int> perm;
if (attr.find("perm") != attr.end()) {
auto permAttr = attr["perm"].ints();
for (auto e : permAttr) {
perm.emplace_back(static_cast<int>(e));
}
} else {
perm = permDefault;
}
g.addOpWithOutputs<TransposeObj>(edgeToTensor[input[0]], edgeToTensor[output[0]], perm);
}
ELSE_IF(Relu)
ELSE_IF(Sqrt)
ELSE_IF(Identity)
/**
 * @brief Translate one RefactorGraph node into an InfiniTensor operator.
 *
 * The operator type is selected by the node's type name ("onnx::Add",
 * "onnx::Slice", ...). Operands are looked up in `edgeToTensor` by edge
 * index. Operators that take constant int64 inputs (Reshape shape, Slice
 * starts/ends/axes/steps, ReduceMean axes) read those values from the data
 * pointer of the corresponding RefactorGraph edge.
 *
 * "onnx::Conv" and "onnx::AveragePool" are recognised but currently create
 * no operator (their bodies are commented out). A type name that matches no
 * branch is ignored without any diagnostic.
 *
 * @param g Graph receiving the new operator.
 * @param nodeInfo RefactorGraph operator (type name and attributes).
 * @param input Edge indices of the node's inputs.
 * @param output Edge indices of the node's outputs.
 * @param edgeToTensor Map from edge index to the tensor created for it.
 * @param edges RefactorGraph edges, indexed by edge index.
 */
void addOperatorFromGraphTopo(
    GraphObj &g, std::shared_ptr<refactor::computation::Operator> nodeInfo,
    std::vector<size_t> input, std::vector<size_t> output,
    std::unordered_map<size_t, Tensor> &edgeToTensor,
    std::vector<refactor::computation::Edge> edges) {
    std::string name(nodeInfo->opType.name());
    auto attr = nodeInfo->attributes;

    // Raw int64 view of the constant data stored on a RefactorGraph edge.
    auto int64Data = [&](size_t edgeIdx) {
        return reinterpret_cast<int64_t *>(edges[edgeIdx].tensor->data->ptr);
    };
    // Read every element of a constant int64 tensor as int. The count is
    // the tensor's element count, not its rank: a 1-D tensor holding k
    // values has rank 1 but k elements.
    auto readInts = [&](size_t edgeIdx) {
        auto values = int64Data(edgeIdx);
        size_t count = edgeToTensor[edgeIdx]->size();
        std::vector<int> result;
        result.reserve(count);
        for (size_t i = 0; i < count; ++i) {
            result.emplace_back(static_cast<int>(values[i]));
        }
        return result;
    };

// The operator name must be stringized (#op); writing "onnx::op" would be a
// fixed literal that no real operator name ever equals.
#define ELSE_IF(op)                                                            \
    else if (name == "onnx::" #op) {                                           \
        g.addOpWithOutputs<op##Obj>(edgeToTensor[input[0]],                    \
                                    edgeToTensor[output[0]]);                  \
    }

    if (name == "onnx::Conv") {
        // auto p = attr["pads"].ints();
        // auto s = attr["strides"].ints();
        // auto d = attr["dilations"].ints();
        // g.addOpWithOutputs<ConvObj>(edgeToTensor[input[0]],
        // edgeToTensor[input[1]], edgeToTensor[output[0]], p[0], p[1], s[0],
        // s[1], d[0], d[1]);
    } else if (name == "onnx::Add") {
        g.addOpWithOutputs<AddObj>(edgeToTensor[input[0]],
                                   edgeToTensor[input[1]],
                                   edgeToTensor[output[0]]);
    } else if (name == "onnx::AveragePool") {
        // auto p = attr["pads"].ints();
        // auto s = attr["strides"].ints();
        // auto d = attr["dilations"].ints();
        // int h = edgeToTensor[input[0]]->getDims()[2];
        // int w = edgeToTensor[input[0]]->getDims()[3];
        // g.addOpWithOutputs<AvgPoolObj>(edgeToTensor[input[0]],
        //                                edgeToTensor[output[0]], h, w,
        //                                d[0], d[1], p[0], p[1], s[0],
        //                                s[1]);
    } else if (name == "onnx::Reshape") {
        IT_ASSERT(input.size() == 2);
        auto shapeValue = int64Data(input[1]);
        auto rank = edgeToTensor[input[1]]->getDims()[0];
        Shape shape(rank);
        for (size_t i = 0; i < (size_t)rank; ++i) {
            shape[i] = static_cast<int>(*(shapeValue + i));
        }
        g.addOpWithOutputs<ReshapeObj>(edgeToTensor[input[0]],
                                       edgeToTensor[output[0]], shape);
    } else if (name == "onnx::Gemm") {
        auto alpha =
            attr.find("alpha") != attr.end() ? attr["alpha"].float_() : 1.0;
        auto beta =
            attr.find("beta") != attr.end() ? attr["beta"].float_() : 1.0;
        auto transA =
            attr.find("transA") != attr.end() ? attr["transA"].int_() : 0;
        auto transB =
            attr.find("transB") != attr.end() ? attr["transB"].int_() : 0;
        // Only the unscaled form is mapped onto MatmulObj.
        IT_ASSERT(alpha == 1.0);
        IT_ASSERT(beta == 1.0);
        g.addOpWithOutputs<MatmulObj>(
            edgeToTensor[input[0]], edgeToTensor[input[1]],
            edgeToTensor[output[0]], transA, transB,
            input.size() > 2 ? edgeToTensor[input[2]] : nullptr, ActType::None);
    } else if (name == "onnx::Pow") {
        g.addOpWithOutputs<PowerObj>(edgeToTensor[input[0]],
                                     edgeToTensor[input[1]],
                                     edgeToTensor[output[0]]);
    } else if (name == "onnx::Gather") {
        auto axis = attr.find("axis") != attr.end() ? attr["axis"].int_() : 0;
        g.addOpWithOutputs<GatherObj>(edgeToTensor[input[0]],
                                      edgeToTensor[input[1]],
                                      edgeToTensor[output[0]], axis);
    } else if (name == "onnx::Max") {
        g.addOpWithOutputs<MaximumObj>(edgeToTensor[input[0]],
                                       edgeToTensor[input[1]],
                                       edgeToTensor[output[0]]);
    } else if (name == "onnx::Div") {
        g.addOpWithOutputs<DivObj>(edgeToTensor[input[0]],
                                   edgeToTensor[input[1]],
                                   edgeToTensor[output[0]]);
    } else if (name == "onnx::Mul") {
        g.addOpWithOutputs<MulObj>(edgeToTensor[input[0]],
                                   edgeToTensor[input[1]],
                                   edgeToTensor[output[0]]);
    } else if (name == "onnx::Sub") {
        g.addOpWithOutputs<SubObj>(edgeToTensor[input[0]],
                                   edgeToTensor[input[1]],
                                   edgeToTensor[output[0]]);
    } else if (name == "onnx::Slice") {
        std::optional<std::vector<int>> axes, steps;
        if (input.size() > 3) {
            axes = readInts(input[3]);
        }
        if (input.size() > 4) {
            steps = readInts(input[4]);
        }
        // Read a starts/ends tensor and cap each value at the size of the
        // dimension it applies to (axes[i] when axes are given, otherwise i).
        // NOTE(review): assumes axes, when present, are non-negative and at
        // least as long as starts/ends - confirm against the frontend.
        auto clampToInputDims = [&](size_t edgeIdx) {
            auto values = int64Data(edgeIdx);
            size_t count = edgeToTensor[edgeIdx]->size();
            auto dims = edgeToTensor[input[0]]->getDims();
            std::vector<int> clamped;
            clamped.reserve(count);
            for (size_t i = 0; i < count; ++i) {
                auto dim = axes.has_value() ? dims[axes.value()[i]] : dims[i];
                clamped.emplace_back(static_cast<int>(
                    std::min(values[i], static_cast<int64_t>(dim))));
            }
            return clamped;
        };
        std::vector<int> start = clampToInputDims(input[1]);
        std::vector<int> end = clampToInputDims(input[2]);
        g.addOpWithOutputs<SliceObj>(edgeToTensor[input[0]],
                                     edgeToTensor[output[0]], start, end, axes,
                                     steps);
    } else if (name == "onnx::Softmax") {
        auto axis = attr.find("axis") != attr.end() ? attr["axis"].int_() : -1;
        g.addOpWithOutputs<SoftmaxObj>(edgeToTensor[input[0]],
                                       edgeToTensor[output[0]], axis);
    } else if (name == "onnx::ReduceMean") {
        auto keepdims =
            attr.find("keepdims") != attr.end() ? attr["keepdims"].int_() : 1;
        std::optional<std::vector<int>> axes;
        if (input.size() > 1) {
            axes = readInts(input[1]);
        }
        g.addOpWithOutputs<ReduceMeanObj>(
            edgeToTensor[input[0]], edgeToTensor[output[0]], axes, keepdims);
    } else if (name == "onnx::Concat") {
        auto axis = attr["axis"].int_();
        std::vector<Tensor> inputs;
        for (auto i : input) {
            inputs.emplace_back(edgeToTensor[i]);
        }
        g.addOpWithOutputs<ConcatObj>(inputs, edgeToTensor[output[0]], axis);
    } else if (name == "onnx::MatMul") {
        g.addOpWithOutputs<MatmulObj>(
            edgeToTensor[input[0]], edgeToTensor[input[1]],
            edgeToTensor[output[0]], false, false, nullptr, ActType::None);
    } else if (name == "onnx::Transpose") {
        int rank = edgeToTensor[input[0]]->getRank();
        // Without a "perm" attribute the dimensions are reversed.
        std::vector<int> permDefault;
        for (int i = rank - 1; i >= 0; --i) {
            permDefault.emplace_back(i);
        }
        std::vector<int> perm;
        if (attr.find("perm") != attr.end()) {
            auto permAttr = attr["perm"].ints();
            for (auto e : permAttr) {
                perm.emplace_back(static_cast<int>(e));
            }
        } else {
            perm = permDefault;
        }
        g.addOpWithOutputs<TransposeObj>(edgeToTensor[input[0]],
                                         edgeToTensor[output[0]], perm);
    }
    ELSE_IF(Relu)
    ELSE_IF(Sqrt)
    ELSE_IF(Identity)
#undef ELSE_IF
}
void addEdgeToTensor(GraphObj &g, size_t index,
std::shared_ptr<refactor::computation::Tensor> tensor,
std::unordered_map<size_t, Tensor> &edgeToTensor,
Runtime runtime) {
std::shared_ptr<refactor::computation::Tensor> tensor,
std::unordered_map<size_t, Tensor> &edgeToTensor,
Runtime runtime) {
auto refShape = tensor->shape;
Shape shape;
for (auto ele : refShape) {
@ -217,7 +266,7 @@ void addEdgeToTensor(GraphObj &g, size_t index,
shape.emplace_back(ele.value());
}
auto dType = tensor->dataType;
Tensor tensorInf = g.addTensor(shape, DataType(static_cast<int>(dType)));
Tensor tensorInf = g.addTensor(shape, DataType(static_cast<int>(dType)));
edgeToTensor.insert(std::make_pair(index, tensorInf));
}
} // namespace infini