From a27391fcdcefcff3b355f3d42faf0ab7dbc40cf5 Mon Sep 17 00:00:00 2001
From: YdrMaster
Date: Thu, 23 Feb 2023 11:48:28 +0800
Subject: [PATCH 01/29] fix: correct the batchNorm implementation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- onnx and pytorch take the 4 batchNorm parameters to have shape [c],
  while cuDNN may take them to be [1,c,1,...]. The optimization side has
  been changed to [c], but cuDNN inference has not been updated yet;

Signed-off-by: YdrMaster
---
 pyinfinitensor/tests/test_onnx.py | 8 ++++----
 src/operators/batch_norm.cc       | 9 +++------
 test/operators/test_batch_norm.cc | 8 ++++----
 3 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/pyinfinitensor/tests/test_onnx.py b/pyinfinitensor/tests/test_onnx.py
index 1f839256..0db20fc9 100644
--- a/pyinfinitensor/tests/test_onnx.py
+++ b/pyinfinitensor/tests/test_onnx.py
@@ -66,10 +66,10 @@ class TestStringMethods(unittest.TestCase):
 
     def test_batch_norm(self):
         x = make_tensor_value_info("x", TensorProto.UINT32, [1, 3, 2, 2])
-        scale = make_tensor_value_info("scale", TensorProto.FLOAT, [1, 3, 1, 1])
-        b = make_tensor_value_info("b", TensorProto.FLOAT, [1, 3, 1, 1])
-        mean = make_tensor_value_info("mean", TensorProto.FLOAT, [1, 3, 1, 1])
-        var = make_tensor_value_info("var", TensorProto.FLOAT, [1, 3, 1, 1])
+        scale = make_tensor_value_info("scale", TensorProto.FLOAT, [3])
+        b = make_tensor_value_info("b", TensorProto.FLOAT, [3])
+        mean = make_tensor_value_info("mean", TensorProto.FLOAT, [3])
+        var = make_tensor_value_info("var", TensorProto.FLOAT, [3])
         y = make_tensor_value_info("y", TensorProto.UINT32, [1, 3, 2, 2])
         batch_norm = make_node(
             "BatchNormalization",
diff --git a/src/operators/batch_norm.cc b/src/operators/batch_norm.cc
index b9be9425..b744df5a 100644
--- a/src/operators/batch_norm.cc
+++ b/src/operators/batch_norm.cc
@@ -19,12 +19,9 @@ BatchNormObj::inferShape(const TensorVec &inputs) const {
     auto var = inputs[2];
     auto scale = inputs[3];
     auto bias = inputs[4];
-    if (input->getDims().size() < 2)
-        return {};
-    Shape dims(input->getDims().size(), 1);
-    dims[1] = input->getDims()[1]; //
-    if (mean->getDims() != dims || var->getDims() != dims ||
-        scale->getDims() != dims || bias->getDims() != dims)
+    auto c = std::vector<int>{input->getDims()[1]};
+    if (mean->getDims() != c || var->getDims() != c || scale->getDims() != c ||
+        bias->getDims() != c)
         return {};
     return {{input->getDims()}};
 }
diff --git a/test/operators/test_batch_norm.cc b/test/operators/test_batch_norm.cc
index d904edd1..438db6e6 100644
--- a/test/operators/test_batch_norm.cc
+++ b/test/operators/test_batch_norm.cc
@@ -9,10 +9,10 @@ TEST(BatchNorm, ShapeInference) {
     {
         Graph g = make_ref<GraphObj>(cpuRuntime);
         Tensor i = g->addTensor({1, 3, 2, 2}, DataType::UInt32);
-        Tensor mean = g->addTensor({1, 3, 1, 1}, DataType::Float32);
-        Tensor var = g->addTensor({1, 3, 1, 1}, DataType::Float32);
-        Tensor scaler = g->addTensor({1, 3, 1, 1}, DataType::Float32);
-        Tensor bias = g->addTensor({1, 3, 1, 1}, DataType::Float32);
+        Tensor mean = g->addTensor({3}, DataType::Float32);
+        Tensor var = g->addTensor({3}, DataType::Float32);
+        Tensor scaler = g->addTensor({3}, DataType::Float32);
+        Tensor bias = g->addTensor({3}, DataType::Float32);
         auto op = g->addOp<BatchNormObj>(i, nullptr, mean, var, scaler, bias,
                                          0.9, 1e-5);
         EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 3, 2, 2}));

From 4ffaa44c1eebfe394e792c3e05238942d29ae0d3 Mon Sep 17 00:00:00 2001
From: YdrMaster
Date: Thu, 23 Feb 2023 11:51:46 +0800
Subject: [PATCH 02/29] fix: Matmul supports inputs with 2 or more dimensions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

> resnet18 can now be imported

Signed-off-by: YdrMaster
---
 pyinfinitensor/tests/test_onnx.py |  5 ++--
 src/operators/matmul.cc           | 42 ++++++++++++++++++-------------
 2 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/pyinfinitensor/tests/test_onnx.py b/pyinfinitensor/tests/test_onnx.py
index 0db20fc9..8be02f11 100644
--- a/pyinfinitensor/tests/test_onnx.py
+++ b/pyinfinitensor/tests/test_onnx.py
@@ -8,7 +8,7 @@ from onnx.helper import (
     make_tensor_value_info,
 )
 from onnx.checker import check_model
-from pyinfinitensor.onnx import from_onnx, parse_onnx, backend, runtime, to_onnx
+from pyinfinitensor.onnx import from_onnx, backend, runtime, to_onnx
 
 
 def make_and_import_model(graph: onnx.GraphProto):
@@ -28,7 +28,7 @@ class TestStringMethods(unittest.TestCase):
                 file=model_file, size=os.path.getsize(model_file) / 1024 / 1024
             )
         )
-        parse_onnx(onnx.load(model_file))
+        from_onnx(onnx.load(model_file))
 
     def test_tensor(self):
         x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 2, 3])
@@ -290,7 +290,6 @@ class TestStringMethods(unittest.TestCase):
         model = make_model(graph)
         check_model(model)
         from_onnx(model)
-        parse_onnx(model)
 
     def test_frontend(self):
         handler = backend.GraphHandler(runtime)
diff --git a/src/operators/matmul.cc b/src/operators/matmul.cc
index 8e82ccc7..07708d07 100644
--- a/src/operators/matmul.cc
+++ b/src/operators/matmul.cc
@@ -5,10 +5,26 @@ namespace infini {
 MatmulObj::MatmulObj(GraphObj *graph, Tensor A, Tensor B, Tensor C, bool transA,
                      bool transB, [[maybe_unused]] Tensor bias, ActType act)
     : OperatorObj(OpType::Matmul, {A, B}, {C}), transA(transA), transB(transB),
-      act(act), b(A->getDims()[0]),
-      m(transA ? A->getDims()[2] : A->getDims()[1]),
-      n(transB ? B->getDims()[1] : B->getDims()[2]),
-      k(transA ? A->getDims()[1] : A->getDims()[2]) {
+      act(act), b(1) {
+    auto shape_a = A->getDims();
+    auto shape_b = B->getDims();
+    IT_ASSERT(shape_a.size() == shape_b.size());
+    switch (shape_a.size()) {
+    case 0:
+    case 1:
+        IT_ASSERT(false);
+    case 2:
+        break;
+    default:
+        for (size_t i = 0; i < shape_a.size() - 2; ++i) {
+            IT_ASSERT(shape_a[i] == shape_b[i]);
+            b *= shape_a[i];
+        }
+        break;
+    }
+    m = *(transA ? shape_a.rbegin() : shape_a.rbegin() + 1);
+    n = *(transB ? shape_b.rbegin() + 1 : shape_b.rbegin());
+    k = *(transA ? shape_a.rbegin() + 1 : shape_a.rbegin());
     IT_ASSERT(checkValid(graph));
 }
 
@@ -22,19 +38,11 @@ string MatmulObj::toString() const {
 }
 
 optional<vector<Shape>> MatmulObj::inferShape(const TensorVec &inputs) const {
-    auto A = inputs[0], B = inputs[1];
-    // if (A->getType() == Tensor::Weight && B->getType() == Tensor::Weight)
-    //     return false;
-    if (!(A->getDims().size() == 3 && B->getDims().size() == 3))
-        return {};
-    if (!(A->getDims()[0] == B->getDims()[0]))
-        return {};
-    if (!((transA ? A->getDims()[1] : A->getDims()[2]) ==
-          (transB ? B->getDims()[2] : B->getDims()[1])))
-        return {};
-    int b(A->getDims()[0]), m(transA ? A->getDims()[2] : A->getDims()[1]),
-        n(transB ?
B->getDims()[1] : B->getDims()[2]); - return {{{b, m, n}}}; + auto shape_a = inputs[0]->getDims(); + auto it = shape_a.rbegin(); + *it++ = n; + *it++ = m; + return {{std::move(shape_a)}}; } vector MatmulObj::getWorkloadVector() const { From ed81861375bb7eeef9f73496c6a7c6220ea8dee1 Mon Sep 17 00:00:00 2001 From: YdrMaster Date: Thu, 23 Feb 2023 15:29:16 +0800 Subject: [PATCH 03/29] =?UTF-8?q?temp:=20=E5=AE=9E=E7=8E=B0=E5=88=9D?= =?UTF-8?q?=E5=A7=8B=E5=80=BC=E5=AF=BC=E5=85=A5=EF=BC=8C=E4=BD=86=20resnet?= =?UTF-8?q?=20=E6=8A=A5=E9=94=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: YdrMaster --- include/core/graph_handler.h | 20 ++++++++ pyinfinitensor/src/pyinfinitensor/onnx.py | 60 +++++++++-------------- src/ffi/ffi_infinitensor.cc | 3 ++ 3 files changed, 45 insertions(+), 38 deletions(-) diff --git a/include/core/graph_handler.h b/include/core/graph_handler.h index dc221042..82695faa 100644 --- a/include/core/graph_handler.h +++ b/include/core/graph_handler.h @@ -2,6 +2,8 @@ #include "core/graph.h" #include "core/runtime.h" +#include +#include namespace infini { @@ -87,6 +89,24 @@ class GraphHandlerObj { inline void data_malloc() { g->dataMalloc(); } + inline void copy_int32(Tensor tensor, std::vector list) { + std::cout << "copy " << list.size() << " ints to (" << tensor->size() + << ")" << std::endl; + tensor->copyData(list); + } + + inline void copy_int64(Tensor tensor, std::vector list) { + std::cout << "copy " << list.size() << " ints to (" << tensor->size() + << ")" << std::endl; + tensor->copyData(list); + } + + inline void copy_float(Tensor tensor, std::vector list) { + std::cout << "copy " << list.size() << " floats to (" << tensor->size() + << ")" << std::endl; + tensor->copyData(list); + } + inline void run() { g->getRuntime()->run(g); } }; diff --git a/pyinfinitensor/src/pyinfinitensor/onnx.py b/pyinfinitensor/src/pyinfinitensor/onnx.py index ce315685..207280ef 100644 --- a/pyinfinitensor/src/pyinfinitensor/onnx.py +++ b/pyinfinitensor/src/pyinfinitensor/onnx.py @@ -22,7 +22,7 @@ from onnx.checker import ( check_tensor, ) from onnx.shape_inference import infer_shapes -from typing import Dict, List, Any, Tuple, Sequence +from typing import Dict, List, Any, Tuple, Sequence, Union from functools import reduce runtime = backend.cpu_runtime() @@ -324,6 +324,24 @@ def from_onnx(model: ModelProto) -> backend.GraphHandler: else: raise Exception('Unsupported operator "{}"'.format(node.op_type)) + handler.data_malloc() + + inputs = [] + for name, obj in tensors.items(): + tensor = data.get(name) + if tensor == None: + if any(input.name == name for input in model.graph.input): + inputs.append((name, tensor)) + else: + if tensor.data_type == TensorProto.INT32: + handler.copy_int32(obj, [int(i) for i in tensor.int32_data]) + elif tensor.data_type == TensorProto.INT64: + handler.copy_int64(obj, [int(i) for i in tensor.int64_data]) + elif tensor.data_type == TensorProto.FLOAT: + handler.copy_float(obj, [float(i) for i in tensor.float_data]) + else: + assert False, "Unsupported Tensor Type: {}".format(tensor.data_type) + def to_onnx(graph: backend.GraphHandler, name: str) -> ModelProto: class Context: @@ -482,42 +500,6 @@ def to_onnx(graph: backend.GraphHandler, name: str) -> ModelProto: return ctx.build(name) -def parse_onnx(model: ModelProto): - print() - - for field in [ - "doc_string", - "domain", - "functions", - "metadata_props", - "model_version", - "producer_name", - "producer_version", - "training_info", - ]: - print("{}: 
{}".format(field, getattr(model, field))) - - print("ir_version:", model.ir_version) - for opset in model.opset_import: - print("opset domain={} version={}".format(opset.domain, opset.version)) - - print("layout:") - for node in model.graph.node: - print( - ' {o} <- {op}"{name}"{a} <- {i}'.format( - name=node.name, - op=node.op_type, - i=node.input, - o=node.output, - a=[a.name for a in node.attribute], - ) - ) - - print("weight:") - for node in model.graph.initializer: - print(" {}".format(node.name)) - - def _parse_attribute(node: NodeProto, attrs: Dict[str, Any] = dict()) -> Dict[str, Any]: for attr in node.attribute: if attr.name in attrs: @@ -536,11 +518,13 @@ def _parse_attribute(node: NodeProto, attrs: Dict[str, Any] = dict()) -> Dict[st return attrs -def _parse_data(tensor: TensorProto) -> List[int]: +def _parse_data(tensor: TensorProto) -> List[Union[int, float]]: if tensor.data_type == TensorProto.INT32: return [int(i) for i in tensor.int32_data] elif tensor.data_type == TensorProto.INT64: return [int(i) for i in tensor.int64_data] + elif tensor.data_type == TensorProto.FLOAT: + return [float(i) for i in tensor.float_data] else: assert False, "Unsupported Tensor Type: {}".format(tensor.data_type) diff --git a/src/ffi/ffi_infinitensor.cc b/src/ffi/ffi_infinitensor.cc index e545f43c..b791e66e 100644 --- a/src/ffi/ffi_infinitensor.cc +++ b/src/ffi/ffi_infinitensor.cc @@ -171,6 +171,9 @@ void init_graph_builder(py::module &m) { .def("topo_sort", &Handler::topo_sort, policy::automatic) .def("operators", &Handler::operators, policy::move) .def("data_malloc", &Handler::data_malloc, policy::automatic) + .def("copy_int32", &Handler::copy_int32, policy::automatic) + .def("copy_int64", &Handler::copy_int64, policy::automatic) + .def("copy_float", &Handler::copy_float, policy::automatic) .def("run", &Handler::run, policy::automatic); } From 60c5d6b5b8561093f83d29565358b64a623254c6 Mon Sep 17 00:00:00 2001 From: YdrMaster Date: Thu, 23 Feb 2023 16:22:03 +0800 Subject: [PATCH 04/29] =?UTF-8?q?fix:=20=E5=85=88=E4=B8=8D=E5=9C=A8=20cpu?= =?UTF-8?q?=20=E4=B8=8A=E6=B5=8B=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: YdrMaster --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7dae8509..879ae847 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -66,5 +66,5 @@ jobs: - name: Download test model run: wget ${{ env.resnet-download }} - - name: Test onnx frontend - run: make test-onnx + # - name: Test onnx frontend + # run: make test-onnx From 9ab78f13f73108239956db0e7f4c102fd1ae40a0 Mon Sep 17 00:00:00 2001 From: YdrMaster Date: Thu, 23 Feb 2023 16:29:20 +0800 Subject: [PATCH 05/29] =?UTF-8?q?feat:=20=E5=AF=BC=E5=87=BA=20cuda=5Frunti?= =?UTF-8?q?me?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: YdrMaster --- pyinfinitensor/src/pyinfinitensor/onnx.py | 4 ++-- src/ffi/ffi_infinitensor.cc | 12 ++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/pyinfinitensor/src/pyinfinitensor/onnx.py b/pyinfinitensor/src/pyinfinitensor/onnx.py index 207280ef..ef7d4de2 100644 --- a/pyinfinitensor/src/pyinfinitensor/onnx.py +++ b/pyinfinitensor/src/pyinfinitensor/onnx.py @@ -25,10 +25,10 @@ from onnx.shape_inference import infer_shapes from typing import Dict, List, Any, Tuple, Sequence, Union from functools import reduce -runtime = 
backend.cpu_runtime() +cpu_runtime = backend.cpu_runtime() -def from_onnx(model: ModelProto) -> backend.GraphHandler: +def from_onnx(model: ModelProto, runtime) -> backend.GraphHandler: model = infer_shapes(model) handler = backend.GraphHandler(runtime) diff --git a/src/ffi/ffi_infinitensor.cc b/src/ffi/ffi_infinitensor.cc index b791e66e..5d6085ea 100644 --- a/src/ffi/ffi_infinitensor.cc +++ b/src/ffi/ffi_infinitensor.cc @@ -6,6 +6,7 @@ #include #ifdef USE_CUDA +#include "cuda/cuda_runtime.h" #include "cuda/operator_timer.h" #endif @@ -94,6 +95,10 @@ static int tensor_dtype(Tensor t) { IT_ASSERT(false, "Unsupported data type"); } +#ifdef USE_CUDA +static Ref cuda_runtime() { return make_ref(); } +#endif + static int concat_axis_of(Operator op) { IT_ASSERT(op->getOpType() == OpType::Concat); return dynamic_cast(op.get())->getDim(); @@ -118,6 +123,9 @@ static Shape reshape_shape_of(Operator op) { void export_functions(py::module &m) { #define FUNCTION(NAME) def(#NAME, &NAME) m.def("cpu_runtime", &CpuRuntimeObj::getInstance) +#ifdef USE_CUDA + .FUNCTION(cuda_runtime) +#endif .FUNCTION(tensor_dtype) .FUNCTION(reshape_shape_of) .FUNCTION(concat_axis_of) @@ -132,6 +140,10 @@ void init_graph_builder(py::module &m) { py::class_>(m, "Runtime"); py::class_, RuntimeObj>( m, "CpuRuntime"); +#ifdef USE_CUDA + py::class_, RuntimeObj>( + m, "CudaRuntime"); +#endif py::class_>(m, "Tensor") .def("shape", &TensorObj::getDims, policy::move) .def("src", &TensorObj::getOutputOf, policy::move); From dc79b7265506c0d139aa31289657d7a9ddf40ac6 Mon Sep 17 00:00:00 2001 From: YdrMaster Date: Thu, 23 Feb 2023 17:20:05 +0800 Subject: [PATCH 06/29] =?UTF-8?q?fix:=20=E9=87=8D=E6=96=B0=E5=AF=BC?= =?UTF-8?q?=E5=87=BA=20cuda=5Fruntime()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: YdrMaster --- pyinfinitensor/src/pyinfinitensor/onnx.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyinfinitensor/src/pyinfinitensor/onnx.py b/pyinfinitensor/src/pyinfinitensor/onnx.py index ef7d4de2..f815c037 100644 --- a/pyinfinitensor/src/pyinfinitensor/onnx.py +++ b/pyinfinitensor/src/pyinfinitensor/onnx.py @@ -28,6 +28,10 @@ from functools import reduce cpu_runtime = backend.cpu_runtime() +def cuda_runtime(): + return backend.cuda_runtime() + + def from_onnx(model: ModelProto, runtime) -> backend.GraphHandler: model = infer_shapes(model) handler = backend.GraphHandler(runtime) From 6dce129cb3fdc20578506528085fe10e0eee7e6b Mon Sep 17 00:00:00 2001 From: YdrMaster Date: Fri, 24 Feb 2023 09:22:03 +0800 Subject: [PATCH 07/29] fix: TensorObj::dataMalloc Signed-off-by: YdrMaster --- .github/workflows/build.yml | 4 ++-- pyinfinitensor/tests/test_onnx.py | 10 +++++----- src/core/tensor.cc | 11 ++--------- 3 files changed, 9 insertions(+), 16 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 879ae847..7dae8509 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -66,5 +66,5 @@ jobs: - name: Download test model run: wget ${{ env.resnet-download }} - # - name: Test onnx frontend - # run: make test-onnx + - name: Test onnx frontend + run: make test-onnx diff --git a/pyinfinitensor/tests/test_onnx.py b/pyinfinitensor/tests/test_onnx.py index 8be02f11..a529fb67 100644 --- a/pyinfinitensor/tests/test_onnx.py +++ b/pyinfinitensor/tests/test_onnx.py @@ -8,13 +8,13 @@ from onnx.helper import ( make_tensor_value_info, ) from onnx.checker import check_model -from pyinfinitensor.onnx import from_onnx, backend, runtime, to_onnx 
+from pyinfinitensor.onnx import from_onnx, backend, to_onnx, cpu_runtime def make_and_import_model(graph: onnx.GraphProto): model = make_model(graph) check_model(model) - from_onnx(model) + from_onnx(model, cpu_runtime) class TestStringMethods(unittest.TestCase): @@ -28,7 +28,7 @@ class TestStringMethods(unittest.TestCase): file=model_file, size=os.path.getsize(model_file) / 1024 / 1024 ) ) - from_onnx(onnx.load(model_file)) + from_onnx(onnx.load(model_file), cpu_runtime) def test_tensor(self): x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 2, 3]) @@ -289,10 +289,10 @@ class TestStringMethods(unittest.TestCase): graph = make_graph([matmul, add], "lr", [x, a, b], [y]) model = make_model(graph) check_model(model) - from_onnx(model) + from_onnx(model, cpu_runtime) def test_frontend(self): - handler = backend.GraphHandler(runtime) + handler = backend.GraphHandler(cpu_runtime) a = handler.tensor([1, 2, 3], 12) b = handler.tensor([1, 2, 3], 12) c = handler.tensor([1, 2, 3], 12) diff --git a/src/core/tensor.cc b/src/core/tensor.cc index 5050e53e..89294a8e 100644 --- a/src/core/tensor.cc +++ b/src/core/tensor.cc @@ -148,15 +148,8 @@ bool TensorObj::equalData(const Tensor &rhs) const { } void TensorObj::dataMalloc() { - if (data != nullptr) - return; - // IT_ASSERT(data == nullptr); - size_t bytesPerElement; - if (getDType() == DataType::Float32) - bytesPerElement = sizeof(float); - else if (getDType() == DataType::UInt32) - bytesPerElement = sizeof(uint32_t); - data = runtime->allocBlob(size() * bytesPerElement); + if (data == nullptr) + data = runtime->allocBlob(getBytes()); } void TensorObj::copyData(const TensorObj *src) { From f44a4daf70d734e61dcbfa47d579743620eeb35e Mon Sep 17 00:00:00 2001 From: YdrMaster Date: Fri, 24 Feb 2023 09:39:30 +0800 Subject: [PATCH 08/29] =?UTF-8?q?feat:=20=E5=AF=BC=E5=87=BA=E6=9C=AA?= =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E7=9A=84=E5=BC=A0=E9=87=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: YdrMaster --- include/core/graph_handler.h | 6 ------ pyinfinitensor/src/pyinfinitensor/onnx.py | 11 ++++++++--- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/include/core/graph_handler.h b/include/core/graph_handler.h index 82695faa..ff702cf8 100644 --- a/include/core/graph_handler.h +++ b/include/core/graph_handler.h @@ -90,20 +90,14 @@ class GraphHandlerObj { inline void data_malloc() { g->dataMalloc(); } inline void copy_int32(Tensor tensor, std::vector list) { - std::cout << "copy " << list.size() << " ints to (" << tensor->size() - << ")" << std::endl; tensor->copyData(list); } inline void copy_int64(Tensor tensor, std::vector list) { - std::cout << "copy " << list.size() << " ints to (" << tensor->size() - << ")" << std::endl; tensor->copyData(list); } inline void copy_float(Tensor tensor, std::vector list) { - std::cout << "copy " << list.size() << " floats to (" << tensor->size() - << ")" << std::endl; tensor->copyData(list); } diff --git a/pyinfinitensor/src/pyinfinitensor/onnx.py b/pyinfinitensor/src/pyinfinitensor/onnx.py index f815c037..217d230d 100644 --- a/pyinfinitensor/src/pyinfinitensor/onnx.py +++ b/pyinfinitensor/src/pyinfinitensor/onnx.py @@ -32,7 +32,9 @@ def cuda_runtime(): return backend.cuda_runtime() -def from_onnx(model: ModelProto, runtime) -> backend.GraphHandler: +def from_onnx( + model: ModelProto, runtime +) -> Tuple[Dict[str, backend.Tensor], backend.GraphHandler]: model = infer_shapes(model) handler = backend.GraphHandler(runtime) @@ -330,12 +332,13 @@ def 
from_onnx(model: ModelProto, runtime) -> backend.GraphHandler: handler.data_malloc() - inputs = [] + inputs: Dict[str, backend.Tensor] = {} for name, obj in tensors.items(): + print("{}: {}".format(name, obj)) tensor = data.get(name) if tensor == None: if any(input.name == name for input in model.graph.input): - inputs.append((name, tensor)) + inputs[name] = obj else: if tensor.data_type == TensorProto.INT32: handler.copy_int32(obj, [int(i) for i in tensor.int32_data]) @@ -346,6 +349,8 @@ def from_onnx(model: ModelProto, runtime) -> backend.GraphHandler: else: assert False, "Unsupported Tensor Type: {}".format(tensor.data_type) + return inputs, handler + def to_onnx(graph: backend.GraphHandler, name: str) -> ModelProto: class Context: From cf9bdb05629f15a6bfcd9514bbd8a1936728dc11 Mon Sep 17 00:00:00 2001 From: YdrMaster Date: Fri, 24 Feb 2023 10:08:04 +0800 Subject: [PATCH 09/29] =?UTF-8?q?feat:=20=E6=94=AF=E6=8C=81=E6=89=93?= =?UTF-8?q?=E5=8D=B0=E7=BB=93=E6=9E=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: YdrMaster --- pyinfinitensor/src/pyinfinitensor/onnx.py | 8 ++++++-- src/ffi/ffi_infinitensor.cc | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/pyinfinitensor/src/pyinfinitensor/onnx.py b/pyinfinitensor/src/pyinfinitensor/onnx.py index 217d230d..6b3e394c 100644 --- a/pyinfinitensor/src/pyinfinitensor/onnx.py +++ b/pyinfinitensor/src/pyinfinitensor/onnx.py @@ -34,7 +34,7 @@ def cuda_runtime(): def from_onnx( model: ModelProto, runtime -) -> Tuple[Dict[str, backend.Tensor], backend.GraphHandler]: +) -> Tuple[Dict[str, backend.Tensor], Dict[str, backend.Tensor], backend.GraphHandler]: model = infer_shapes(model) handler = backend.GraphHandler(runtime) @@ -349,7 +349,11 @@ def from_onnx( else: assert False, "Unsupported Tensor Type: {}".format(tensor.data_type) - return inputs, handler + outputs: Dict[str, backend.Tensor] = {} + for output in model.graph.output: + outputs[output.name] = tensors[output.name] + + return inputs, outputs, handler def to_onnx(graph: backend.GraphHandler, name: str) -> ModelProto: diff --git a/src/ffi/ffi_infinitensor.cc b/src/ffi/ffi_infinitensor.cc index 5d6085ea..402d5306 100644 --- a/src/ffi/ffi_infinitensor.cc +++ b/src/ffi/ffi_infinitensor.cc @@ -146,6 +146,7 @@ void init_graph_builder(py::module &m) { #endif py::class_>(m, "Tensor") .def("shape", &TensorObj::getDims, policy::move) + .def("printData", &TensorObj::printData, policy::automatic) .def("src", &TensorObj::getOutputOf, policy::move); py::class_>(m, "Operator") .def("op_type", &OperatorObj::getOpType, policy::automatic) From 3d122aebfe6d091d3452bb763aaf522853ab7789 Mon Sep 17 00:00:00 2001 From: YdrMaster Date: Fri, 24 Feb 2023 11:19:37 +0800 Subject: [PATCH 10/29] =?UTF-8?q?feat:=20=E6=94=AF=E6=8C=81=E5=AF=BC?= =?UTF-8?q?=E5=87=BA=E6=B5=AE=E7=82=B9=E5=90=91=E9=87=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: YdrMaster --- include/core/tensor.h | 24 +++++++++++++++++++++++- src/ffi/ffi_infinitensor.cc | 2 +- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/include/core/tensor.h b/include/core/tensor.h index 9e835a6b..422355e9 100644 --- a/include/core/tensor.h +++ b/include/core/tensor.h @@ -1,6 +1,11 @@ #pragma once #include "core/tensor_base.h" #include +#include + +#if USE_CUDA +#include "cuda/cuda_runtime.h" +#endif namespace infini { @@ -55,7 +60,6 @@ class TensorObj : public TensorBaseObj { obj->outputOf.reset(); return obj; } - // 
TODO: clarify whether clone copies data Tensor clone(Runtime runtime) const { auto obj = make_ref(*this); obj->runtime = runtime; @@ -68,6 +72,24 @@ class TensorObj : public TensorBaseObj { } return obj; } + inline std::vector cloneFloats() const { + IT_ASSERT(data != nullptr); + IT_ASSERT(getDType() == DataType::Float32); + std::vector ans(size()); + auto src = getRawDataPtr(); + auto dst = ans.data(); + auto bytes = getBytes(); + if (runtime->isCpu()) { + memcpy(dst, src, bytes); + } else { +#if USE_CUDA + cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToHost); +#else + IT_TODO_HALT(); +#endif + } + return ans; + } void printData() const; bool equalData(const Tensor &rhs) const; diff --git a/src/ffi/ffi_infinitensor.cc b/src/ffi/ffi_infinitensor.cc index 402d5306..2be4971b 100644 --- a/src/ffi/ffi_infinitensor.cc +++ b/src/ffi/ffi_infinitensor.cc @@ -146,7 +146,7 @@ void init_graph_builder(py::module &m) { #endif py::class_>(m, "Tensor") .def("shape", &TensorObj::getDims, policy::move) - .def("printData", &TensorObj::printData, policy::automatic) + .def("cloneFloats", &TensorObj::cloneFloats, policy::move) .def("src", &TensorObj::getOutputOf, policy::move); py::class_>(m, "Operator") .def("op_type", &OperatorObj::getOpType, policy::automatic) From 514666591e145586b8b30b7829cf69e75f217ba4 Mon Sep 17 00:00:00 2001 From: wanghailu Date: Fri, 24 Feb 2023 13:55:53 +0800 Subject: [PATCH 11/29] add batch_norm --- src/kernels/cuda/batch_norm.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/kernels/cuda/batch_norm.cc b/src/kernels/cuda/batch_norm.cc index 7ca75ba6..b150aaa5 100644 --- a/src/kernels/cuda/batch_norm.cc +++ b/src/kernels/cuda/batch_norm.cc @@ -28,9 +28,11 @@ class BatchNormCudnn : public CudaKernelWithoutConfig { for (size_t i = 0; i < dims.size(); ++i) { dimArray[i] = dims[i]; strideArray[i] = op->getInputs(0)->getStride()[i]; - dimPArray[i] = op->getInputs(1)->getDims()[i]; - stridePArray[i] = op->getInputs(1)->getStride()[i]; + dimPArray[i] = 1; + stridePArray[i] = 1; } + dimPArray[1] = op->getInputs(0)->getDims()[1]; + stridePArray[1] = op->getInputs(0)->getStride()[1]; // get inputs cudnnTensorDescriptor_t inDesc; checkCudnnError(cudnnCreateTensorDescriptor(&inDesc)); From fb3478bf3e395cd5b7d54d81937e3d95b1344250 Mon Sep 17 00:00:00 2001 From: YdrMaster Date: Fri, 24 Feb 2023 14:14:52 +0800 Subject: [PATCH 12/29] build: update Makefile Signed-off-by: YdrMaster --- Makefile | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 71033bbb..3661da78 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,17 @@ .PHONY : build clean install-python test-cpp test-onnx TYPE ?= release +CUDA ?= off + +CMAKE_OPT = -DCMAKE_BUILD_TYPE=$(TYPE) + +ifeq ($(CUDA), on) + CMAKE_OPT += -DUSE_CUDA=ON +endif build: mkdir -p build/$(TYPE) - cd build/$(TYPE) && cmake -DCMAKE_BUILD_TYPE=$(TYPE) ../.. && make -j8 + cd build/$(TYPE) && cmake $(CMAKE_OPT) ../.. 
&& make -j8
 
 clean:
 	rm -rf build

From 59bf59c10b966457cb2f0225cc3f43683434b1e0 Mon Sep 17 00:00:00 2001
From: YdrMaster
Date: Fri, 24 Feb 2023 14:28:34 +0800
Subject: [PATCH 13/29] docs: update README.md

Signed-off-by: YdrMaster
---
 Makefile  |  2 +-
 README.md | 15 +++++++++++++--
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index 3661da78..4de85392 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ CUDA ?= off
 
 CMAKE_OPT = -DCMAKE_BUILD_TYPE=$(TYPE)
 
-ifeq ($(CUDA), on)
+ifeq ($(CUDA), ON)
 	CMAKE_OPT += -DUSE_CUDA=ON
 endif
 
diff --git a/README.md b/README.md
index 8e109331..1f6c07fa 100644
--- a/README.md
+++ b/README.md
@@ -5,16 +5,27 @@
 ``` bash
 # Enter the root of InfiniTensor
 source test/script/env_lotus.sh
-mkdir build && cd build
-cmake -DUSE_CUDA=ON .. && make -j 12
+make CUDA=ON
 ```
 
+### Make Commands
+
+- `make`/`make build`: builds the project;
+- `make install-python`: builds the project, then installs the Python frontend;
+- `make test-cpp`: builds the project, then runs the C++ unit tests;
+- `make test-onnx`: runs the Python unit tests;
+
+---
+
+> Set the env var `CUDA=ON` to enable CUDA.
+
 ### CMake Options
 
 There are several configurable CMake options, see the [CMakeLists.txt file](/CMakeLists.txt#L5).
 
 - If `USE_BACKTRACE` is `ON`, `libdw-dev` have to be installed. See the README of [backward-cpp](https://github.com/bombela/backward-cpp) for details.
 - If `USE_PROTOBUF` is `ON`, `protobuf` have to be installed. See the README of [protobuf](https://github.com/protocolbuffers/protobuf) for details.
+- If `USE_CUDA` is `ON`, CUDA has to be installed.
 
 ## Contributor Guide

From 5b6698bac73adc641627da8122e671d07d3734f5 Mon Sep 17 00:00:00 2001
From: YdrMaster
Date: Fri, 24 Feb 2023 15:02:52 +0800
Subject: [PATCH 14/29] feat: export the whole graph's output tensors to onnx
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: YdrMaster
---
 include/core/tensor.h                     |  8 ++++----
 include/core/tensor_base.h                | 15 +++++++++------
 pyinfinitensor/src/pyinfinitensor/onnx.py |  7 ++++++-
 src/core/tensor.cc                        | 10 +++++-----
 src/ffi/ffi_infinitensor.cc               |  1 +
 5 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/include/core/tensor.h b/include/core/tensor.h
index 422355e9..1edc950a 100644
--- a/include/core/tensor.h
+++ b/include/core/tensor.h
@@ -56,16 +56,16 @@ class TensorObj : public TensorBaseObj {
     Tensor clone() const {
         auto obj = make_ref<TensorObj>(*this);
         obj->freeData();
-        obj->inputOf.clear();
-        obj->outputOf.reset();
+        obj->targets.clear();
+        obj->source.reset();
         return obj;
     }
     Tensor clone(Runtime runtime) const {
         auto obj = make_ref<TensorObj>(*this);
         obj->runtime = runtime;
         obj->freeData();
-        obj->inputOf.clear();
-        obj->outputOf.reset();
+        obj->targets.clear();
+        obj->source.reset();
         if (hasData()) {
             obj->dataMalloc();
             obj->copyData(this);
diff --git a/include/core/tensor_base.h b/include/core/tensor_base.h
index af2b97c3..09286a63 100644
--- a/include/core/tensor_base.h
+++ b/include/core/tensor_base.h
@@ -19,8 +19,8 @@ class TensorBaseObj : public Object {
     int dim;
 
     DataType dtype;
-    vector<WRef<OperatorObj>> inputOf;
-    WRef<OperatorObj> outputOf;
+    vector<WRef<OperatorObj>> targets;
+    WRef<OperatorObj> source;
     Blob data;
     Runtime runtime;
@@ -46,10 +46,13 @@ class TensorBaseObj : public Object {
     DataType getDType() const { return dtype; }
     Runtime getRuntime() const { return runtime; }
 
-    void addInputOf(const Operator &op) { inputOf.emplace_back(op); }
-    void setOutputOf(const Operator &op) {
outputOf = op; } - OpVec getInputOf() { return wrefs_to_refs(inputOf); } - Operator getOutputOf() { return outputOf.lock(); } + void addInputOf(const Operator &op) { targets.emplace_back(op); } + void setOutputOf(const Operator &op) { source = op; } + + bool hasTarget() const { return !targets.empty(); } + + OpVec getInputOf() const { return wrefs_to_refs(targets); } + Operator getOutputOf() const { return source.lock(); } // std::pair getOutputOfWithIndex(); // bool setScalar(VType val) { diff --git a/pyinfinitensor/src/pyinfinitensor/onnx.py b/pyinfinitensor/src/pyinfinitensor/onnx.py index 6b3e394c..f8fd662c 100644 --- a/pyinfinitensor/src/pyinfinitensor/onnx.py +++ b/pyinfinitensor/src/pyinfinitensor/onnx.py @@ -334,7 +334,6 @@ def from_onnx( inputs: Dict[str, backend.Tensor] = {} for name, obj in tensors.items(): - print("{}: {}".format(name, obj)) tensor = data.get(name) if tensor == None: if any(input.name == name for input in model.graph.input): @@ -382,6 +381,12 @@ def to_onnx(graph: backend.GraphHandler, name: str) -> ModelProto: def push_output(self, name: str, tensor: backend.Tensor) -> str: self.names[tensor] = name + if not tensor.has_target(): + shape = tensor.shape() + dtype = backend.tensor_dtype(tensor) + value_info = make_tensor_value_info(name, dtype, shape) + check_value_info(value_info) + self.outputs.append(value_info) return name def push_input(self, tensor: backend.Tensor) -> str: diff --git a/src/core/tensor.cc b/src/core/tensor.cc index 89294a8e..5f04e114 100644 --- a/src/core/tensor.cc +++ b/src/core/tensor.cc @@ -18,13 +18,13 @@ string TensorObj::toString() const { std::to_string(fuid) + ", shape " + vecToString(shape) + ", dtype " + dtype.toString(); vector inputOfGuid; - for (const auto &op : inputOf) + for (const auto &op : targets) inputOfGuid.emplace_back(op.lock()->getGuid()); - if (auto o = outputOf.lock()) - ret += ", outputOf " + std::to_string(o->getGuid()); + if (auto o = source.lock()) + ret += ", source " + std::to_string(o->getGuid()); else - ret += ", outputOf None"; - ret += ", inputOf " + vecToString(inputOfGuid); + ret += ", source None"; + ret += ", targets " + vecToString(inputOfGuid); return ret; } diff --git a/src/ffi/ffi_infinitensor.cc b/src/ffi/ffi_infinitensor.cc index 2be4971b..53e1376c 100644 --- a/src/ffi/ffi_infinitensor.cc +++ b/src/ffi/ffi_infinitensor.cc @@ -147,6 +147,7 @@ void init_graph_builder(py::module &m) { py::class_>(m, "Tensor") .def("shape", &TensorObj::getDims, policy::move) .def("cloneFloats", &TensorObj::cloneFloats, policy::move) + .def("has_target", &TensorObj::hasTarget, policy::automatic) .def("src", &TensorObj::getOutputOf, policy::move); py::class_>(m, "Operator") .def("op_type", &OperatorObj::getOpType, policy::automatic) From 71ca4459d9e79d13c05e6ea0359da6d68d8c50a2 Mon Sep 17 00:00:00 2001 From: YdrMaster Date: Fri, 24 Feb 2023 15:04:31 +0800 Subject: [PATCH 15/29] fmt Signed-off-by: YdrMaster --- src/kernels/cuda/batch_norm.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/kernels/cuda/batch_norm.cc b/src/kernels/cuda/batch_norm.cc index b150aaa5..35cc78c0 100644 --- a/src/kernels/cuda/batch_norm.cc +++ b/src/kernels/cuda/batch_norm.cc @@ -2,6 +2,7 @@ #include "core/kernel.h" #include "cuda/cuda_kernel_wihtout_config.h" #include "cuda/cuda_runtime.h" + namespace infini { class BatchNormCudnn : public CudaKernelWithoutConfig { void compute(const Operator &_op, @@ -29,7 +30,7 @@ class BatchNormCudnn : public CudaKernelWithoutConfig { dimArray[i] = dims[i]; strideArray[i] = 
op->getInputs(0)->getStride()[i]; dimPArray[i] = 1; - stridePArray[i] = 1; + stridePArray[i] = 1; } dimPArray[1] = op->getInputs(0)->getDims()[1]; stridePArray[1] = op->getInputs(0)->getStride()[1]; From a5e692baea00a43dd656c82c005a008c3d245200 Mon Sep 17 00:00:00 2001 From: YdrMaster Date: Fri, 24 Feb 2023 15:42:39 +0800 Subject: [PATCH 16/29] =?UTF-8?q?feat:=20=E5=AF=BC=E5=87=BA=20batchnorm=20?= =?UTF-8?q?=E5=88=B0=20onnx?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: YdrMaster --- include/operators/batch_norm.h | 8 +- pyinfinitensor/src/pyinfinitensor/onnx.py | 972 +++++++++++----------- pyinfinitensor/tests/test_onnx.py | 4 +- src/ffi/ffi_infinitensor.cc | 11 +- 4 files changed, 515 insertions(+), 480 deletions(-) diff --git a/include/operators/batch_norm.h b/include/operators/batch_norm.h index 20842615..8e41a043 100644 --- a/include/operators/batch_norm.h +++ b/include/operators/batch_norm.h @@ -39,9 +39,11 @@ class BatchNormObj : public OperatorObj { std::string toString() const override; // output size will be 3 when training - int numInputs() const override { return 5; } - int numOutputs() const override { return outputs.size(); } - float getEps() const { return eps; } + inline int numInputs() const override { return 5; } + inline int numOutputs() const override { return outputs.size(); } + inline float getMomentum() const { return momentum; } + inline float getEps() const { return eps; } + inline bool getTraining() const { return training; } private: vector getWorkloadVector() const override; diff --git a/pyinfinitensor/src/pyinfinitensor/onnx.py b/pyinfinitensor/src/pyinfinitensor/onnx.py index f8fd662c..28b8f514 100644 --- a/pyinfinitensor/src/pyinfinitensor/onnx.py +++ b/pyinfinitensor/src/pyinfinitensor/onnx.py @@ -32,490 +32,516 @@ def cuda_runtime(): return backend.cuda_runtime() -def from_onnx( - model: ModelProto, runtime -) -> Tuple[Dict[str, backend.Tensor], Dict[str, backend.Tensor], backend.GraphHandler]: - model = infer_shapes(model) - handler = backend.GraphHandler(runtime) - - tensors: Dict[str, backend.Tensor] = dict() - data: Dict[str, TensorProto] = dict() - - for input in model.graph.input: - dims = _take_shape_dim(input.type.tensor_type.shape) - tensors[input.name] = handler.tensor(dims, input.type.tensor_type.elem_type) - - for output in model.graph.output: - dims = _take_shape_dim(output.type.tensor_type.shape) - tensors[output.name] = handler.tensor(dims, output.type.tensor_type.elem_type) - - for initializer in model.graph.initializer: - data[initializer.name] = initializer - - for node in model.graph.node: - if node.op_type == "Conv": - attributes = _parse_attribute( - node, - { - "dilations": [1, 1], - "pads": [0, 0], - "strides": [1, 1], - }, - ) - (d, p, s) = (attributes[name] for name in ["dilations", "pads", "strides"]) - tensors[node.output[0]] = handler.conv( - tensors[node.input[0]], - tensors[node.input[1]], - tensors.get(node.output[0]), - p[0], - p[1], - s[0], - s[1], - d[0], - d[1], - ) - elif node.op_type == "MatMul": - tensors[node.output[0]] = handler.matmul( - tensors[node.input[0]], - tensors[node.input[1]], - tensors.get(node.output[0]), - False, - False, - None, - backend.ActType.Linear, - ) - elif node.op_type == "Gemm": - attributes = _parse_attribute( - node, {"alpha": 1.0, "beta": 1.0, "transA": 0, "transB": 0} - ) - (alpha, beta, transA, transB) = ( - attributes[name] for name in ["alpha", "beta", "transA", "transB"] - ) - # FIXME 不支持 `alpha` `beta` - assert alpha == 1.0 - 
assert beta == 1.0 - tensors[node.output[0]] = handler.matmul( - tensors[node.input[0]], - tensors[node.input[1]], - tensors.get(node.output[0]), - transA == 1, - transB == 1, - tensors[node.input[2]] if len(node.input) > 2 else None, - backend.ActType.Linear, - ) - elif node.op_type == "BatchNormalization": - (input, mean, var, scale, bias) = ( - tensors[node.input[i]] for i in [0, 3, 4, 1, 2] - ) - output = tensors.get(node.output[0]) - attributes = _parse_attribute( - node, {"momentum": 0.9, "epsilon": 1e-05, "training_mode": 0} - ) - (momentum, eps, training) = ( - attributes[name] for name in ["momentum", "epsilon", "training_mode"] - ) - tensors[node.output[0]] = handler.batchNorm( - input, output, mean, var, scale, bias, momentum, eps, training != 0 - ) - elif node.op_type == "MaxPool": - attributes = _parse_attribute( - node, - { - "kernel_shape": None, - "dilations": [1, 1], - "pads": [0, 0], - "strides": [1, 1], - }, - ) - (k, d, p, s) = ( - attributes[name] - for name in ["kernel_shape", "dilations", "pads", "strides"] - ) - tensors[node.output[0]] = handler.maxPool( - tensors[node.input[0]], - tensors.get(node.output[0]), - k[0], - k[1], - d[0], - d[1], - p[0], - p[1], - s[0], - s[1], - ) - elif node.op_type == "AveragePool": - attributes = _parse_attribute( - node, - { - "kernel_shape": None, - "pads": [0, 0], - "strides": [1, 1], - }, - ) - (k, p, s) = ( - attributes[name] for name in ["kernel_shape", "pads", "strides"] - ) - tensors[node.output[0]] = handler.avgPool( - tensors[node.input[0]], - tensors.get(node.output[0]), - k[0], - k[1], - 1, - 1, - p[0], - p[1], - s[0], - s[1], - ) - elif node.op_type == "GlobalAveragePool": - shape = next( - ( - value.type.tensor_type.shape - for value in model.graph.value_info - if value.name == node.input[0] - ), - None, - ) or next( - input.type.tensor_type.shape - for input in model.graph.input - if input.name == node.input[0] - ) - [_, _, h, w] = _take_shape_dim(shape) - tensors[node.output[0]] = handler.avgPool( - tensors[node.input[0]], - tensors.get(node.output[0]), - h, - w, - 1, - 1, - 0, - 0, - 1, - 1, - ) - elif node.op_type == "Add": - tensors[node.output[0]] = handler.add( - tensors[node.input[0]], - tensors[node.input[1]], - tensors.get(node.output[0]), - ) - elif node.op_type == "Sub": - tensors[node.output[0]] = handler.sub( - tensors[node.input[0]], - tensors[node.input[1]], - tensors.get(node.output[0]), - ) - elif node.op_type == "Mul": - tensors[node.output[0]] = handler.mul( - tensors[node.input[0]], - tensors[node.input[1]], - tensors.get(node.output[0]), - ) - elif node.op_type == "Div": - tensors[node.output[0]] = handler.div( - tensors[node.input[0]], - tensors[node.input[1]], - tensors.get(node.output[0]), - ) - elif node.op_type == "Pow": - tensors[node.output[0]] = handler.pow( - tensors[node.input[0]], - tensors[node.input[1]], - tensors.get(node.output[0]), - ) - elif node.op_type == "Relu": - tensors[node.output[0]] = handler.relu( - tensors[node.input[0]], - tensors.get(node.output[0]), - ) - elif node.op_type == "Sigmoid": - tensors[node.output[0]] = handler.sigmoid( - tensors[node.input[0]], - tensors.get(node.output[0]), - ) - elif node.op_type == "Tanh": - tensors[node.output[0]] = handler.tanh( - tensors[node.input[0]], - tensors.get(node.output[0]), - ) - elif node.op_type == "Softmax": - tensors[node.output[0]] = handler.softmax( - tensors[node.input[0]], - tensors.get(node.output[0]), - ) - elif node.op_type == "Abs": - tensors[node.output[0]] = handler.abs( - tensors[node.input[0]], - 
tensors.get(node.output[0]), - ) - elif node.op_type == "Identity": - tensors[node.output[0]] = handler.identity( - tensors[node.input[0]], - tensors.get(node.output[0]), - ) - elif node.op_type == "Flatten": - # FIXME 后端算子不支持沿任意轴展开 - axis = next( - (attr.i for attr in node.attribute if attr.name == "axis"), None - ) - assert axis == None or axis == 1 - tensors[node.output[0]] = handler.flatten( - tensors[node.input[0]], - tensors.get(node.output[0]), - ) - elif node.op_type == "Reshape": - input_shape = next( - ( - value.type.tensor_type.shape - for value in model.graph.value_info - if value.name == node.input[0] - ), - None, - ) or next( - input.type.tensor_type.shape - for input in model.graph.input - if input.name == node.input[0] - ) - dims = _take_shape_dim(input_shape) - size = reduce(lambda acc, x: acc * x, dims) - output_shape = [int(i) for i in data[node.input[1]].int64_data] - for i, x in enumerate(output_shape): - if x == 0: - output_shape[i] = dims[i] - temp = reduce(lambda acc, x: acc * x, output_shape) - if temp < 0: - output_shape[output_shape.index(-1)] = size // -temp - tensors[node.output[0]] = handler.reshape( - tensors[node.input[0]], - tensors.get(node.output[0]), - output_shape, - ) - elif node.op_type == "Concat": - tensors[node.output[0]] = handler.concat( - [tensors[name] for name in node.input], - tensors.get(node.output[0]), - next((attr.i for attr in node.attribute if attr.name == "axis")), - ) - elif node.op_type == "Gather": - tensors[node.output[0]] = handler.gather( - tensors[node.input[0]], - tensors[node.input[1]], - tensors.get(node.output[0]), - next((attr.i for attr in node.attribute if attr.name == "axis")), - ) - elif node.op_type == "ReduceMean": - tensors[node.output[0]] = handler.reduceMean( - tensors[node.input[0]], - tensors.get(node.output[0]), - tensors[node.input[1]] if len(node.input) > 1 else None, - next((attr.i for attr in node.attribute if attr.name == "keepdims")) - != 0, - ) - elif node.op_type == "Slice": - tensors[node.output[0]] = handler.slice( - tensors[node.input[0]], - tensors.get(node.output[0]), - _parse_data(data[node.input[1]]), - _parse_data(data[node.input[2]]), - _parse_data(data[node.input[3]]) if len(node.input) > 3 else None, - _parse_data(data[node.input[4]]) if len(node.input) > 4 else None, - ) - elif node.op_type == "Pad": - tensors[node.output[0]] = handler.pad( - tensors[node.input[0]], - tensors.get(node.output[0]), - _parse_data(data[node.input[1]]), - _parse_data(data[node.input[3]]) if len(node.input) > 3 else None, - ) - else: - raise Exception('Unsupported operator "{}"'.format(node.op_type)) - - handler.data_malloc() - +class OnnxStub: inputs: Dict[str, backend.Tensor] = {} - for name, obj in tensors.items(): - tensor = data.get(name) - if tensor == None: - if any(input.name == name for input in model.graph.input): - inputs[name] = obj - else: - if tensor.data_type == TensorProto.INT32: - handler.copy_int32(obj, [int(i) for i in tensor.int32_data]) - elif tensor.data_type == TensorProto.INT64: - handler.copy_int64(obj, [int(i) for i in tensor.int64_data]) - elif tensor.data_type == TensorProto.FLOAT: - handler.copy_float(obj, [float(i) for i in tensor.float_data]) - else: - assert False, "Unsupported Tensor Type: {}".format(tensor.data_type) - outputs: Dict[str, backend.Tensor] = {} - for output in model.graph.output: - outputs[output.name] = tensors[output.name] + handler: backend.GraphHandler - return inputs, outputs, handler + def __init__(self, model: ModelProto, runtime): + model = infer_shapes(model) 
+ self.handler = backend.GraphHandler(runtime) + tensors: Dict[str, backend.Tensor] = dict() + data: Dict[str, TensorProto] = dict() -def to_onnx(graph: backend.GraphHandler, name: str) -> ModelProto: - class Context: - # saves object names, including tensors and operators - names: Dict[Any, str] = dict() - # counts the occurrence times of each operator for naming - count_op: Dict[backend.OpType, int] = dict() - # counts input and output tensors for naming - count_in, count_out = 0, 0 - # saves nodes (operators) - nodes: List[NodeProto] = [] - # saves global input tensors - inputs: List[ValueInfoProto] = [] - # saves global output tensors - outputs: List[ValueInfoProto] = [] - # saves global input tensors - initializers: List[TensorProto] = [] - - def name_op(self, op: backend.Operator) -> Tuple[backend.OpType, str]: - ty = op.op_type() - name = "{}{}".format(ty.name, self.count_op.setdefault(ty, 0) + 1) - self.names[op] = name - self.count_op[ty] += 1 - return ty, name - - def push_output(self, name: str, tensor: backend.Tensor) -> str: - self.names[tensor] = name - if not tensor.has_target(): - shape = tensor.shape() - dtype = backend.tensor_dtype(tensor) - value_info = make_tensor_value_info(name, dtype, shape) - check_value_info(value_info) - self.outputs.append(value_info) - return name - - def push_input(self, tensor: backend.Tensor) -> str: - name = self.names.get(tensor) - # means that this input is a global input - if name is None: - self.count_in += 1 - name = "input{}".format(self.count_in) - self.names[tensor] = name - shape = tensor.shape() - dtype = backend.tensor_dtype(tensor) - value_info = make_tensor_value_info(name, dtype, shape) - check_value_info(value_info) - self.inputs.append(value_info) - - return name - - def push_data_input( - self, - node_name: str, - attr_name: str, - elem_type: int, - shape: Sequence[int], - vals: Any, - ) -> str: - name = "{}_{}".format(node_name, attr_name) - value_info = make_tensor_value_info(name, elem_type, shape) - tensor = make_tensor(name, elem_type, shape, vals) - check_value_info(value_info) - check_tensor(tensor) - self.inputs.append(value_info) - self.initializers.append(tensor) - return name - - def push_node(self, node: NodeProto) -> None: - check_node(node) - self.nodes.append(node) - - def build(self, name: str) -> ModelProto: - print() - print(ctx.names) - print() - print(ctx.inputs) - print() - print(ctx.outputs) - print() - print(ctx.nodes) - - graph = make_graph( - self.nodes, name, self.inputs, self.outputs, self.initializers + for input in model.graph.input: + dims = _take_shape_dim(input.type.tensor_type.shape) + tensors[input.name] = self.handler.tensor( + dims, input.type.tensor_type.elem_type ) - check_graph(graph) - model = make_model(graph) - check_model(model) + for output in model.graph.output: + dims = _take_shape_dim(output.type.tensor_type.shape) + tensors[output.name] = self.handler.tensor( + dims, output.type.tensor_type.elem_type + ) - return model + for initializer in model.graph.initializer: + data[initializer.name] = initializer - # 拓扑排序 - if not graph.topo_sort(): - raise Exception("Sorting fails") - - ops = graph.operators() # 图中所有算子(节点) - - ctx = Context() - - for op in ops: - ty, name = ctx.name_op(op) - inputs = [ctx.push_input(it) for it in op.inputs()] - outputs = [ - ctx.push_output("{}_{}".format(name, i), it) - for (i, it) in enumerate(op.outputs()) - ] - if ty == backend.OpType.Matmul: - ctx.push_node(make_node("MatMul", inputs, outputs, name)) - elif ty == backend.OpType.BatchNorm: - raise 
Exception("TODO") - elif ty == backend.OpType.MaxPool: - raise Exception("TODO") - elif ty == backend.OpType.AvgPool: - raise Exception("TODO") - elif ty in [ - backend.OpType.Add, - backend.OpType.Sub, - backend.OpType.Mul, - backend.OpType.Div, - backend.OpType.Pow, - backend.OpType.Relu, - backend.OpType.Sigmoid, - backend.OpType.Tanh, - backend.OpType.Softmax, - backend.OpType.Abs, - backend.OpType.Identity, - ]: - ctx.push_node(make_node(ty.name, inputs, outputs, name)) - elif ty == backend.OpType.Flatten: - raise Exception("TODO") - elif ty == backend.OpType.Reshape: - shape = backend.reshape_shape_of(op) - inputs.append( - ctx.push_data_input( - name, - "shape", - TensorProto.INT32, - [len(shape)], - shape, + for node in model.graph.node: + if node.op_type == "Conv": + attributes = _parse_attribute( + node, + { + "dilations": [1, 1], + "pads": [0, 0], + "strides": [1, 1], + }, ) - ) - ctx.push_node(make_node(ty.name, inputs, outputs, name)) - elif ty == backend.OpType.Concat: - axis = backend.concat_axis_of(op) - ctx.push_node(make_node(ty.name, inputs, outputs, name, axis=axis)) - elif ty == backend.OpType.Gather: - axis = backend.gather_axis_of(op) - ctx.push_node(make_node(ty.name, inputs, outputs, name, axis=axis)) - elif ty == backend.OpType.ReduceMean: - axes = backend.reduce_mean_axes_of(op) - inputs.append( - ctx.push_data_input(name, "axes", TensorProto.INT32, [len(axes)], axes) - ) - ctx.push_node(make_node(ty.name, inputs, outputs, name, keepdims=1)) - elif ty == backend.OpType.Slice: - raise Exception("TODO") - elif ty == backend.OpType.Pad: - raise Exception("TODO") - else: - raise Exception("Unsupported OpType {}".format(ty.name)) + (d, p, s) = ( + attributes[name] for name in ["dilations", "pads", "strides"] + ) + tensors[node.output[0]] = self.handler.conv( + tensors[node.input[0]], + tensors[node.input[1]], + tensors.get(node.output[0]), + p[0], + p[1], + s[0], + s[1], + d[0], + d[1], + ) + elif node.op_type == "MatMul": + tensors[node.output[0]] = self.handler.matmul( + tensors[node.input[0]], + tensors[node.input[1]], + tensors.get(node.output[0]), + False, + False, + None, + backend.ActType.Linear, + ) + elif node.op_type == "Gemm": + attributes = _parse_attribute( + node, {"alpha": 1.0, "beta": 1.0, "transA": 0, "transB": 0} + ) + (alpha, beta, transA, transB) = ( + attributes[name] for name in ["alpha", "beta", "transA", "transB"] + ) + # TODO 不支持这些参数 + assert alpha == 1.0 + assert beta == 1.0 + tensors[node.output[0]] = self.handler.matmul( + tensors[node.input[0]], + tensors[node.input[1]], + tensors.get(node.output[0]), + transA == 1, + transB == 1, + tensors[node.input[2]] if len(node.input) > 2 else None, + backend.ActType.Linear, + ) + elif node.op_type == "BatchNormalization": + (input, mean, var, scale, bias) = ( + tensors[node.input[i]] for i in [0, 3, 4, 1, 2] + ) + output = tensors.get(node.output[0]) + attributes = _parse_attribute( + node, {"momentum": 0.9, "epsilon": 1e-05, "training_mode": 0} + ) + (momentum, eps, training) = ( + attributes[name] + for name in ["momentum", "epsilon", "training_mode"] + ) + tensors[node.output[0]] = self.handler.batchNorm( + input, output, mean, var, scale, bias, momentum, eps, training != 0 + ) + elif node.op_type == "MaxPool": + attributes = _parse_attribute( + node, + { + "kernel_shape": None, + "dilations": [1, 1], + "pads": [0, 0], + "strides": [1, 1], + }, + ) + (k, d, p, s) = ( + attributes[name] + for name in ["kernel_shape", "dilations", "pads", "strides"] + ) + tensors[node.output[0]] = 
self.handler.maxPool( + tensors[node.input[0]], + tensors.get(node.output[0]), + k[0], + k[1], + d[0], + d[1], + p[0], + p[1], + s[0], + s[1], + ) + elif node.op_type == "AveragePool": + attributes = _parse_attribute( + node, + { + "kernel_shape": None, + "pads": [0, 0], + "strides": [1, 1], + }, + ) + (k, p, s) = ( + attributes[name] for name in ["kernel_shape", "pads", "strides"] + ) + tensors[node.output[0]] = self.handler.avgPool( + tensors[node.input[0]], + tensors.get(node.output[0]), + k[0], + k[1], + 1, + 1, + p[0], + p[1], + s[0], + s[1], + ) + elif node.op_type == "GlobalAveragePool": + shape = next( + ( + value.type.tensor_type.shape + for value in model.graph.value_info + if value.name == node.input[0] + ), + None, + ) or next( + input.type.tensor_type.shape + for input in model.graph.input + if input.name == node.input[0] + ) + [_, _, h, w] = _take_shape_dim(shape) + tensors[node.output[0]] = self.handler.avgPool( + tensors[node.input[0]], + tensors.get(node.output[0]), + h, + w, + 1, + 1, + 0, + 0, + 1, + 1, + ) + elif node.op_type == "Add": + tensors[node.output[0]] = self.handler.add( + tensors[node.input[0]], + tensors[node.input[1]], + tensors.get(node.output[0]), + ) + elif node.op_type == "Sub": + tensors[node.output[0]] = self.handler.sub( + tensors[node.input[0]], + tensors[node.input[1]], + tensors.get(node.output[0]), + ) + elif node.op_type == "Mul": + tensors[node.output[0]] = self.handler.mul( + tensors[node.input[0]], + tensors[node.input[1]], + tensors.get(node.output[0]), + ) + elif node.op_type == "Div": + tensors[node.output[0]] = self.handler.div( + tensors[node.input[0]], + tensors[node.input[1]], + tensors.get(node.output[0]), + ) + elif node.op_type == "Pow": + tensors[node.output[0]] = self.handler.pow( + tensors[node.input[0]], + tensors[node.input[1]], + tensors.get(node.output[0]), + ) + elif node.op_type == "Relu": + tensors[node.output[0]] = self.handler.relu( + tensors[node.input[0]], + tensors.get(node.output[0]), + ) + elif node.op_type == "Sigmoid": + tensors[node.output[0]] = self.handler.sigmoid( + tensors[node.input[0]], + tensors.get(node.output[0]), + ) + elif node.op_type == "Tanh": + tensors[node.output[0]] = self.handler.tanh( + tensors[node.input[0]], + tensors.get(node.output[0]), + ) + elif node.op_type == "Softmax": + tensors[node.output[0]] = self.handler.softmax( + tensors[node.input[0]], + tensors.get(node.output[0]), + ) + elif node.op_type == "Abs": + tensors[node.output[0]] = self.handler.abs( + tensors[node.input[0]], + tensors.get(node.output[0]), + ) + elif node.op_type == "Identity": + tensors[node.output[0]] = self.handler.identity( + tensors[node.input[0]], + tensors.get(node.output[0]), + ) + elif node.op_type == "Flatten": + # TODO 后端算子不支持沿任意轴展开 + axis = next( + (attr.i for attr in node.attribute if attr.name == "axis"), None + ) + assert axis == None or axis == 1 + tensors[node.output[0]] = self.handler.flatten( + tensors[node.input[0]], + tensors.get(node.output[0]), + ) + elif node.op_type == "Reshape": + input_shape = next( + ( + value.type.tensor_type.shape + for value in model.graph.value_info + if value.name == node.input[0] + ), + None, + ) or next( + input.type.tensor_type.shape + for input in model.graph.input + if input.name == node.input[0] + ) + dims = _take_shape_dim(input_shape) + size = reduce(lambda acc, x: acc * x, dims) + output_shape = [int(i) for i in data[node.input[1]].int64_data] + for i, x in enumerate(output_shape): + if x == 0: + output_shape[i] = dims[i] + temp = reduce(lambda acc, x: acc * 
x, output_shape) + if temp < 0: + output_shape[output_shape.index(-1)] = size // -temp + tensors[node.output[0]] = self.handler.reshape( + tensors[node.input[0]], + tensors.get(node.output[0]), + output_shape, + ) + elif node.op_type == "Concat": + tensors[node.output[0]] = self.handler.concat( + [tensors[name] for name in node.input], + tensors.get(node.output[0]), + next((attr.i for attr in node.attribute if attr.name == "axis")), + ) + elif node.op_type == "Gather": + tensors[node.output[0]] = self.handler.gather( + tensors[node.input[0]], + tensors[node.input[1]], + tensors.get(node.output[0]), + next((attr.i for attr in node.attribute if attr.name == "axis")), + ) + elif node.op_type == "ReduceMean": + tensors[node.output[0]] = self.handler.reduceMean( + tensors[node.input[0]], + tensors.get(node.output[0]), + tensors[node.input[1]] if len(node.input) > 1 else None, + next((attr.i for attr in node.attribute if attr.name == "keepdims")) + != 0, + ) + elif node.op_type == "Slice": + tensors[node.output[0]] = self.handler.slice( + tensors[node.input[0]], + tensors.get(node.output[0]), + _parse_data(data[node.input[1]]), + _parse_data(data[node.input[2]]), + _parse_data(data[node.input[3]]) if len(node.input) > 3 else None, + _parse_data(data[node.input[4]]) if len(node.input) > 4 else None, + ) + elif node.op_type == "Pad": + tensors[node.output[0]] = self.handler.pad( + tensors[node.input[0]], + tensors.get(node.output[0]), + _parse_data(data[node.input[1]]), + _parse_data(data[node.input[3]]) if len(node.input) > 3 else None, + ) + else: + raise Exception('Unsupported operator "{}"'.format(node.op_type)) - return ctx.build(name) + self.handler.data_malloc() + + for name, obj in tensors.items(): + tensor = data.get(name) + if tensor == None: + if any(input.name == name for input in model.graph.input): + self.inputs[name] = obj + else: + if tensor.data_type == TensorProto.INT32: + self.handler.copy_int32(obj, [int(i) for i in tensor.int32_data]) + elif tensor.data_type == TensorProto.INT64: + self.handler.copy_int64(obj, [int(i) for i in tensor.int64_data]) + elif tensor.data_type == TensorProto.FLOAT: + self.handler.copy_float(obj, [float(i) for i in tensor.float_data]) + else: + assert False, "Unsupported Tensor Type: {}".format(tensor.data_type) + + for output in model.graph.output: + self.outputs[output.name] = tensors[output.name] + + def to_onnx(self, name: str) -> ModelProto: + class Context: + # saves object names, including tensors and operators + names: Dict[Any, str] = dict() + # counts the occurrence times of each operator for naming + count_op: Dict[backend.OpType, int] = dict() + # counts input and output tensors for naming + count_in, count_out = 0, 0 + # saves nodes (operators) + nodes: List[NodeProto] = [] + # saves global input tensors + inputs: List[ValueInfoProto] = [] + # saves global output tensors + outputs: List[ValueInfoProto] = [] + # saves global input tensors + initializers: List[TensorProto] = [] + + def name_op(self, op: backend.Operator) -> Tuple[backend.OpType, str]: + ty = op.op_type() + name = "{}{}".format(ty.name, self.count_op.setdefault(ty, 0) + 1) + self.names[op] = name + self.count_op[ty] += 1 + return ty, name + + def push_output(self, name: str, tensor: backend.Tensor) -> str: + self.names[tensor] = name + if not tensor.has_target(): + shape = tensor.shape() + dtype = backend.tensor_dtype(tensor) + value_info = make_tensor_value_info(name, dtype, shape) + check_value_info(value_info) + self.outputs.append(value_info) + return name + + def 
push_input(self, tensor: backend.Tensor) -> str:
+                name = self.names.get(tensor)
+                # means that this input is a global input
+                if name is None:
+                    self.count_in += 1
+                    name = "input{}".format(self.count_in)
+                    self.names[tensor] = name
+                    shape = tensor.shape()
+                    dtype = backend.tensor_dtype(tensor)
+                    value_info = make_tensor_value_info(name, dtype, shape)
+                    check_value_info(value_info)
+                    self.inputs.append(value_info)
+
+                return name
+
+            def push_data_input(
+                self,
+                node_name: str,
+                attr_name: str,
+                elem_type: int,
+                shape: Sequence[int],
+                vals: Any,
+            ) -> str:
+                name = "{}_{}".format(node_name, attr_name)
+                value_info = make_tensor_value_info(name, elem_type, shape)
+                tensor = make_tensor(name, elem_type, shape, vals)
+                check_value_info(value_info)
+                check_tensor(tensor)
+                self.inputs.append(value_info)
+                self.initializers.append(tensor)
+                return name
+
+            def push_node(self, node: NodeProto) -> None:
+                check_node(node)
+                self.nodes.append(node)
+
+            def build(self, name: str) -> ModelProto:
+                print()
+                print(ctx.names)
+                print()
+                print(ctx.inputs)
+                print()
+                print(ctx.outputs)
+                print()
+                print(ctx.nodes)
+
+                graph = make_graph(
+                    self.nodes, name, self.inputs, self.outputs, self.initializers
+                )
+                check_graph(graph)
+
+                model = make_model(graph)
+                check_model(model)
+
+                return model
+
+        # topological sort
+        if not self.handler.topo_sort():
+            raise Exception("Sorting fails")
+
+        ops = self.handler.operators()  # all operators (nodes) in the graph
+
+        ctx = Context()
+
+        for op in ops:
+            ty, name = ctx.name_op(op)
+            inputs = [ctx.push_input(it) for it in op.inputs()]
+            outputs = [
+                ctx.push_output("{}_{}".format(name, i), it)
+                for (i, it) in enumerate(op.outputs())
+            ]
+            if ty == backend.OpType.Conv:
+                raise Exception("TODO")
+            elif ty == backend.OpType.Matmul:
+                ctx.push_node(make_node("MatMul", inputs, outputs, name))
+            elif ty == backend.OpType.BatchNorm:
+                inputs = [inputs[i] for i in [0, 3, 4, 1, 2]]
+                momentum, eps, training = backend.batch_norm_attrs_of(op)
+                ctx.push_node(
+                    make_node(
+                        "BatchNormalization",
+                        inputs,
+                        outputs,
+                        name,
+                        epsilon=eps,
+                        momentum=momentum,
+                        training_mode=training,
+                    )
+                )
+            elif ty == backend.OpType.MaxPool:
+                raise Exception("TODO")
+            elif ty == backend.OpType.AvgPool:
+                raise Exception("TODO")
+            elif ty in [
+                backend.OpType.Add,
+                backend.OpType.Sub,
+                backend.OpType.Mul,
+                backend.OpType.Div,
+                backend.OpType.Pow,
+                backend.OpType.Relu,
+                backend.OpType.Sigmoid,
+                backend.OpType.Tanh,
+                backend.OpType.Softmax,
+                backend.OpType.Abs,
+                backend.OpType.Identity,
+            ]:
+                ctx.push_node(make_node(ty.name, inputs, outputs, name))
+            elif ty == backend.OpType.Flatten:
+                raise Exception("TODO")
+            elif ty == backend.OpType.Reshape:
+                shape = backend.reshape_shape_of(op)
+                inputs.append(
+                    ctx.push_data_input(
+                        name,
+                        "shape",
+                        TensorProto.INT32,
+                        [len(shape)],
+                        shape,
+                    )
+                )
+                ctx.push_node(make_node(ty.name, inputs, outputs, name))
+            elif ty == backend.OpType.Concat:
+                axis = backend.concat_axis_of(op)
+                ctx.push_node(make_node(ty.name, inputs, outputs, name, axis=axis))
+            elif ty == backend.OpType.Gather:
+                axis = backend.gather_axis_of(op)
+                ctx.push_node(make_node(ty.name, inputs, outputs, name, axis=axis))
+            elif ty == backend.OpType.ReduceMean:
+                axes = backend.reduce_mean_axes_of(op)
+                inputs.append(
+                    ctx.push_data_input(
+                        name, "axes", TensorProto.INT32, [len(axes)], axes
+                    )
+                )
+                ctx.push_node(make_node(ty.name, inputs, outputs, name, keepdims=1))
+            elif ty == backend.OpType.Slice:
+                raise Exception("TODO")
+            elif ty == backend.OpType.Pad:
+                raise Exception("TODO")
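+            # A hypothetical sketch of the missing Slice branch, in the same
+            # pattern as the Reshape and ReduceMean branches above (it assumes
+            # a backend accessor `slice_attrs_of`, which does not exist yet):
+            #
+            #   starts, ends, axes = backend.slice_attrs_of(op)
+            #   for attr, vals in (("starts", starts), ("ends", ends), ("axes", axes)):
+            #       inputs.append(ctx.push_data_input(
+            #           name, attr, TensorProto.INT64, [len(vals)], vals))
+            #   ctx.push_node(make_node(ty.name, inputs, outputs, name))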
else: + raise Exception("Unsupported OpType {}".format(ty.name)) + + return ctx.build(name) + + +def from_onnx(model: ModelProto, runtime): + stub = OnnxStub(model, runtime) + return stub.inputs, stub.outputs, stub.handler def _parse_attribute(node: NodeProto, attrs: Dict[str, Any] = dict()) -> Dict[str, Any]: diff --git a/pyinfinitensor/tests/test_onnx.py b/pyinfinitensor/tests/test_onnx.py index a529fb67..150a96e7 100644 --- a/pyinfinitensor/tests/test_onnx.py +++ b/pyinfinitensor/tests/test_onnx.py @@ -8,7 +8,7 @@ from onnx.helper import ( make_tensor_value_info, ) from onnx.checker import check_model -from pyinfinitensor.onnx import from_onnx, backend, to_onnx, cpu_runtime +from pyinfinitensor.onnx import from_onnx, backend, cpu_runtime def make_and_import_model(graph: onnx.GraphProto): @@ -305,8 +305,6 @@ class TestStringMethods(unittest.TestCase): y = handler.tensor([3, 2, 1], 12) handler.reshape(x, y, [3, 2, 1]) - to_onnx(handler, "test_frontend") - if __name__ == "__main__": unittest.main() diff --git a/src/ffi/ffi_infinitensor.cc b/src/ffi/ffi_infinitensor.cc index 53e1376c..87599f28 100644 --- a/src/ffi/ffi_infinitensor.cc +++ b/src/ffi/ffi_infinitensor.cc @@ -1,4 +1,5 @@ #include "core/graph_handler.h" +#include "operators/batch_norm.h" #include "operators/concat.h" #include "operators/gather.h" #include "operators/reduce_mean.h" @@ -120,6 +121,13 @@ static Shape reshape_shape_of(Operator op) { return dynamic_cast(op.get())->getShape(); } +static std::tuple batch_norm_attrs_of(Operator op) { + IT_ASSERT(op->getOpType() == OpType::BatchNorm); + auto batchnorm = dynamic_cast(op.get()); + return std::make_tuple(batchnorm->getMomentum(), batchnorm->getEps(), + batchnorm->getTraining()); +} + void export_functions(py::module &m) { #define FUNCTION(NAME) def(#NAME, &NAME) m.def("cpu_runtime", &CpuRuntimeObj::getInstance) @@ -130,7 +138,8 @@ void export_functions(py::module &m) { .FUNCTION(reshape_shape_of) .FUNCTION(concat_axis_of) .FUNCTION(gather_axis_of) - .FUNCTION(reduce_mean_axes_of); + .FUNCTION(reduce_mean_axes_of) + .FUNCTION(batch_norm_attrs_of); #undef FUNCTION } From 40fb8390b12d4d77b55345c85175017ee2775182 Mon Sep 17 00:00:00 2001 From: YdrMaster Date: Fri, 24 Feb 2023 16:49:53 +0800 Subject: [PATCH 17/29] =?UTF-8?q?feat:=20=E5=AF=BC=E5=85=A5=E6=97=B6?= =?UTF-8?q?=E4=BF=9D=E5=AD=98=E6=9D=83=E9=87=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: YdrMaster --- include/core/tensor.h | 2 +- pyinfinitensor/src/pyinfinitensor/onnx.py | 2 ++ src/ffi/ffi_infinitensor.cc | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/core/tensor.h b/include/core/tensor.h index 1edc950a..c5823be0 100644 --- a/include/core/tensor.h +++ b/include/core/tensor.h @@ -32,7 +32,7 @@ class TensorObj : public TensorBaseObj { using TensorBaseObj::getData; VType getData(const Shape &pos) const; void dataMalloc(); - UidBaseType getFuid() const { return fuid; } + inline UidBaseType getFuid() const { return fuid; } void load(std::string file_path); void save(std::string file_path); diff --git a/pyinfinitensor/src/pyinfinitensor/onnx.py b/pyinfinitensor/src/pyinfinitensor/onnx.py index 28b8f514..d554883c 100644 --- a/pyinfinitensor/src/pyinfinitensor/onnx.py +++ b/pyinfinitensor/src/pyinfinitensor/onnx.py @@ -35,6 +35,7 @@ def cuda_runtime(): class OnnxStub: inputs: Dict[str, backend.Tensor] = {} outputs: Dict[str, backend.Tensor] = {} + initializer: Dict[int, TensorProto] = {} handler: backend.GraphHandler def __init__(self, 
model: ModelProto, runtime): @@ -348,6 +349,7 @@ class OnnxStub: if any(input.name == name for input in model.graph.input): self.inputs[name] = obj else: + self.initializer[obj.fuid()] = tensor if tensor.data_type == TensorProto.INT32: self.handler.copy_int32(obj, [int(i) for i in tensor.int32_data]) elif tensor.data_type == TensorProto.INT64: diff --git a/src/ffi/ffi_infinitensor.cc b/src/ffi/ffi_infinitensor.cc index 87599f28..d7230f42 100644 --- a/src/ffi/ffi_infinitensor.cc +++ b/src/ffi/ffi_infinitensor.cc @@ -154,6 +154,7 @@ void init_graph_builder(py::module &m) { m, "CudaRuntime"); #endif py::class_>(m, "Tensor") + .def("fuid", &TensorObj::getFuid, policy::automatic) .def("shape", &TensorObj::getDims, policy::move) .def("cloneFloats", &TensorObj::cloneFloats, policy::move) .def("has_target", &TensorObj::hasTarget, policy::automatic) From afed749b7423d5b9de3a7d65dd54a3b6923f94c5 Mon Sep 17 00:00:00 2001 From: YdrMaster Date: Fri, 24 Feb 2023 16:57:37 +0800 Subject: [PATCH 18/29] =?UTF-8?q?feat:=20=E6=94=AF=E6=8C=81=E5=AF=BC?= =?UTF-8?q?=E5=87=BA=E6=9D=83=E9=87=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: YdrMaster --- pyinfinitensor/src/pyinfinitensor/onnx.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/pyinfinitensor/src/pyinfinitensor/onnx.py b/pyinfinitensor/src/pyinfinitensor/onnx.py index d554883c..d20b4ca7 100644 --- a/pyinfinitensor/src/pyinfinitensor/onnx.py +++ b/pyinfinitensor/src/pyinfinitensor/onnx.py @@ -22,7 +22,7 @@ from onnx.checker import ( check_tensor, ) from onnx.shape_inference import infer_shapes -from typing import Dict, List, Any, Tuple, Sequence, Union +from typing import Dict, List, Any, Tuple, Sequence, Union, Optional from functools import reduce cpu_runtime = backend.cpu_runtime() @@ -365,7 +365,7 @@ class OnnxStub: def to_onnx(self, name: str) -> ModelProto: class Context: # saves object names, including tensors and operators - names: Dict[Any, str] = dict() + names: Dict[Union[backend.Tensor, backend.Operator], str] = dict() # counts the occurrence times of each operator for naming count_op: Dict[backend.OpType, int] = dict() # counts input and output tensors for naming @@ -396,7 +396,9 @@ class OnnxStub: self.outputs.append(value_info) return name - def push_input(self, tensor: backend.Tensor) -> str: + def push_input( + self, tensor: backend.Tensor, init: Optional[TensorProto] + ) -> str: name = self.names.get(tensor) # means that this input is a global input if name is None: @@ -408,7 +410,9 @@ class OnnxStub: value_info = make_tensor_value_info(name, dtype, shape) check_value_info(value_info) self.inputs.append(value_info) - + if init != None: + init.name = name + self.initializers.append(init) return name def push_data_input( @@ -462,7 +466,10 @@ class OnnxStub: for op in ops: ty, name = ctx.name_op(op) - inputs = [ctx.push_input(it) for it in op.inputs()] + inputs = [ + ctx.push_input(it, self.initializer.get(it.fuid())) + for it in op.inputs() + ] outputs = [ ctx.push_output("{}_{}".format(name, i), it) for (i, it) in enumerate(op.outputs()) From 8a871c3773a5058bad715833e2d713fd6a56dde2 Mon Sep 17 00:00:00 2001 From: YdrMaster Date: Mon, 27 Feb 2023 09:15:45 +0800 Subject: [PATCH 19/29] =?UTF-8?q?feat:=20=E5=AF=BC=E5=87=BA=20conv=20?= =?UTF-8?q?=E5=88=B0=20onnx?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: YdrMaster --- pyinfinitensor/src/pyinfinitensor/onnx.py | 13 ++++++++++- 
 src/ffi/ffi_infinitensor.cc               | 27 +++++++++++++++--------
 2 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/pyinfinitensor/src/pyinfinitensor/onnx.py b/pyinfinitensor/src/pyinfinitensor/onnx.py
index d20b4ca7..9af10f6e 100644
--- a/pyinfinitensor/src/pyinfinitensor/onnx.py
+++ b/pyinfinitensor/src/pyinfinitensor/onnx.py
@@ -475,7 +475,18 @@ class OnnxStub:
                 for (i, it) in enumerate(op.outputs())
             ]
             if ty == backend.OpType.Conv:
-                raise Exception("TODO")
+                ph, pw, dh, dw, sh, sw = backend.conv_attrs_of(op)
+                ctx.push_node(
+                    make_node(
+                        ty.name,
+                        inputs,
+                        outputs,
+                        name,
+                        pads=[ph, pw],
+                        strides=[sh, sw],
+                        dilations=[dh, dw],
+                    )
+                )
             elif ty == backend.OpType.Matmul:
                 ctx.push_node(make_node("MatMul", inputs, outputs, name))
             elif ty == backend.OpType.BatchNorm:
diff --git a/src/ffi/ffi_infinitensor.cc b/src/ffi/ffi_infinitensor.cc
index d7230f42..49164e07 100644
--- a/src/ffi/ffi_infinitensor.cc
+++ b/src/ffi/ffi_infinitensor.cc
@@ -1,6 +1,7 @@
 #include "core/graph_handler.h"
 #include "operators/batch_norm.h"
 #include "operators/concat.h"
+#include "operators/conv.h"
 #include "operators/gather.h"
 #include "operators/reduce_mean.h"
 #include "operators/reshape.h"
 #include <pybind11/stl.h>
@@ -100,6 +101,20 @@ static int tensor_dtype(Tensor t) {
 static Ref<CudaRuntimeObj> cuda_runtime() { return make_ref<CudaRuntimeObj>(); }
 #endif
 
+static std::tuple<int, int, int, int, int, int> conv_attrs_of(Operator op) {
+    IT_ASSERT(op->getOpType() == OpType::Conv);
+    auto conv = dynamic_cast<const ConvObj *>(op.get());
+    return std::make_tuple(conv->getPh(), conv->getPw(), conv->getDh(),
+                           conv->getDw(), conv->getSh(), conv->getSw());
+}
+
+static std::tuple<float, float, bool> batch_norm_attrs_of(Operator op) {
+    IT_ASSERT(op->getOpType() == OpType::BatchNorm);
+    auto batchnorm = dynamic_cast<const BatchNormObj *>(op.get());
+    return std::make_tuple(batchnorm->getMomentum(), batchnorm->getEps(),
+                           batchnorm->getTraining());
+}
+
 static int concat_axis_of(Operator op) {
     IT_ASSERT(op->getOpType() == OpType::Concat);
     return dynamic_cast<const ConcatObj *>(op.get())->getDim();
@@ -121,25 +136,19 @@ static Shape reshape_shape_of(Operator op) {
     return dynamic_cast<const ReshapeObj *>(op.get())->getShape();
 }
 
-static std::tuple<float, float, bool> batch_norm_attrs_of(Operator op) {
-    IT_ASSERT(op->getOpType() == OpType::BatchNorm);
-    auto batchnorm = dynamic_cast<const BatchNormObj *>(op.get());
-    return std::make_tuple(batchnorm->getMomentum(), batchnorm->getEps(),
-                           batchnorm->getTraining());
-}
-
 void export_functions(py::module &m) {
 #define FUNCTION(NAME) def(#NAME, &NAME)
     m.def("cpu_runtime", &CpuRuntimeObj::getInstance)
 #ifdef USE_CUDA
         .FUNCTION(cuda_runtime)
 #endif
+        .FUNCTION(conv_attrs_of)
+        .FUNCTION(batch_norm_attrs_of)
         .FUNCTION(tensor_dtype)
         .FUNCTION(reshape_shape_of)
        .FUNCTION(concat_axis_of)
        .FUNCTION(gather_axis_of)
-        .FUNCTION(reduce_mean_axes_of)
-        .FUNCTION(batch_norm_attrs_of);
+        .FUNCTION(reduce_mean_axes_of);
 #undef FUNCTION
 }

From e294e4643608977ebdca7ce005e4069f9292686c Mon Sep 17 00:00:00 2001
From: YdrMaster
Date: Mon, 27 Feb 2023 09:32:08 +0800
Subject: [PATCH 20/29] =?UTF-8?q?feat:=20=E5=AF=BC=E5=87=BA=20pool=20?=
 =?UTF-8?q?=E5=88=B0=20onnx?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: YdrMaster
---
 include/operators/pooling.h               | 26 ++++++++++++----------
 pyinfinitensor/src/pyinfinitensor/onnx.py | 27 +++++++++++++++++++++--
 src/ffi/ffi_infinitensor.cc               | 12 ++++++++++
 3 files changed, 51 insertions(+), 14 deletions(-)

diff --git a/include/operators/pooling.h b/include/operators/pooling.h
index c14bb8ad..1a1a6edf 100644
--- a/include/operators/pooling.h
+++ b/include/operators/pooling.h
@@ -39,20 +39,22 @@ class PoolingObj :
public OperatorObj { optional> inferShape(const TensorVec &inputs) const override; std::string toString() const override; - int numInputs() const override { return 1; } - int numOutputs() const override { return 1; } + inline int numInputs() const override { return 1; } + inline int numOutputs() const override { return 1; } - int getKh() const { return kh; } - int getKw() const { return kw; } - int getDh() const { return dh; } - int getDw() const { return dw; } - int getPh() const { return ph; } - int getPw() const { return pw; } - int getSh() const { return sh; } - int getSw() const { return sw; } + inline int getKh() const { return kh; } + inline int getKw() const { return kw; } + inline int getDh() const { return dh; } + inline int getDw() const { return dw; } + inline int getPh() const { return ph; } + inline int getPw() const { return pw; } + inline int getSh() const { return sh; } + inline int getSw() const { return sw; } - auto getPadStrideDilation() const { return tuple(ph, pw, sh, sw, dh, dw); } - auto getNCHWRS() const { return tuple(n, c, h, w, kh, kw); } + inline auto getPadStrideDilation() const { + return tuple(ph, pw, sh, sw, dh, dw); + } + inline auto getNCHWRS() const { return tuple(n, c, h, w, kh, kw); } private: vector getWorkloadVector() const override; diff --git a/pyinfinitensor/src/pyinfinitensor/onnx.py b/pyinfinitensor/src/pyinfinitensor/onnx.py index 9af10f6e..0915b9bc 100644 --- a/pyinfinitensor/src/pyinfinitensor/onnx.py +++ b/pyinfinitensor/src/pyinfinitensor/onnx.py @@ -504,9 +504,32 @@ class OnnxStub: ) ) elif ty == backend.OpType.MaxPool: - raise Exception("TODO") + kh, kw, dh, dw, ph, pw, sh, sw = backend.pool_attrs_of(op) + ctx.push_node( + make_node( + ty.name, + inputs, + outputs, + name, + kernel_shape=[kh, kw], + pads=[ph, pw], + dilations=[dh, dw], + strides=[sh, sw], + ) + ) elif ty == backend.OpType.AvgPool: - raise Exception("TODO") + kh, kw, dh, dw, ph, pw, sh, sw = backend.pool_attrs_of(op) + ctx.push_node( + make_node( + "AveragePool", + inputs, + outputs, + name, + kernel_shape=[kh, kw], + pads=[ph, pw], + strides=[sh, sw], + ) + ) elif ty in [ backend.OpType.Add, backend.OpType.Sub, diff --git a/src/ffi/ffi_infinitensor.cc b/src/ffi/ffi_infinitensor.cc index 49164e07..c9547f1c 100644 --- a/src/ffi/ffi_infinitensor.cc +++ b/src/ffi/ffi_infinitensor.cc @@ -3,6 +3,7 @@ #include "operators/concat.h" #include "operators/conv.h" #include "operators/gather.h" +#include "operators/pooling.h" #include "operators/reduce_mean.h" #include "operators/reshape.h" #include @@ -115,6 +116,16 @@ static std::tuple batch_norm_attrs_of(Operator op) { batchnorm->getTraining()); } +static std::tuple +pool_attrs_of(Operator op) { + IT_ASSERT(op->getOpType() == OpType::MaxPool || + op->getOpType() == OpType::AvgPool); + auto pool = dynamic_cast(op.get()); + return std::make_tuple(pool->getKh(), pool->getKw(), pool->getDh(), + pool->getDw(), pool->getPh(), pool->getPw(), + pool->getSh(), pool->getSw()); +} + static int concat_axis_of(Operator op) { IT_ASSERT(op->getOpType() == OpType::Concat); return dynamic_cast(op.get())->getDim(); @@ -144,6 +155,7 @@ void export_functions(py::module &m) { #endif .FUNCTION(conv_attrs_of) .FUNCTION(batch_norm_attrs_of) + .FUNCTION(pool_attrs_of) .FUNCTION(tensor_dtype) .FUNCTION(reshape_shape_of) .FUNCTION(concat_axis_of) From 6e1af09dd014cfb9bc4b7e71bf937923fa90b43d Mon Sep 17 00:00:00 2001 From: YdrMaster Date: Mon, 27 Feb 2023 09:35:17 +0800 Subject: [PATCH 21/29] fix: remove print Signed-off-by: YdrMaster --- 
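Note: with the debug prints gone, `build` just assembles and checks the
exported model, so an import/export round trip runs quietly. A minimal usage
sketch (the file name "model.onnx" and the model name "round_trip" are
placeholders):

    import onnx
    from pyinfinitensor.onnx import OnnxStub, cpu_runtime

    stub = OnnxStub(onnx.load("model.onnx"), cpu_runtime)
    exported = stub.to_onnx("round_trip")  # build runs check_model internally
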
pyinfinitensor/src/pyinfinitensor/onnx.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/pyinfinitensor/src/pyinfinitensor/onnx.py b/pyinfinitensor/src/pyinfinitensor/onnx.py index 0915b9bc..ffb61529 100644 --- a/pyinfinitensor/src/pyinfinitensor/onnx.py +++ b/pyinfinitensor/src/pyinfinitensor/onnx.py @@ -437,15 +437,6 @@ class OnnxStub: self.nodes.append(node) def build(self, name: str) -> ModelProto: - print() - print(ctx.names) - print() - print(ctx.inputs) - print() - print(ctx.outputs) - print() - print(ctx.nodes) - graph = make_graph( self.nodes, name, self.inputs, self.outputs, self.initializers ) From c18845a2fdfbf30b068c452962217393b9703886 Mon Sep 17 00:00:00 2001 From: YdrMaster Date: Mon, 27 Feb 2023 10:03:06 +0800 Subject: [PATCH 22/29] =?UTF-8?q?feat:=20=E5=A2=9E=E5=8A=A0=E6=8E=A8?= =?UTF-8?q?=E7=90=86=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: YdrMaster --- pyinfinitensor/src/pyinfinitensor/onnx.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pyinfinitensor/src/pyinfinitensor/onnx.py b/pyinfinitensor/src/pyinfinitensor/onnx.py index ffb61529..1f6afe19 100644 --- a/pyinfinitensor/src/pyinfinitensor/onnx.py +++ b/pyinfinitensor/src/pyinfinitensor/onnx.py @@ -572,6 +572,24 @@ class OnnxStub: return ctx.build(name) + def init(self) -> None: + self.handler.data_malloc() + + def run(self) -> None: + self.handler.run() + + def put_int32(self, name: str) -> None: + self.handler.copy_int32(self.inputs[name]) + + def put_int64(self, name: str) -> None: + self.handler.copy_int64(self.inputs[name]) + + def put_float(self, name: str) -> None: + self.handler.copy_float(self.inputs[name]) + + def take_float(self) -> List[float]: + return next(self.handler.outputs.values()).copyFloats() + def from_onnx(model: ModelProto, runtime): stub = OnnxStub(model, runtime) From e1c976568d408bd2dc071c6f49dad997fec23312 Mon Sep 17 00:00:00 2001 From: YdrMaster Date: Mon, 27 Feb 2023 10:13:55 +0800 Subject: [PATCH 23/29] =?UTF-8?q?fix:=20=E5=A2=9E=E5=8A=A0=E6=8E=A8?= =?UTF-8?q?=E7=90=86=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: YdrMaster --- pyinfinitensor/src/pyinfinitensor/onnx.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pyinfinitensor/src/pyinfinitensor/onnx.py b/pyinfinitensor/src/pyinfinitensor/onnx.py index 1f6afe19..f85addb4 100644 --- a/pyinfinitensor/src/pyinfinitensor/onnx.py +++ b/pyinfinitensor/src/pyinfinitensor/onnx.py @@ -578,17 +578,17 @@ class OnnxStub: def run(self) -> None: self.handler.run() - def put_int32(self, name: str) -> None: - self.handler.copy_int32(self.inputs[name]) + def put_int32(self, name: str, data: List[int]) -> None: + self.handler.copy_int32(self.inputs[name], data) - def put_int64(self, name: str) -> None: - self.handler.copy_int64(self.inputs[name]) + def put_int64(self, name: str, data: List[int]) -> None: + self.handler.copy_int64(self.inputs[name], data) - def put_float(self, name: str) -> None: - self.handler.copy_float(self.inputs[name]) + def put_float(self, name: str, data: List[int]) -> None: + self.handler.copy_float(self.inputs[name], data) def take_float(self) -> List[float]: - return next(self.handler.outputs.values()).copyFloats() + return next(iter(self.handler.outputs.values())).copyFloats() def from_onnx(model: ModelProto, runtime): From 9db97eb212a08fcd19ce3db8a7dcaf411b286135 Mon Sep 17 00:00:00 2001 From: 
YdrMaster Date: Tue, 21 Mar 2023 14:00:04 +0800 Subject: [PATCH 24/29] =?UTF-8?q?refactor:=20=E6=95=B4=E5=90=88=E6=93=8D?= =?UTF-8?q?=E4=BD=9C=E5=BC=A0=E9=87=8F=E6=95=B0=E6=8D=AE=E7=9A=84=E6=96=B9?= =?UTF-8?q?=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: YdrMaster --- include/core/graph_handler.h | 6 +- include/core/tensor.h | 72 ++++++++-------- include/core/tensor_base.h | 1 - src/core/tensor.cc | 24 ++---- src/core/tensor_base.cc | 7 +- src/ffi/ffi_infinitensor.cc | 2 +- src/kernels/cpu/membound.cc | 4 +- src/utils/dataloader.cc | 4 +- test/core/test_graph.cc | 20 ++--- test/core/test_search.cc | 4 +- test/core/test_tensor_save.cc | 8 +- test/kernels/cuda/test_cuda_batch_norm.cc | 4 +- test/kernels/cuda/test_cuda_gather.cc | 8 +- test/kernels/cuda/test_cuda_reduce_mean.cc | 2 +- test/kernels/cuda/test_cuda_resize.cc | 96 +++++++++++----------- test/operators/test_conv.cc | 2 +- test/operators/test_resize.cc | 12 +-- 17 files changed, 133 insertions(+), 143 deletions(-) diff --git a/include/core/graph_handler.h b/include/core/graph_handler.h index ff702cf8..72f7a6a3 100644 --- a/include/core/graph_handler.h +++ b/include/core/graph_handler.h @@ -90,15 +90,15 @@ class GraphHandlerObj { inline void data_malloc() { g->dataMalloc(); } inline void copy_int32(Tensor tensor, std::vector list) { - tensor->copyData(list); + tensor->copyin(list); } inline void copy_int64(Tensor tensor, std::vector list) { - tensor->copyData(list); + tensor->copyin(list); } inline void copy_float(Tensor tensor, std::vector list) { - tensor->copyData(list); + tensor->copyin(list); } inline void run() { g->getRuntime()->run(g); } diff --git a/include/core/tensor.h b/include/core/tensor.h index c5823be0..af3ed3f6 100644 --- a/include/core/tensor.h +++ b/include/core/tensor.h @@ -15,39 +15,61 @@ using Shape = vector; class TensorObj : public TensorBaseObj { private: Shape shape; - Fuid fuid; // Cloned tensors share the same id. Tensors constructed from - // scratch have a new id. + size_t _size; // Cache of Π(shape). + Fuid fuid; // Cloned tensors share the same id. Tensors constructed from + // scratch have a new id. + + inline void copyin(const void *ptr, size_t size) { + runtime->copyBlobFromCPU(getRawDataPtr(), ptr, size); + } + inline void copyout(void *ptr, size_t size) const { + runtime->copyBlobToCPU(ptr, getRawDataPtr(), size); + } public: - TensorObj(const Shape &shape, DataType dtype, Runtime runtime); + TensorObj(Shape shape, DataType dtype, Runtime runtime); virtual ~TensorObj() {} string toString() const override; - size_t size() const; - size_t getBytes() const; + inline size_t size() const { return _size; } + inline size_t getBytes() const { return _size * dtype.getSize(); } Shape getDims() const { return shape; } vector getStride() const; - size_t getOffset(const Shape &ds) const; - using TensorBaseObj::getData; - VType getData(const Shape &pos) const; + size_t getOffset(const vector &ds) const; void dataMalloc(); inline UidBaseType getFuid() const { return fuid; } void load(std::string file_path); void save(std::string file_path); - template void copyData(const T *dptr) { + // Copy elements from `data`. + template inline void copyin(const vector &data) { IT_ASSERT(DataType::get() == dtype); - IT_ASSERT(data != nullptr); - runtime->copyBlobFromCPU(getRawDataPtr(), dptr, getBytes()); + IT_ASSERT(data.size() >= _size); + copyin(data.data(), getBytes()); + } + // Copy all the elements to a vector. 
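+    // e.g. a host-side read of a Float32 tensor after dataMalloc() (sketch):
+    //   std::vector<float> host = tensor->copyout<float>();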
+ template inline auto copyout() const { + IT_ASSERT(DataType::get() == dtype); + std::vector ans(_size); + copyout(ans.data(), getBytes()); + return ans; + } + // Copy the element at `pos`. + template inline auto copyout(const vector &pos) const { + IT_ASSERT(DataType::get() == dtype); + auto offset = getOffset(pos); + auto bytes = dtype.getSize(); + T ans; + runtime->copyBlobToCPU(&ans, getRawDataPtr() + offset * bytes, + bytes); + return ans; } - template void copyData(vector dataVector) { - IT_ASSERT(DataType::get() == dtype); - IT_ASSERT(dataVector.size() >= size()); - copyData(dataVector.data()); - } + inline auto copyoutFloat() const { return copyout(); } + inline auto copyoutInt32() const { return copyout(); } + inline auto copyoutInt64() const { return copyout(); } void copyData(const TensorObj *src); void copyData(const Tensor &src) { copyData(src.get()); } @@ -72,24 +94,6 @@ class TensorObj : public TensorBaseObj { } return obj; } - inline std::vector cloneFloats() const { - IT_ASSERT(data != nullptr); - IT_ASSERT(getDType() == DataType::Float32); - std::vector ans(size()); - auto src = getRawDataPtr(); - auto dst = ans.data(); - auto bytes = getBytes(); - if (runtime->isCpu()) { - memcpy(dst, src, bytes); - } else { -#if USE_CUDA - cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToHost); -#else - IT_TODO_HALT(); -#endif - } - return ans; - } void printData() const; bool equalData(const Tensor &rhs) const; diff --git a/include/core/tensor_base.h b/include/core/tensor_base.h index 09286a63..4c73094a 100644 --- a/include/core/tensor_base.h +++ b/include/core/tensor_base.h @@ -41,7 +41,6 @@ class TensorBaseObj : public Object { IT_ASSERT(data != nullptr); return data->getPtr(); } - VType getData(size_t offset) const; DataType getDType() const { return dtype; } Runtime getRuntime() const { return runtime; } diff --git a/src/core/tensor.cc b/src/core/tensor.cc index 5f04e114..b8a41728 100644 --- a/src/core/tensor.cc +++ b/src/core/tensor.cc @@ -3,15 +3,16 @@ #include "core/operator.h" #include "core/runtime.h" #include "utils/dataloader.h" +#include namespace infini { -TensorObj::TensorObj(const Shape &shape, DataType dtype, Runtime runtime) - : TensorBaseObj(shape.size(), dtype, runtime), shape(shape) {} - -VType TensorObj::getData(const Shape &pos) const { - return getData(getOffset(pos)); -} +TensorObj::TensorObj(Shape shape_, DataType dtype, Runtime runtime) + : TensorBaseObj(shape.size(), dtype, runtime), shape(std::move(shape_)), + _size(shape.empty() + ? 
0 + : std::accumulate(shape.begin(), shape.end(), 1, + [](auto acc, auto x) { return acc * x; })) {} string TensorObj::toString() const { string ret = "Tensor " + std::to_string(guid) + ", Fuid " + @@ -28,7 +29,7 @@ string TensorObj::toString() const { return ret; } -size_t TensorObj::getOffset(const Shape &pos) const { +size_t TensorObj::getOffset(const vector &pos) const { auto nDim = pos.size(); IT_ASSERT(shape.size() == nDim); if (pos.empty()) @@ -53,15 +54,6 @@ vector TensorObj::getStride() const { return ret; } -size_t TensorObj::size() const { - size_t ret = 1; - for (const auto &d : shape) - ret *= d; - return ret; -} - -size_t TensorObj::getBytes() const { return size() * dtype.getSize(); } - void TensorObj::printData() const { IT_ASSERT(data != nullptr); if (!runtime->isCpu()) diff --git a/src/core/tensor_base.cc b/src/core/tensor_base.cc index 98409322..62612cc8 100644 --- a/src/core/tensor_base.cc +++ b/src/core/tensor_base.cc @@ -6,9 +6,4 @@ namespace infini { TensorBaseObj::TensorBaseObj(int dim, DataType dtype, Runtime runtime) : dim(dim), dtype(dtype), runtime(runtime) {} -VType TensorBaseObj::getData(size_t offset) const { - // TODO: check cuda array - return (data->getPtr())[offset]; -} - -}; // namespace infini \ No newline at end of file +}; // namespace infini diff --git a/src/ffi/ffi_infinitensor.cc b/src/ffi/ffi_infinitensor.cc index c9547f1c..641c2c3b 100644 --- a/src/ffi/ffi_infinitensor.cc +++ b/src/ffi/ffi_infinitensor.cc @@ -177,7 +177,7 @@ void init_graph_builder(py::module &m) { py::class_>(m, "Tensor") .def("fuid", &TensorObj::getFuid, policy::automatic) .def("shape", &TensorObj::getDims, policy::move) - .def("cloneFloats", &TensorObj::cloneFloats, policy::move) + .def("copyoutFloat", &TensorObj::copyoutFloat, policy::move) .def("has_target", &TensorObj::hasTarget, policy::automatic) .def("src", &TensorObj::getOutputOf, policy::move); py::class_>(m, "Operator") diff --git a/src/kernels/cpu/membound.cc b/src/kernels/cpu/membound.cc index 361ee5ac..31755389 100644 --- a/src/kernels/cpu/membound.cc +++ b/src/kernels/cpu/membound.cc @@ -64,7 +64,7 @@ class MemboundInterpreter : public Kernel { vector valsUint(vals.size()); for (size_t i = 0; i < vals.size(); ++i) valsUint[i] = (uint32_t)vals[i]; - output->copyData(valsUint); + output->copyin(valsUint); } void compute(const Operator &op, const RuntimeObj *context) const override { @@ -81,4 +81,4 @@ class MemboundInterpreter : public Kernel { REGISTER_KERNEL(Device::CPU, OpType::MemBound, DataType::UInt32, MemboundInterpreter, "MemboundInterpreter_CPU"); -} // namespace infini \ No newline at end of file +} // namespace infini diff --git a/src/utils/dataloader.cc b/src/utils/dataloader.cc index 1de0cd8d..73ce34fc 100644 --- a/src/utils/dataloader.cc +++ b/src/utils/dataloader.cc @@ -59,13 +59,13 @@ void loadTensorData(TensorObj *tensor, std::string file_path) { for (int i = 0; i < temp.data_float_size(); ++i) { data_temp.push_back(temp.data_float(i)); } - tensor->copyData(data_temp); + tensor->copyin(data_temp); } else if (tensor->getDType() == DataType::UInt32) { std::vector data_temp; for (int i = 0; i < temp.data_uint32_size(); ++i) { data_temp.push_back(temp.data_uint32(i)); } - tensor->copyData(data_temp); + tensor->copyin(data_temp); } else { IT_TODO_HALT(); } diff --git a/test/core/test_graph.cc b/test/core/test_graph.cc index 65bcf68a..8140ea43 100644 --- a/test/core/test_graph.cc +++ b/test/core/test_graph.cc @@ -15,8 +15,8 @@ TEST(Graph, build_and_run) { Tensor w0 = g->addTensor({1, 3, 4}, 
DataType::UInt32); Tensor o0 = g->addTensor({1, 2, 4}, DataType::UInt32); g->dataMalloc(); - i0->copyData(vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); - w0->copyData(vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + i0->copyin(vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + w0->copyin(vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); auto matmul = g->addOpWithOutputs(i0, w0, o0); g->print(); // check inputOf and outputsOf for tensor @@ -33,7 +33,7 @@ TEST(Graph, build_and_run) { // check execution results auto ans = make_ref(Shape{1, 2, 4}, DataType::UInt32, runtime); ans->dataMalloc(); - ans->copyData(vector{38, 44, 50, 56, 83, 98, 113, 128}); + ans->copyin(vector{38, 44, 50, 56, 83, 98, 113, 128}); EXPECT_TRUE(o0->equalData(ans)); } @@ -84,8 +84,8 @@ TEST(Graph, perf_engine) { auto matmul = g->addOp(i0, w0, nullptr); g->dataMalloc(); - i0->copyData(vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); - w0->copyData(vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + i0->copyin(vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + w0->copyin(vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); runtime->run(g, true, true); double perfTime = runtime->getPerfTime(g); // The example matmul takes 0.0036ms with one core @@ -94,7 +94,7 @@ TEST(Graph, perf_engine) { // check answer auto ans = make_ref(Shape{1, 2, 4}, DataType::UInt32, runtime); ans->dataMalloc(); - ans->copyData(vector{38, 44, 50, 56, 83, 98, 113, 128}); + ans->copyin(vector{38, 44, 50, 56, 83, 98, 113, 128}); EXPECT_TRUE(matmul->getOutput()->equalData(ans)); } @@ -105,8 +105,8 @@ TEST(Graph, test_tensor_id) { Tensor w0 = g->addTensor({1, 3, 4}, DataType::UInt32); Tensor o0 = g->addTensor({1, 2, 4}, DataType::UInt32); g->dataMalloc(); - i0->copyData(vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); - w0->copyData(vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + i0->copyin(vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + w0->copyin(vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); auto i1 = g->addTensor(i0->clone()); auto matmul = g->addOpWithOutputs(i0, w0, o0); g->print(); @@ -123,8 +123,8 @@ TEST(Graph, test_OpVec_ctor) { Tensor w0 = g->addTensor({1, 3, 4}, DataType::UInt32); Tensor o0 = g->addTensor({1, 2, 4}, DataType::UInt32); g->dataMalloc(); - i0->copyData(vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); - w0->copyData(vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + i0->copyin(vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + w0->copyin(vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); auto o1 = g->addTensor(o0->clone()); auto matmul = g->addOpWithOutputs(i0, w0, o0); g->addOp(o1, nullptr); diff --git a/test/core/test_search.cc b/test/core/test_search.cc index 0e882182..5f531c21 100644 --- a/test/core/test_search.cc +++ b/test/core/test_search.cc @@ -19,8 +19,8 @@ namespace infini { // Tensor w0 = g->addTensor({1, 3, 4}, DataType::UInt32); // Tensor o0 = g->addTensor({1, 2, 4}, DataType::UInt32); // g->dataMalloc(); -// i0->copyData(vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); -// w0->copyData(vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); +// i0->copyin(vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); +// w0->copyin(vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); // auto matmul = g->addOpWithOutputs(i0, w0, o0); // g->print(); // // check inputOf and outputsOf for tensor diff --git a/test/core/test_tensor_save.cc b/test/core/test_tensor_save.cc index d310ab84..086e6455 100644 --- a/test/core/test_tensor_save.cc +++ b/test/core/test_tensor_save.cc @@ -14,10 +14,10 @@ TEST(Prtotbuf, save_and_load) { Tensor u0 = g->addTensor({1, 3, 4}, 
DataType::UInt32); Tensor u1 = g->addTensor({1, 3, 4}, DataType::UInt32); g->dataMalloc(); - i0->copyData(vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); - w0->copyData(vector{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); - u0->copyData(vector{1, 3, 5, 7, 9, 2, 4, 6, 8, 10, 0, 0}); - u1->copyData(vector{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0}); + i0->copyin(vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + w0->copyin(vector{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); + u0->copyin(vector{1, 3, 5, 7, 9, 2, 4, 6, 8, 10, 0, 0}); + u1->copyin(vector{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0}); i0->save("i0.pb"); w0->printData(); w0->load("i0.pb"); diff --git a/test/kernels/cuda/test_cuda_batch_norm.cc b/test/kernels/cuda/test_cuda_batch_norm.cc index e3a5ef04..0f92710d 100644 --- a/test/kernels/cuda/test_cuda_batch_norm.cc +++ b/test/kernels/cuda/test_cuda_batch_norm.cc @@ -22,8 +22,8 @@ TEST(CUDA_BatchNorm, run) { // Build input data on CPU gCpu->dataMalloc(); iCpu->setData(IncrementalGenerator()); - meanCpu->copyData(vector{1, 6, 9}); - varCpu->copyData(vector{4, 1, 9}); + meanCpu->copyin(vector{1, 6, 9}); + varCpu->copyin(vector{4, 1, 9}); scaleCpu->setData(OneGenerator()); biasCpu->setData(ZeroGenerator()); diff --git a/test/kernels/cuda/test_cuda_gather.cc b/test/kernels/cuda/test_cuda_gather.cc index 9108d719..807ea0ff 100644 --- a/test/kernels/cuda/test_cuda_gather.cc +++ b/test/kernels/cuda/test_cuda_gather.cc @@ -181,8 +181,8 @@ TEST(Gather, Cuda) { auto input = gCpu->addTensor({3, 2}, DataType::Float32); auto index = gCpu->addTensor({2, 2}, DataType::UInt32); gCpu->dataMalloc(); - input->copyData(vector{1, 2, 3, 4, 5, 6}); - index->copyData(vector{0, 1, 1, 2}); + input->copyin(vector{1, 2, 3, 4, 5, 6}); + index->copyin(vector{0, 1, 1, 2}); auto cudaRuntime = make_ref(); Graph gCuda = make_ref(cudaRuntime); @@ -203,7 +203,7 @@ TEST(Gather, Cuda) { auto index = gCpu->addTensor({1, 2}, DataType::UInt32); gCpu->dataMalloc(); input->setData(IncrementalGenerator()); - index->copyData(vector{0, 2}); + index->copyin(vector{0, 2}); auto cudaRuntime = make_ref(); Graph gCuda = make_ref(cudaRuntime); @@ -224,7 +224,7 @@ TEST(Gather, Cuda) { auto index = gCpu->addTensor({3, 1}, DataType::UInt32); gCpu->dataMalloc(); input->setData(IncrementalGenerator()); - index->copyData(vector{0, 3, 1}); + index->copyin(vector{0, 3, 1}); auto cudaRuntime = make_ref(); Graph gCuda = make_ref(cudaRuntime); diff --git a/test/kernels/cuda/test_cuda_reduce_mean.cc b/test/kernels/cuda/test_cuda_reduce_mean.cc index 90356994..ff309635 100644 --- a/test/kernels/cuda/test_cuda_reduce_mean.cc +++ b/test/kernels/cuda/test_cuda_reduce_mean.cc @@ -18,7 +18,7 @@ void test_reducemean(const Shape &shape, const vector &data, // Build input data on CPU Tensor icpu = make_ref(shape, DataType::Float32, cpuRuntime); icpu->dataMalloc(); - icpu->copyData(data); + icpu->copyin(data); // Build CUDA graph Graph g = make_ref(cudaRuntime); diff --git a/test/kernels/cuda/test_cuda_resize.cc b/test/kernels/cuda/test_cuda_resize.cc index 913f8acd..b462f516 100644 --- a/test/kernels/cuda/test_cuda_resize.cc +++ b/test/kernels/cuda/test_cuda_resize.cc @@ -13,8 +13,8 @@ TEST(Resize, Cuda_downsample_sizes_nearest) { auto input = gCpu->addTensor({1, 1, 2, 4}, DataType::Float32); auto sizes = gCpu->addTensor({4}, DataType::UInt32); gCpu->dataMalloc(); - input->copyData(vector{1, 2, 3, 4, 5, 6, 7, 8}); - sizes->copyData(vector{1, 1, 1, 3}); + input->copyin(vector{1, 2, 3, 4, 5, 6, 7, 8}); + sizes->copyin(vector{1, 1, 1, 3}); auto cudaRuntime = make_ref(); Graph gCuda = 
make_ref(cudaRuntime); @@ -38,8 +38,8 @@ TEST(Resize, Cuda_upsample_sizes_nearest_notlarger) { auto input = gCpu->addTensor({1, 1, 2, 2}, DataType::Float32); auto sizes = gCpu->addTensor({2}, DataType::UInt32); gCpu->dataMalloc(); - input->copyData(vector{1, 2, 3, 4}); - sizes->copyData(vector{7, 8}); + input->copyin(vector{1, 2, 3, 4}); + sizes->copyin(vector{7, 8}); auto cudaRuntime = make_ref(); Graph gCuda = make_ref(cudaRuntime); @@ -68,8 +68,8 @@ TEST(Resize, Cuda_upsample_sizes_nearest_notsmaller) { auto input = gCpu->addTensor({1, 1, 2, 2}, DataType::Float32); auto sizes = gCpu->addTensor({2}, DataType::UInt32); gCpu->dataMalloc(); - input->copyData(vector{1, 2, 3, 4}); - sizes->copyData(vector{7, 8}); + input->copyin(vector{1, 2, 3, 4}); + sizes->copyin(vector{7, 8}); auto cudaRuntime = make_ref(); Graph gCuda = make_ref(cudaRuntime); @@ -98,9 +98,9 @@ TEST(Resize, Cuda_upsample_sizes_nearest_ceil_half_pixel) { auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32); auto sizes = gCpu->addTensor({4}, DataType::UInt32); gCpu->dataMalloc(); - input->copyData( + input->copyin( vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); - sizes->copyData(vector{1, 1, 8, 8}); + sizes->copyin(vector{1, 1, 8, 8}); auto cudaRuntime = make_ref(); Graph gCuda = make_ref(cudaRuntime); @@ -131,9 +131,9 @@ TEST(Resize, Cuda_upsample_sizes_nearest_floor_align_corners) { auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32); auto sizes = gCpu->addTensor({2}, DataType::UInt32); gCpu->dataMalloc(); - input->copyData( + input->copyin( vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); - sizes->copyData(vector{8, 8}); + sizes->copyin(vector{8, 8}); auto cudaRuntime = make_ref(); Graph gCuda = make_ref(cudaRuntime); @@ -164,9 +164,9 @@ TEST(Resize, Cuda_upsample_sizes_nearest_round_prefer_ceil_asymmetri) { auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32); auto sizes = gCpu->addTensor({4}, DataType::UInt32); gCpu->dataMalloc(); - input->copyData( + input->copyin( vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); - sizes->copyData(vector{1, 1, 8, 8}); + sizes->copyin(vector{1, 1, 8, 8}); auto cudaRuntime = make_ref(); Graph gCuda = make_ref(cudaRuntime); @@ -197,8 +197,8 @@ TEST(Resize, Cuda_downsample_scales_nearest) { auto input = gCpu->addTensor({1, 1, 2, 4}, DataType::Float32); auto scales = gCpu->addTensor({4}, DataType::Float32); gCpu->dataMalloc(); - input->copyData(vector{1, 2, 3, 4, 5, 6, 7, 8}); - scales->copyData(vector{1, 1, 0.6, 0.6}); + input->copyin(vector{1, 2, 3, 4, 5, 6, 7, 8}); + scales->copyin(vector{1, 1, 0.6, 0.6}); auto cudaRuntime = make_ref(); Graph gCuda = make_ref(cudaRuntime); @@ -221,8 +221,8 @@ TEST(Resize, Cuda_upsample_scales_nearest) { auto input = gCpu->addTensor({1, 1, 2, 2}, DataType::Float32); auto scales = gCpu->addTensor({4}, DataType::Float32); gCpu->dataMalloc(); - input->copyData(vector{1, 2, 3, 4}); - scales->copyData(vector{1, 1, 2, 3}); + input->copyin(vector{1, 2, 3, 4}); + scales->copyin(vector{1, 1, 2, 3}); auto cudaRuntime = make_ref(); Graph gCuda = make_ref(cudaRuntime); @@ -247,8 +247,8 @@ TEST(Resize, Cuda_upsample_scales_nearest_axes_3_2) { auto input = gCpu->addTensor({1, 1, 2, 2}, DataType::Float32); auto scales = gCpu->addTensor({2}, DataType::Float32); gCpu->dataMalloc(); - input->copyData(vector{1, 2, 3, 4}); - scales->copyData(vector{3, 2}); + input->copyin(vector{1, 2, 3, 4}); + scales->copyin(vector{3, 2}); auto cudaRuntime = make_ref(); Graph gCuda = make_ref(cudaRuntime); @@ -273,8 
+273,8 @@ TEST(Resize, Cuda_downsample_scales_linear) { auto input = gCpu->addTensor({1, 1, 2, 4}, DataType::Float32); auto scales = gCpu->addTensor({4}, DataType::Float32); gCpu->dataMalloc(); - input->copyData(vector{1, 2, 3, 4, 5, 6, 7, 8}); - scales->copyData(vector{1, 1, 0.6, 0.6}); + input->copyin(vector{1, 2, 3, 4, 5, 6, 7, 8}); + scales->copyin(vector{1, 1, 0.6, 0.6}); auto cudaRuntime = make_ref(); Graph gCuda = make_ref(cudaRuntime); @@ -297,8 +297,8 @@ TEST(Resize, Cuda_downsample_scales_linear_aligncorners) { auto input = gCpu->addTensor({1, 1, 2, 4}, DataType::Float32); auto scales = gCpu->addTensor({4}, DataType::Float32); gCpu->dataMalloc(); - input->copyData(vector{1, 2, 3, 4, 5, 6, 7, 8}); - scales->copyData(vector{1, 1, 0.6, 0.6}); + input->copyin(vector{1, 2, 3, 4, 5, 6, 7, 8}); + scales->copyin(vector{1, 1, 0.6, 0.6}); auto cudaRuntime = make_ref(); Graph gCuda = make_ref(cudaRuntime); @@ -323,8 +323,8 @@ TEST(Resize, Cuda_upsample_scales_linear) { auto input = gCpu->addTensor({1, 1, 2, 2}, DataType::Float32); auto scales = gCpu->addTensor({4}, DataType::Float32); gCpu->dataMalloc(); - input->copyData(vector{1, 2, 3, 4}); - scales->copyData(vector{1, 1, 2, 2}); + input->copyin(vector{1, 2, 3, 4}); + scales->copyin(vector{1, 1, 2, 2}); auto cudaRuntime = make_ref(); Graph gCuda = make_ref(cudaRuntime); @@ -349,8 +349,8 @@ TEST(Resize, Cuda_upsample_scales_linear_align_corners) { auto input = gCpu->addTensor({1, 1, 2, 2}, DataType::Float32); auto scales = gCpu->addTensor({4}, DataType::Float32); gCpu->dataMalloc(); - input->copyData(vector{1, 2, 3, 4}); - scales->copyData(vector{1, 1, 2, 2}); + input->copyin(vector{1, 2, 3, 4}); + scales->copyin(vector{1, 1, 2, 2}); auto cudaRuntime = make_ref(); Graph gCuda = make_ref(cudaRuntime); @@ -377,9 +377,9 @@ TEST(Resize, Cuda_downsample_sizes_linear_pytorchhalfpixel) { auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32); auto sizes = gCpu->addTensor({4}, DataType::UInt32); gCpu->dataMalloc(); - input->copyData( + input->copyin( vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); - sizes->copyData(vector{1, 1, 3, 1}); + sizes->copyin(vector{1, 1, 3, 1}); auto cudaRuntime = make_ref(); Graph gCuda = make_ref(cudaRuntime); @@ -406,10 +406,10 @@ TEST(Resize, Cuda_tf_crop_and_resize) { auto sizes = gCpu->addTensor({4}, DataType::UInt32); auto roi = gCpu->addTensor({8}, DataType::Float32); gCpu->dataMalloc(); - input->copyData( + input->copyin( vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); - sizes->copyData(vector{1, 1, 3, 3}); - roi->copyData(vector{0, 0, 0.4, 0.6, 1, 1, 0.6, 0.8}); + sizes->copyin(vector{1, 1, 3, 3}); + roi->copyin(vector{0, 0, 0.4, 0.6, 1, 1, 0.6, 0.8}); auto cudaRuntime = make_ref(); Graph gCuda = make_ref(cudaRuntime); @@ -437,10 +437,10 @@ TEST(Resize, Cuda_tf_crop_and_resize_axes_3_2) { auto sizes = gCpu->addTensor({2}, DataType::UInt32); auto roi = gCpu->addTensor({4}, DataType::Float32); gCpu->dataMalloc(); - input->copyData( + input->copyin( vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); - sizes->copyData(vector{3, 3}); - roi->copyData(vector{0.6, 0.4, 0.8, 0.6}); + sizes->copyin(vector{3, 3}); + roi->copyin(vector{0.6, 0.4, 0.8, 0.6}); auto cudaRuntime = make_ref(); Graph gCuda = make_ref(cudaRuntime); @@ -467,9 +467,9 @@ TEST(Resize, Cuda_downsample_scales_cubic) { auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32); auto scales = gCpu->addTensor({4}, DataType::Float32); gCpu->dataMalloc(); - input->copyData( + input->copyin( vector{1, 2, 
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); - scales->copyData(vector{1.0, 1.0, 0.8, 0.8}); + scales->copyin(vector{1.0, 1.0, 0.8, 0.8}); auto cudaRuntime = make_ref(); Graph gCuda = make_ref(cudaRuntime); @@ -494,9 +494,9 @@ TEST(Resize, Cuda_downsample_scales_cubic_align_corners) { auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32); auto scales = gCpu->addTensor({4}, DataType::Float32); gCpu->dataMalloc(); - input->copyData( + input->copyin( vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); - scales->copyData(vector{1.0, 1.0, 0.8, 0.8}); + scales->copyin(vector{1.0, 1.0, 0.8, 0.8}); auto cudaRuntime = make_ref(); Graph gCuda = make_ref(cudaRuntime); @@ -522,9 +522,9 @@ TEST(Resize, Cuda_upsample_scales_cubic) { auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32); auto scales = gCpu->addTensor({4}, DataType::Float32); gCpu->dataMalloc(); - input->copyData( + input->copyin( vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); - scales->copyData(vector{1.0, 1.0, 2, 2}); + scales->copyin(vector{1.0, 1.0, 2, 2}); auto cudaRuntime = make_ref(); Graph gCuda = make_ref(cudaRuntime); @@ -559,9 +559,9 @@ TEST(Resize, Cuda_upsample_scales_cubic_align_corners) { auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32); auto scales = gCpu->addTensor({4}, DataType::Float32); gCpu->dataMalloc(); - input->copyData( + input->copyin( vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); - scales->copyData(vector{1.0, 1.0, 2, 2}); + scales->copyin(vector{1.0, 1.0, 2, 2}); auto cudaRuntime = make_ref(); Graph gCuda = make_ref(cudaRuntime); @@ -598,9 +598,9 @@ TEST(Resize, Cuda_upsample_scales_cubic_asymmetric) { auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32); auto scales = gCpu->addTensor({4}, DataType::Float32); gCpu->dataMalloc(); - input->copyData( + input->copyin( vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); - scales->copyData(vector{1.0, 1.0, 2, 2}); + scales->copyin(vector{1.0, 1.0, 2, 2}); auto cudaRuntime = make_ref(); Graph gCuda = make_ref(cudaRuntime); @@ -633,9 +633,9 @@ TEST(Resize, Cuda_downsample_sizes_cubic) { auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32); auto sizes = gCpu->addTensor({4}, DataType::UInt32); gCpu->dataMalloc(); - input->copyData( + input->copyin( vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); - sizes->copyData(vector{1, 1, 3, 3}); + sizes->copyin(vector{1, 1, 3, 3}); auto cudaRuntime = make_ref(); Graph gCuda = make_ref(cudaRuntime); @@ -667,9 +667,9 @@ TEST(Resize, Cuda_upsample_sizes_cubic) { auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32); auto sizes = gCpu->addTensor({4}, DataType::UInt32); gCpu->dataMalloc(); - input->copyData( + input->copyin( vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); - sizes->copyData(vector{1, 1, 9, 10}); + sizes->copyin(vector{1, 1, 9, 10}); auto cudaRuntime = make_ref(); Graph gCuda = make_ref(cudaRuntime); diff --git a/test/operators/test_conv.cc b/test/operators/test_conv.cc index 059fa9af..420b4ab8 100644 --- a/test/operators/test_conv.cc +++ b/test/operators/test_conv.cc @@ -61,7 +61,7 @@ TEST(Conv, NaiveCPU) { auto ans = make_ref(Shape{1, 2, 2, 2}, DataType::UInt32, runtime); ans->dataMalloc(); - ans->copyData( + ans->copyin( vector{4794, 4386, 8199, 7506, 11274, 10542, 20835, 19656}); EXPECT_TRUE(conv->getOutput()->equalData(ans)); } diff --git a/test/operators/test_resize.cc b/test/operators/test_resize.cc index 37e2fc7d..1c36da20 100644 --- a/test/operators/test_resize.cc 
+++ b/test/operators/test_resize.cc @@ -12,7 +12,7 @@ TEST(Resize, ShapeInference) { Tensor i = g->addTensor({1, 1, 2, 4}, DataType::UInt32); Tensor sizes = g->addTensor({4}, DataType::UInt32); sizes->dataMalloc(); - sizes->copyData(vector{1, 1, 1, 3}); + sizes->copyin(vector{1, 1, 1, 3}); auto op = g->addOp( i, nullptr, std::nullopt, sizes, nullptr, nullptr, ResizeObj::EKeepAspectRatioPolicy::stretch); @@ -24,7 +24,7 @@ TEST(Resize, ShapeInference) { Tensor i = g->addTensor({1, 1, 2, 4}, DataType::UInt32); Tensor sizes = g->addTensor({2}, DataType::UInt32); sizes->dataMalloc(); - sizes->copyData(vector{1, 3}); + sizes->copyin(vector{1, 3}); auto op = g->addOp( i, nullptr, vector{2, 3}, sizes, nullptr, nullptr, ResizeObj::EKeepAspectRatioPolicy::stretch); @@ -36,7 +36,7 @@ TEST(Resize, ShapeInference) { Tensor i = g->addTensor({1, 3, 2, 4}, DataType::UInt32); Tensor sizes = g->addTensor({2}, DataType::UInt32); sizes->dataMalloc(); - sizes->copyData(vector{7, 8}); + sizes->copyin(vector{7, 8}); auto op = g->addOp( i, nullptr, vector{2, 3}, sizes, nullptr, nullptr, ResizeObj::EKeepAspectRatioPolicy::notLarger); @@ -48,7 +48,7 @@ TEST(Resize, ShapeInference) { Tensor i = g->addTensor({1, 3, 2, 4}, DataType::UInt32); Tensor sizes = g->addTensor({3}, DataType::UInt32); sizes->dataMalloc(); - sizes->copyData(vector{2, 6, 8}); + sizes->copyin(vector{2, 6, 8}); auto op = g->addOp( i, nullptr, vector{1, 2, 3}, sizes, nullptr, nullptr, ResizeObj::EKeepAspectRatioPolicy::notSmaller); @@ -60,7 +60,7 @@ TEST(Resize, ShapeInference) { Tensor i = g->addTensor({1, 1, 4, 4}, DataType::UInt32); Tensor scales = g->addTensor({3}, DataType::Float32); scales->dataMalloc(); - scales->copyData(vector{1, 0.8, 0.8}); + scales->copyin(vector{1, 0.8, 0.8}); auto op = g->addOp(i, nullptr, vector{1, 2, 3}, nullptr, scales, nullptr); EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 1, 3, 3})); @@ -71,7 +71,7 @@ TEST(Resize, ShapeInference) { Tensor i = g->addTensor({1, 1, 2, 2}, DataType::UInt32); Tensor scales = g->addTensor({4}, DataType::Float32); scales->dataMalloc(); - scales->copyData(vector{1, 1, 2, 2}); + scales->copyin(vector{1, 1, 2, 2}); auto op = g->addOp(i, nullptr, std::nullopt, nullptr, scales, nullptr); EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 1, 4, 4})); From 73e895b8ce4bb8488ef4527c34498fdda95953c1 Mon Sep 17 00:00:00 2001 From: YdrMaster Date: Tue, 21 Mar 2023 14:40:13 +0800 Subject: [PATCH 25/29] =?UTF-8?q?feat:=20=E5=AF=BC=E5=87=BA=E6=8B=B7?= =?UTF-8?q?=E5=87=BA=E5=BC=A0=E9=87=8F=E5=80=BC=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: YdrMaster --- src/ffi/ffi_infinitensor.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ffi/ffi_infinitensor.cc b/src/ffi/ffi_infinitensor.cc index 641c2c3b..390ecb2d 100644 --- a/src/ffi/ffi_infinitensor.cc +++ b/src/ffi/ffi_infinitensor.cc @@ -178,6 +178,8 @@ void init_graph_builder(py::module &m) { .def("fuid", &TensorObj::getFuid, policy::automatic) .def("shape", &TensorObj::getDims, policy::move) .def("copyoutFloat", &TensorObj::copyoutFloat, policy::move) + .def("copyoutInt32", &TensorObj::copyoutInt32, policy::move) + .def("copyoutInt64", &TensorObj::copyoutInt64, policy::move) .def("has_target", &TensorObj::hasTarget, policy::automatic) .def("src", &TensorObj::getOutputOf, policy::move); py::class_>(m, "Operator") From 5aeacedab3deedd6e2a5f327f29f66346f03d01e Mon Sep 17 00:00:00 2001 From: YdrMaster Date: Wed, 22 Mar 2023 09:46:40 +0800 Subject: [PATCH 26/29] 
=?UTF-8?q?fix:=20=E4=BB=8E=E6=A8=A1=E6=9D=BF?= =?UTF-8?q?=E5=AF=BC=E5=87=BA=E6=AF=8F=E4=B8=AA=E7=B1=BB=E5=9E=8B=E7=9A=84?= =?UTF-8?q?=20python=20=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: YdrMaster --- include/core/graph_handler.h | 12 ------------ include/core/tensor.h | 10 +++------- pyinfinitensor/src/pyinfinitensor/onnx.py | 12 ++++++------ src/ffi/ffi_infinitensor.cc | 14 +++++++------- 4 files changed, 16 insertions(+), 32 deletions(-) diff --git a/include/core/graph_handler.h b/include/core/graph_handler.h index 72f7a6a3..9936c637 100644 --- a/include/core/graph_handler.h +++ b/include/core/graph_handler.h @@ -89,18 +89,6 @@ class GraphHandlerObj { inline void data_malloc() { g->dataMalloc(); } - inline void copy_int32(Tensor tensor, std::vector list) { - tensor->copyin(list); - } - - inline void copy_int64(Tensor tensor, std::vector list) { - tensor->copyin(list); - } - - inline void copy_float(Tensor tensor, std::vector list) { - tensor->copyin(list); - } - inline void run() { g->getRuntime()->run(g); } }; diff --git a/include/core/tensor.h b/include/core/tensor.h index af3ed3f6..ed95f5f5 100644 --- a/include/core/tensor.h +++ b/include/core/tensor.h @@ -57,20 +57,16 @@ class TensorObj : public TensorBaseObj { return ans; } // Copy the element at `pos`. - template inline auto copyout(const vector &pos) const { + template inline auto copyOne(const vector &pos) const { IT_ASSERT(DataType::get() == dtype); auto offset = getOffset(pos); auto bytes = dtype.getSize(); T ans; - runtime->copyBlobToCPU(&ans, getRawDataPtr() + offset * bytes, - bytes); + runtime->copyBlobToCPU( + &ans, getRawDataPtr() + offset * bytes, bytes); return ans; } - inline auto copyoutFloat() const { return copyout(); } - inline auto copyoutInt32() const { return copyout(); } - inline auto copyoutInt64() const { return copyout(); } - void copyData(const TensorObj *src); void copyData(const Tensor &src) { copyData(src.get()); } void setData( diff --git a/pyinfinitensor/src/pyinfinitensor/onnx.py b/pyinfinitensor/src/pyinfinitensor/onnx.py index f85addb4..4ec4f365 100644 --- a/pyinfinitensor/src/pyinfinitensor/onnx.py +++ b/pyinfinitensor/src/pyinfinitensor/onnx.py @@ -101,7 +101,7 @@ class OnnxStub: (alpha, beta, transA, transB) = ( attributes[name] for name in ["alpha", "beta", "transA", "transB"] ) - # TODO 不支持这些参数 + # FIXME unsupport attributes: `alpha` `beta` assert alpha == 1.0 assert beta == 1.0 tensors[node.output[0]] = self.handler.matmul( @@ -265,7 +265,7 @@ class OnnxStub: tensors.get(node.output[0]), ) elif node.op_type == "Flatten": - # TODO 后端算子不支持沿任意轴展开 + # FIXME axis must be 1 axis = next( (attr.i for attr in node.attribute if attr.name == "axis"), None ) @@ -315,7 +315,7 @@ class OnnxStub: next((attr.i for attr in node.attribute if attr.name == "axis")), ) elif node.op_type == "ReduceMean": - tensors[node.output[0]] = self.handler.reduceMean( + tensors[node.output[0]] = self.handler.reduce_mean( tensors[node.input[0]], tensors.get(node.output[0]), tensors[node.input[1]] if len(node.input) > 1 else None, @@ -351,11 +351,11 @@ class OnnxStub: else: self.initializer[obj.fuid()] = tensor if tensor.data_type == TensorProto.INT32: - self.handler.copy_int32(obj, [int(i) for i in tensor.int32_data]) + obj.copyin_int32([int(i) for i in tensor.int32_data]) elif tensor.data_type == TensorProto.INT64: - self.handler.copy_int64(obj, [int(i) for i in tensor.int64_data]) + obj.copyin_int64([int(i) for i in tensor.int64_data]) elif 
tensor.data_type == TensorProto.FLOAT:
-                    self.handler.copy_float(obj, [float(i) for i in tensor.float_data])
+                    obj.copyin_float([float(i) for i in tensor.float_data])
                 else:
                     assert False, "Unsupported Tensor Type: {}".format(tensor.data_type)
diff --git a/src/ffi/ffi_infinitensor.cc b/src/ffi/ffi_infinitensor.cc
index 390ecb2d..627be8bf 100644
--- a/src/ffi/ffi_infinitensor.cc
+++ b/src/ffi/ffi_infinitensor.cc
@@ -177,9 +177,12 @@ void init_graph_builder(py::module &m) {
     py::class_<TensorObj, std::shared_ptr<TensorObj>>(m, "Tensor")
         .def("fuid", &TensorObj::getFuid, policy::automatic)
         .def("shape", &TensorObj::getDims, policy::move)
-        .def("copyoutFloat", &TensorObj::copyoutFloat, policy::move)
-        .def("copyoutInt32", &TensorObj::copyoutInt32, policy::move)
-        .def("copyoutInt64", &TensorObj::copyoutInt64, policy::move)
+        .def("copyin_float", &TensorObj::copyin<float>, policy::move)
+        .def("copyin_int32", &TensorObj::copyin<int32_t>, policy::move)
+        .def("copyin_int64", &TensorObj::copyin<int64_t>, policy::move)
+        .def("copyout_float", &TensorObj::copyout<float>, policy::move)
+        .def("copyout_int32", &TensorObj::copyout<int32_t>, policy::move)
+        .def("copyout_int64", &TensorObj::copyout<int64_t>, policy::move)
         .def("has_target", &TensorObj::hasTarget, policy::automatic)
         .def("src", &TensorObj::getOutputOf, policy::move);
     py::class_<OperatorObj, std::shared_ptr<OperatorObj>>(m, "Operator")
@@ -212,15 +215,12 @@ void init_graph_builder(py::module &m) {
         .def("reshape", &Handler::reshape, policy::move)
         .def("concat", &Handler::concat, policy::move)
         .def("gather", &Handler::gather, policy::move)
-        .def("reduceMean", &Handler::reduceMean, policy::move)
+        .def("reduce_mean", &Handler::reduceMean, policy::move)
         .def("slice", &Handler::slice, policy::move)
         .def("pad", &Handler::pad, policy::move)
         .def("topo_sort", &Handler::topo_sort, policy::automatic)
         .def("operators", &Handler::operators, policy::move)
         .def("data_malloc", &Handler::data_malloc, policy::automatic)
-        .def("copy_int32", &Handler::copy_int32, policy::automatic)
-        .def("copy_int64", &Handler::copy_int64, policy::automatic)
-        .def("copy_float", &Handler::copy_float, policy::automatic)
         .def("run", &Handler::run, policy::automatic);
 }

From 64a5de51f32834c1ae5395fc4d37a2df35681852 Mon Sep 17 00:00:00 2001
From: wanghailu
Date: Mon, 27 Feb 2023 10:22:03 +0800
Subject: [PATCH 27/29] fix

---
 pyinfinitensor/src/pyinfinitensor/onnx.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyinfinitensor/src/pyinfinitensor/onnx.py b/pyinfinitensor/src/pyinfinitensor/onnx.py
index 4ec4f365..e9424ab3 100644
--- a/pyinfinitensor/src/pyinfinitensor/onnx.py
+++ b/pyinfinitensor/src/pyinfinitensor/onnx.py
@@ -588,7 +588,7 @@ class OnnxStub:
         self.handler.copy_float(self.inputs[name], data)
 
     def take_float(self) -> List[float]:
-        return next(iter(self.handler.outputs.values())).copyFloats()
+        return next(iter(self.outputs.values())).cloneFloats()
 
 
 def from_onnx(model: ModelProto, runtime):

From aff2b538cebb72b04d91c69a47849c4237cc99df Mon Sep 17 00:00:00 2001
From: YdrMaster
Date: Wed, 22 Mar 2023 10:13:06 +0800
Subject: [PATCH 28/29] =?UTF-8?q?fix:=20=E5=88=A0=E9=99=A4=E5=8D=95?=
 =?UTF-8?q?=E7=8B=AC=E7=9A=84=E6=8B=B7=E8=B4=9D=E5=87=BD=E6=95=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: YdrMaster
---
 pyinfinitensor/src/pyinfinitensor/onnx.py | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/pyinfinitensor/src/pyinfinitensor/onnx.py b/pyinfinitensor/src/pyinfinitensor/onnx.py
index e9424ab3..9a6afd21 100644
--- a/pyinfinitensor/src/pyinfinitensor/onnx.py
+++
From aff2b538cebb72b04d91c69a47849c4237cc99df Mon Sep 17 00:00:00 2001
From: YdrMaster
Date: Wed, 22 Mar 2023 10:13:06 +0800
Subject: [PATCH 28/29] fix: remove the standalone copy functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: YdrMaster
---
 pyinfinitensor/src/pyinfinitensor/onnx.py | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/pyinfinitensor/src/pyinfinitensor/onnx.py b/pyinfinitensor/src/pyinfinitensor/onnx.py
index e9424ab3..9a6afd21 100644
--- a/pyinfinitensor/src/pyinfinitensor/onnx.py
+++ b/pyinfinitensor/src/pyinfinitensor/onnx.py
@@ -578,18 +578,6 @@ class OnnxStub:
     def run(self) -> None:
         self.handler.run()

-    def put_int32(self, name: str, data: List[int]) -> None:
-        self.handler.copy_int32(self.inputs[name], data)
-
-    def put_int64(self, name: str, data: List[int]) -> None:
-        self.handler.copy_int64(self.inputs[name], data)
-
-    def put_float(self, name: str, data: List[int]) -> None:
-        self.handler.copy_float(self.inputs[name], data)
-
-    def take_float(self) -> List[float]:
-        return next(iter(self.outputs.values())).cloneFloats()
-

 def from_onnx(model: ModelProto, runtime):
     stub = OnnxStub(model, runtime)
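With the ad-hoc `put_*`/`take_float` helpers gone, feeding and reading tensors presumably goes through the per-type tensor bindings directly. A hedged sketch of the replacement workflow — `stub.inputs`/`stub.outputs` are the dictionaries the deleted helpers used; the model path, input name "x", and payload size are illustrative only:

    import onnx
    from pyinfinitensor.onnx import OnnxStub, runtime

    model = onnx.load("resnet18.onnx")      # illustrative path to any supported model
    stub = OnnxStub(model, runtime)
    data = [0.0] * 3 * 224 * 224            # must match the input tensor's element count
    stub.inputs["x"].copyin_float(data)     # was: stub.put_float("x", data)
    stub.run()
    result = next(iter(stub.outputs.values())).copyout_float()  # was: stub.take_float()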
From d9886e9de302f126f998ef23ebd3cbdd3087d393 Mon Sep 17 00:00:00 2001
From: whjthu
Date: Sat, 25 Mar 2023 12:04:24 +0800
Subject: [PATCH 29/29] fix: remove inline keyword in class; rename getter and
 setter for inputOf and outputOf

---
 include/core/graph.h                     |  4 ++--
 include/core/tensor.h                    | 16 ++++++++--------
 include/core/tensor_base.h               | 10 +++++-----
 include/operators/batch_norm.h           | 10 +++++-----
 include/operators/pooling.h              | 26 ++++++++++++--------------
 src/core/graph.cc                        | 10 +++++-----
 src/core/tensor.cc                       |  6 +++---
 src/ffi/ffi_infinitensor.cc              |  2 +-
 test/core/test_graph.cc                  | 18 +++++++++---------
 test/core/test_search.cc                 |  4 ++--
 test/kernels/cuda/test_cuda_inception.cc |  2 +-
 11 files changed, 53 insertions(+), 55 deletions(-)

diff --git a/include/core/graph.h b/include/core/graph.h
index 8e317a8b..4e654caa 100644
--- a/include/core/graph.h
+++ b/include/core/graph.h
@@ -63,7 +63,7 @@ class GraphObj : public Object {
     inline TensorVec getInputs() const {
         TensorVec ret;
         for (const auto &t : tensors)
-            if (!t->getOutputOf())
+            if (!t->getSource())
                 ret.emplace_back(t);
         return ret;
     }
@@ -74,7 +74,7 @@ class GraphObj : public Object {
     inline TensorVec getOutputs() const {
         TensorVec ret;
         for (const auto &t : tensors)
-            if (t->getInputOf().empty())
+            if (t->getTargets().empty())
                 ret.emplace_back(t);
         return ret;
     }
diff --git a/include/core/tensor.h b/include/core/tensor.h
index ed95f5f5..72a3b007 100644
--- a/include/core/tensor.h
+++ b/include/core/tensor.h
@@ -19,10 +19,10 @@ class TensorObj : public TensorBaseObj {
     Fuid fuid; // Cloned tensors share the same id. Tensors constructed from
                // scratch have a new id.

-    inline void copyin(const void *ptr, size_t size) {
+    void copyin(const void *ptr, size_t size) {
         runtime->copyBlobFromCPU(getRawDataPtr<void *>(), ptr, size);
     }
-    inline void copyout(void *ptr, size_t size) const {
+    void copyout(void *ptr, size_t size) const {
         runtime->copyBlobToCPU(ptr, getRawDataPtr<void *>(), size);
     }

@@ -31,33 +31,33 @@ class TensorObj : public TensorBaseObj {
     virtual ~TensorObj() {}
     string toString() const override;

-    inline size_t size() const { return _size; }
-    inline size_t getBytes() const { return _size * dtype.getSize(); }
+    size_t size() const { return _size; }
+    size_t getBytes() const { return _size * dtype.getSize(); }

     Shape getDims() const { return shape; }
     vector<size_t> getStride() const;
     size_t getOffset(const vector<int> &ds) const;
     void dataMalloc();
-    inline UidBaseType getFuid() const { return fuid; }
+    UidBaseType getFuid() const { return fuid; }

     void load(std::string file_path);
     void save(std::string file_path);

     // Copy elements from `data`.
-    template <typename T> inline void copyin(const vector<T> &data) {
+    template <typename T> void copyin(const vector<T> &data) {
         IT_ASSERT(DataType::get<T>() == dtype);
         IT_ASSERT(data.size() >= _size);
         copyin(data.data(), getBytes());
     }
     // Copy all the elements to a vector.
-    template <typename T> inline auto copyout() const {
+    template <typename T> auto copyout() const {
         IT_ASSERT(DataType::get<T>() == dtype);
         std::vector<T> ans(_size);
         copyout(ans.data(), getBytes());
         return ans;
     }
     // Copy the element at `pos`.
-    template <typename T> inline auto copyOne(const vector<int> &pos) const {
+    template <typename T> auto copyOne(const vector<int> &pos) const {
         IT_ASSERT(DataType::get<T>() == dtype);
         auto offset = getOffset(pos);
         auto bytes = dtype.getSize();
diff --git a/include/core/tensor_base.h b/include/core/tensor_base.h
index 4c73094a..61b8d032 100644
--- a/include/core/tensor_base.h
+++ b/include/core/tensor_base.h
@@ -45,14 +45,14 @@ class TensorBaseObj : public Object {
     DataType getDType() const { return dtype; }
     Runtime getRuntime() const { return runtime; }

-    void addInputOf(const Operator &op) { targets.emplace_back(op); }
-    void setOutputOf(const Operator &op) { source = op; }
+    void addTarget(const Operator &op) { targets.emplace_back(op); }
+    void setSource(const Operator &op) { source = op; }

     bool hasTarget() const { return !targets.empty(); }

-    OpVec getInputOf() const { return wrefs_to_refs(targets); }
-    Operator getOutputOf() const { return source.lock(); }
-    // std::pair<Operator *, int> getOutputOfWithIndex();
+    OpVec getTargets() const { return wrefs_to_refs(targets); }
+    Operator getSource() const { return source.lock(); }
+    // std::pair<Operator *, int> getSourceWithIndex();

     // bool setScalar(VType val) {
     //     if (data == nullptr || !dims.empty())
diff --git a/include/operators/batch_norm.h b/include/operators/batch_norm.h
index 8e41a043..fbee21fd 100644
--- a/include/operators/batch_norm.h
+++ b/include/operators/batch_norm.h
@@ -39,11 +39,11 @@ class BatchNormObj : public OperatorObj {
     std::string toString() const override;

     // output size will be 3 when training
-    inline int numInputs() const override { return 5; }
-    inline int numOutputs() const override { return outputs.size(); }
-    inline float getMomentum() const { return momentum; }
-    inline float getEps() const { return eps; }
-    inline bool getTraining() const { return training; }
+    int numInputs() const override { return 5; }
+    int numOutputs() const override { return outputs.size(); }
+    float getMomentum() const { return momentum; }
+    float getEps() const { return eps; }
+    bool getTraining() const { return training; }

   private:
     vector<int> getWorkloadVector() const override;
diff --git a/include/operators/pooling.h b/include/operators/pooling.h
index 1a1a6edf..c14bb8ad 100644
--- a/include/operators/pooling.h
+++ b/include/operators/pooling.h
@@ -39,22 +39,20 @@ class PoolingObj : public OperatorObj {
     optional<vector<Shape>> inferShape(const TensorVec &inputs) const override;
     std::string toString() const override;

-    inline int numInputs() const override { return 1; }
-    inline int numOutputs() const override { return 1; }
+    int numInputs() const override { return 1; }
+    int numOutputs() const override { return 1; }

-    inline int getKh() const { return kh; }
-    inline int getKw() const { return kw; }
-    inline int getDh() const { return dh; }
-    inline int getDw() const { return dw; }
-    inline int getPh() const { return ph; }
-    inline int getPw() const { return pw; }
-    inline int getSh() const { return sh; }
-    inline int getSw() const { return sw; }
+    int getKh() const { return kh; }
+    int getKw() const { return kw; }
+    int getDh() const { return dh; }
+    int getDw() const { return dw; }
+    int getPh() const { return ph; }
+    int getPw() const { return pw; }
+    int getSh() const { return sh; }
+    int getSw() const { return sw; }

-    inline auto getPadStrideDilation() const {
-        return tuple(ph, pw, sh, sw, dh, dw);
-    }
-    inline auto getNCHWRS() const { return tuple(n, c, h, w, kh, kw); }
+    auto getPadStrideDilation() const { return tuple(ph, pw, sh, sw, dh, dw); }
+    auto getNCHWRS() const { return tuple(n, c, h, w, kh, kw); }

   private:
     vector<int> getWorkloadVector() const override;
diff --git a/src/core/graph.cc b/src/core/graph.cc
index 04ce2581..800f4ca5 100644
--- a/src/core/graph.cc
+++ b/src/core/graph.cc
@@ -33,15 +33,15 @@ void GraphObj::addOperatorAndConnect(const Operator &op) {
     sorted = false;
     ops.push_back(op);
     for (auto &input : op->getInputs()) {
-        input->addInputOf(op);
-        if (auto pred = input->getOutputOf()) {
+        input->addTarget(op);
+        if (auto pred = input->getSource()) {
             pred->addSuccessors(op);
             op->addPredecessors(pred);
         }
     }
     for (auto &output : op->getOutputs()) {
-        output->setOutputOf(op);
-        for (auto &succ : output->getInputOf()) {
+        output->setSource(op);
+        for (auto &succ : output->getTargets()) {
             succ->addPredecessors(op);
             op->addSuccessors(succ);
         }
@@ -87,7 +87,7 @@ bool GraphObj::topo_sort() {
             // this node is a head node.
             const auto is_head = std::all_of(
                 this_inputs.begin(), this_inputs.end(), [&](const auto &input) {
-                    auto src = input->getOutputOf();
+                    auto src = input->getSource();
                     return src // If the source node is in the waiting list,
                                // means that this node is not the head node.
                                ? waiting.find(src) == waiting.end()
diff --git a/src/core/tensor.cc b/src/core/tensor.cc
index b8a41728..362c3e76 100644
--- a/src/core/tensor.cc
+++ b/src/core/tensor.cc
@@ -18,14 +18,14 @@ string TensorObj::toString() const {
     string ret = "Tensor " + std::to_string(guid) + ", Fuid " +
                  std::to_string(fuid) + ", shape " + vecToString(shape) +
                  ", dtype " + dtype.toString();
-    vector<UidBaseType> inputOfGuid;
+    vector<UidBaseType> targetGuids;
     for (const auto &op : targets)
-        inputOfGuid.emplace_back(op.lock()->getGuid());
+        targetGuids.emplace_back(op.lock()->getGuid());
     if (auto o = source.lock())
         ret += ", source " + std::to_string(o->getGuid());
     else
         ret += ", source None";
-    ret += ", targets " + vecToString(inputOfGuid);
+    ret += ", targets " + vecToString(targetGuids);
     return ret;
 }
diff --git a/src/ffi/ffi_infinitensor.cc b/src/ffi/ffi_infinitensor.cc
index 627be8bf..7d080548 100644
--- a/src/ffi/ffi_infinitensor.cc
+++ b/src/ffi/ffi_infinitensor.cc
@@ -184,7 +184,7 @@ void init_graph_builder(py::module &m) {
         .def("copyout_int32", &TensorObj::copyout<int32_t>, policy::move)
         .def("copyout_int64", &TensorObj::copyout<int64_t>, policy::move)
         .def("has_target", &TensorObj::hasTarget, policy::automatic)
-        .def("src", &TensorObj::getOutputOf, policy::move);
+        .def("src", &TensorObj::getSource, policy::move);
     py::class_<OperatorObj, std::shared_ptr<OperatorObj>>(m, "Operator")
         .def("op_type", &OperatorObj::getOpType, policy::automatic)
         .def("inputs", py::overload_cast<>(&OperatorObj::getInputs, py::const_),
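The rename is invisible from Python: `src` and `has_target` keep their names and now forward to `getSource` and `hasTarget`. A hedged sketch of the invariant the test diffs below exercise, using only bindings visible in this patch (`topo_sort`, `operators`, `inputs`, `has_target`, `src`); the `handler` is assumed to be built as in the earlier sketch:

    handler.topo_sort()
    for op in handler.operators():
        for t in op.inputs():
            assert t.has_target()  # t feeds at least this operator
            # t.src() yields the producing Operator, or None for a graph input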
diff --git a/test/core/test_graph.cc b/test/core/test_graph.cc
index 8140ea43..85c012b9 100644
--- a/test/core/test_graph.cc
+++ b/test/core/test_graph.cc
@@ -19,13 +19,13 @@ TEST(Graph, build_and_run) {
     w0->copyin(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
     auto matmul = g->addOpWithOutputs<MatmulObj>(i0, w0, o0);
     g->print();
-    // check inputOf and outputsOf for tensor
-    EXPECT_EQ(i0->getInputOf().size(), 1u);
-    EXPECT_EQ(w0->getInputOf().size(), 1u);
-    EXPECT_EQ(o0->getInputOf().size(), 0u);
-    EXPECT_EQ(i0->getOutputOf(), nullptr);
-    EXPECT_EQ(w0->getOutputOf(), nullptr);
-    EXPECT_NE(o0->getOutputOf(), nullptr);
+    // check targets and source for tensor
+    EXPECT_EQ(i0->getTargets().size(), 1u);
+    EXPECT_EQ(w0->getTargets().size(), 1u);
+    EXPECT_EQ(o0->getTargets().size(), 0u);
+    EXPECT_EQ(i0->getSource(), nullptr);
+    EXPECT_EQ(w0->getSource(), nullptr);
+    EXPECT_NE(o0->getSource(), nullptr);
     EXPECT_EQ(matmul->getPredecessors().size(), 0u);
     EXPECT_EQ(matmul->getSuccessors().size(), 0u);
@@ -139,8 +139,8 @@ TEST(Graph, test_OpVec_ctor) {
     map<pair<size_t, bool>, int> inputOutput2Cnt = {
         {{1, 0}, 2}, {{1, 1}, 1}, {{0, 1}, 1}};
     for (auto t : g2->getTensors()) {
-        pair<size_t, bool> key = {t->getInputOf().size(),
-                                  t->getOutputOf() != nullptr};
+        pair<size_t, bool> key = {t->getTargets().size(),
+                                  t->getSource() != nullptr};
         EXPECT_GE(inputOutput2Cnt[key], 0);
         inputOutput2Cnt[key]--;
     }
diff --git a/test/core/test_search.cc b/test/core/test_search.cc
index 5f531c21..5f354fdb 100644
--- a/test/core/test_search.cc
+++ b/test/core/test_search.cc
@@ -23,7 +23,7 @@ namespace infini {
//     w0->copyin(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
//     auto matmul = g->addOpWithOutputs<MatmulObj>(i0, w0, o0);
//     g->print();
-//     // check inputOf and outputsOf for tensor
+//     // check targets and source for tensor
//     SearchEngine searchEngine(runtime, make_ref<DummyMutator>());
//     searchEngine.run(g);
//     // check execution results
@@ -46,7 +46,7 @@ TEST(Graph, search_withdm) {
     auto conv1 = g->addOpWithOutputs<ConvObj>(t3, w3, t4, 1, 1);
     auto add1 = g->addOpWithOutputs<AddObj>(t4, t5, t6);
     g->dataMalloc();
-    // check inputOf and outputsOf for tensor
+    // check targets and source for tensor
     SearchEngine searchEngine(runtime, make_ref<DummyMutator>(10));
     searchEngine.run(g);
     // check execution results
diff --git a/test/kernels/cuda/test_cuda_inception.cc b/test/kernels/cuda/test_cuda_inception.cc
index 1e691576..31a7b888 100644
--- a/test/kernels/cuda/test_cuda_inception.cc
+++ b/test/kernels/cuda/test_cuda_inception.cc
@@ -64,7 +64,7 @@ TEST(CUDA_Inception_v3_block, run) {

     // check connection
     EXPECT_EQ(maxpool->getSuccessors().size(), 4u);
-    EXPECT_EQ(chainInput->getInputOf().size(), 4u);
+    EXPECT_EQ(chainInput->getTargets().size(), 4u);
     for (const auto &chainOps : ops) {
         for (size_t i = 1; i < chainOps.size(); i++) {
             auto prev = chainOps[i - 1];