InfiniTensor/pyinfinitensor/tests/test_onnx.py

import os, onnx, unittest
from onnx import TensorProto
from onnx.helper import (
    make_model,
    make_node,
    make_tensor,
    make_graph,
    make_tensor_value_info,
)
from onnx.checker import check_model, check_graph
from onnx.shape_inference import infer_shapes
from pyinfinitensor.onnx import from_onnx, OnnxStub, backend, _parse_data_fp16
import numpy as np


def make_and_import_model(graph: onnx.GraphProto):
    """Validate ``graph``, wrap it in a model and import it on the CPU runtime.

    The graph is checked with ``check_graph``, turned into a model with
    ``make_model``, the model is checked with ``check_model`` and finally
    handed to ``from_onnx``.  Nothing is returned; any exception raised by a
    step propagates to the caller.
    """
    check_graph(graph)
    onnx_model = make_model(graph)
    check_model(onnx_model)
    # The runtime is only created once both checks have passed.
    cpu_runtime = backend.cpu_runtime()
    from_onnx(onnx_model, cpu_runtime)


class TestStringMethods(unittest.TestCase):
    """Import tests for the ONNX front end on the CPU runtime.

    Each test builds a small ONNX graph (or drives ``GraphHandler`` directly)
    and imports it with ``from_onnx``.  The tests contain no assertions: a
    test passes when building and importing the graph raises no exception.
    """

    # ------------------------------------------------------------------
    # Private helpers shared by the tests below.
    # ------------------------------------------------------------------

    def _import_unchecked(self, graph):
        """Import ``graph`` without running ``check_graph``/``check_model``.

        Used for operators such as ``AllReduceSum``, ``Send`` or ``Recv``.
        NOTE(review): presumably the checker is bypassed because these are not
        standard ONNX operators -- confirm.
        """
        from_onnx(make_model(graph), backend.cpu_runtime())

    def _import_unary(self, op_type, name, **attrs):
        """Import a one-input node ``x -> y`` on FLOAT tensors of shape [1, 3, 5, 7].

        ``name`` is used both as the node name and as the graph name;
        ``attrs`` are forwarded to ``make_node`` as node attributes.
        """
        x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 3, 5, 7])
        y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 3, 5, 7])
        node = make_node(op_type, ["x"], ["y"], name=name, **attrs)
        make_and_import_model(make_graph([node], name, [x], [y]))

    def _import_binary(self, op_type, name):
        """Import a two-input node ``(a, b) -> c`` on FLOAT tensors of shape [1, 3, 5, 7].

        ``name`` is used both as the node name and as the graph name.
        """
        a = make_tensor_value_info("a", TensorProto.FLOAT, [1, 3, 5, 7])
        b = make_tensor_value_info("b", TensorProto.FLOAT, [1, 3, 5, 7])
        c = make_tensor_value_info("c", TensorProto.FLOAT, [1, 3, 5, 7])
        node = make_node(op_type, ["a", "b"], ["c"], name=name)
        make_and_import_model(make_graph([node], name, [a, b], [c]))

    def _import_conv(self, dtype, graph_name):
        """Import a padded, strided and dilated ``Conv`` whose tensors all use ``dtype``."""
        i = make_tensor_value_info("i", dtype, [1, 3, 4, 4])
        w = make_tensor_value_info("w", dtype, [2, 3, 3, 3])
        o = make_tensor_value_info("o", dtype, [1, 2, 2, 2])
        conv = make_node(
            "Conv",
            ["i", "w"],
            ["o"],
            name="conv",
            pads=[1, 1, 1, 1],
            strides=[2, 1],
            dilations=[1, 2],
        )
        make_and_import_model(make_graph([conv], graph_name, [i, w], [o]))

    def _import_all_reduce(self, op_type, name):
        """Import an all-reduce style node ``input -> output`` without checker validation.

        ``name`` is used both as the node name and as the graph name.
        """
        tensor_in = make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 2, 4])
        tensor_out = make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 2, 4])
        node = make_node(op_type, ["input"], ["output"], name=name)
        self._import_unchecked(make_graph([node], name, [tensor_in], [tensor_out]))

    # ------------------------------------------------------------------
    # Whole-model round trip.
    # ------------------------------------------------------------------

    def test_load(self):
        """Import, re-export and shape-infer every ``.onnx`` file in the working directory."""
        for model_file in os.listdir():
            if not model_file.endswith(".onnx"):
                continue
            # One sub-test per model: a failure names the offending file and
            # the remaining models are still exercised.
            with self.subTest(model=model_file):
                size_mib = os.path.getsize(model_file) / 1024 / 1024
                print(f"model: {model_file}({size_mib:.2f} MiB)")
                stub = OnnxStub(onnx.load(model_file), backend.cpu_runtime())
                exported = stub.to_onnx("new")
                # Only checks that shape inference on the exported model
                # does not raise; the result itself is not inspected.
                infer_shapes(exported)

    # ------------------------------------------------------------------
    # Single-operator graphs.
    # ------------------------------------------------------------------

    def test_tensor(self):
        """Import a graph with no nodes whose single tensor is both input and output."""
        x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 2, 3])
        make_and_import_model(make_graph([], "tensor", [x], [x]))

    def test_conv(self):
        """Import a FLOAT ``Conv`` node."""
        self._import_conv(TensorProto.FLOAT, "conv")

    def test_conv_fp16(self):
        """Import a FLOAT16 ``Conv`` node."""
        self._import_conv(TensorProto.FLOAT16, "conv_fp16")

    def test_conv_bfp16(self):
        """Import a BFLOAT16 ``Conv`` node."""
        self._import_conv(TensorProto.BFLOAT16, "conv_bfp16")

    def test_matmul(self):
        """Import a ``MatMul`` node."""
        x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 2, 3])
        a = make_tensor_value_info("a", TensorProto.FLOAT, [1, 3, 4])
        xa = make_tensor_value_info("xa", TensorProto.FLOAT, [1, 2, 4])
        matmul = make_node("MatMul", ["x", "a"], ["xa"], name="matmul")
        make_and_import_model(make_graph([matmul], "matmul", [x, a], [xa]))

    def test_gemm(self):
        """Import a ``Gemm`` node with ``transB=1``."""
        a = make_tensor_value_info("a", TensorProto.FLOAT, [1, 2, 3])
        b = make_tensor_value_info("b", TensorProto.FLOAT, [1, 4, 3])
        c = make_tensor_value_info("c", TensorProto.FLOAT, [1, 2, 4])
        y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 2, 4])
        gemm = make_node("Gemm", ["a", "b", "c"], ["y"], transB=1, name="gemm")
        make_and_import_model(make_graph([gemm], "gemm", [a, b, c], [y]))

    def test_batch_norm(self):
        """Import a ``BatchNormalization`` node."""
        x = make_tensor_value_info("x", TensorProto.UINT32, [1, 3, 2, 2])
        scale = make_tensor_value_info("scale", TensorProto.FLOAT, [3])
        b = make_tensor_value_info("b", TensorProto.FLOAT, [3])
        mean = make_tensor_value_info("mean", TensorProto.FLOAT, [3])
        var = make_tensor_value_info("var", TensorProto.FLOAT, [3])
        y = make_tensor_value_info("y", TensorProto.UINT32, [1, 3, 2, 2])
        batch_norm = make_node(
            "BatchNormalization",
            ["x", "scale", "b", "mean", "var"],
            ["y"],
            name="batchNormalization",
        )
        make_and_import_model(
            make_graph([batch_norm], "batchNormalzation", [x, scale, b, mean, var], [y])
        )

    def test_max_pool(self):
        """Import a ``MaxPool`` node with a 3x3 kernel and stride 2."""
        x = make_tensor_value_info("x", TensorProto.UINT32, [1, 64, 162, 162])
        y = make_tensor_value_info("y", TensorProto.UINT32, [1, 64, 80, 80])
        pool = make_node(
            "MaxPool",
            ["x"],
            ["y"],
            kernel_shape=[3, 3],
            dilations=[1, 1],
            pads=[0, 0, 0, 0],
            strides=[2, 2],
            name="maxPool",
        )
        make_and_import_model(make_graph([pool], "maxPool", [x], [y]))

    def test_avg_pool(self):
        """Import an ``AveragePool`` node with a 3x3 kernel and stride 2."""
        x = make_tensor_value_info("x", TensorProto.UINT32, [1, 64, 162, 162])
        y = make_tensor_value_info("y", TensorProto.UINT32, [1, 64, 80, 80])
        pool = make_node(
            "AveragePool",
            ["x"],
            ["y"],
            kernel_shape=[3, 3],
            pads=[0, 0, 0, 0],
            strides=[2, 2],
            name="avgPool",
        )
        make_and_import_model(make_graph([pool], "avgPool", [x], [y]))

    def test_global_avg_pool(self):
        """Import a ``GlobalAveragePool`` node."""
        x = make_tensor_value_info("x", TensorProto.UINT32, [30, 30, 30, 30])
        y = make_tensor_value_info("y", TensorProto.UINT32, [30, 30, 1, 1])
        pool = make_node(
            "GlobalAveragePool",
            ["x"],
            ["y"],
            name="globalAvgPool",
        )
        make_and_import_model(make_graph([pool], "avgPool", [x], [y]))

    def test_add(self):
        """Import an ``Add`` node."""
        self._import_binary("Add", "add")

    def test_sub(self):
        """Import a ``Sub`` node."""
        self._import_binary("Sub", "sub")

    def test_mul(self):
        """Import a ``Mul`` node."""
        self._import_binary("Mul", "mul")

    def test_div(self):
        """Import a ``Div`` node."""
        self._import_binary("Div", "div")

    def test_pow(self):
        """Import a ``Pow`` node."""
        self._import_binary("Pow", "pow")

    def test_relu(self):
        """Import a ``Relu`` node."""
        self._import_unary("Relu", "relu")

    def test_gelu(self):
        """Placeholder for the ``Gelu`` import test; reported as skipped."""
        # Skip explicitly so the run does not report a pass for a test that
        # checks nothing.  The message is the note the original file carried;
        # "onnx 14.1" presumably means onnx 1.14.1 -- TODO confirm.
        # Once Gelu is available the body becomes:
        #     self._import_unary("Gelu", "gelu")
        self.skipTest("Gelu operator is not supported by onnx 14.1 currently.")

    def test_erf(self):
        """Import an ``Erf`` node."""
        self._import_unary("Erf", "erf")

    def test_sqrt(self):
        """Import a ``Sqrt`` node."""
        self._import_unary("Sqrt", "sqrt")

    def test_sigmoid(self):
        """Import a ``Sigmoid`` node."""
        self._import_unary("Sigmoid", "sigmoid")

    def test_tanh(self):
        """Import a ``Tanh`` node."""
        self._import_unary("Tanh", "tanh")

    def test_hard_sigmoid(self):
        """Import a ``HardSigmoid`` node."""
        self._import_unary("HardSigmoid", "hardSigmoid")

    def test_hard_swish(self):
        """Import a ``HardSwish`` node."""
        self._import_unary("HardSwish", "hardSwish")

    def test_softmax(self):
        """Import a ``Softmax`` node with ``axis=2``."""
        self._import_unary("Softmax", "softmax", axis=2)

    def test_abs(self):
        """Import an ``Abs`` node."""
        self._import_unary("Abs", "abs")

    def test_neg(self):
        """Import a ``Neg`` node."""
        self._import_unary("Neg", "neg")

    def test_identity(self):
        """Import an ``Identity`` node."""
        self._import_unary("Identity", "identity")

    def test_flatten(self):
        """Import a ``Flatten`` node with ``axis=2``."""
        x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 3, 5, 7])
        y = make_tensor_value_info("y", TensorProto.FLOAT, [1 * 3, 5 * 7])
        flatten = make_node("Flatten", ["x"], ["y"], axis=2, name="flatten")
        make_and_import_model(make_graph([flatten], "flatten", [x], [y]))

    def test_reshape(self):
        """Import a ``Reshape`` node whose target shape is a graph initializer."""
        data = make_tensor_value_info("data", TensorProto.FLOAT, [2, 3, 4, 5])
        shape = make_tensor_value_info("shape", TensorProto.INT64, [3])
        shape_data = make_tensor("shape", TensorProto.INT64, [3], [5, 3, 8])
        # The declared output shape is read back from the initializer.
        reshaped = make_tensor_value_info(
            "reshaped", TensorProto.FLOAT, shape_data.int64_data
        )
        reshape = make_node("Reshape", ["data", "shape"], ["reshaped"], name="reshape")
        make_and_import_model(
            make_graph([reshape], "reshape", [data, shape], [reshaped], [shape_data])
        )

    def test_resize(self):
        """Import a ``Resize`` node with an empty ``roi`` and explicit ``scales``."""
        x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 128, 40, 40])
        roi = make_tensor("roi", TensorProto.FLOAT, [0], [])
        scales = make_tensor("scales", TensorProto.FLOAT, [4], [1, 1, 2, 2])
        y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 128, 80, 80])
        resize = make_node("Resize", ["x", "roi", "scales"], ["y"], name="resize")
        make_and_import_model(make_graph([resize], "resize", [x], [y], [roi, scales]))

    def test_squeeze(self):
        """Import a ``Squeeze`` node whose axes are a graph initializer."""
        data = make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 1, 5])
        axes = make_tensor_value_info("axes", TensorProto.INT64, [2])
        axes_data = make_tensor("axes", TensorProto.INT64, [2], [0, 2])
        output = make_tensor_value_info("output", TensorProto.FLOAT, [3, 5])
        squeeze = make_node("Squeeze", ["input", "axes"], ["output"], name="squeeze")
        make_and_import_model(
            make_graph([squeeze], "squeeze", [data, axes], [output], [axes_data])
        )

    def test_unsqueeze(self):
        """Import an ``Unsqueeze`` node whose axes are a graph initializer."""
        data = make_tensor_value_info("input", TensorProto.FLOAT, [2, 3, 4, 5])
        axes = make_tensor_value_info("axes", TensorProto.INT64, [2])
        axes_data = make_tensor("axes", TensorProto.INT64, [2], [0, 2])
        output = make_tensor_value_info("output", TensorProto.FLOAT, [1, 2, 1, 3, 4, 5])
        unsqueeze = make_node(
            "Unsqueeze", ["input", "axes"], ["output"], name="unsqueeze"
        )
        make_and_import_model(
            make_graph([unsqueeze], "unsqueeze", [data, axes], [output], [axes_data])
        )

    def test_concat(self):
        """Import a ``Concat`` node joining two tensors along axis 3."""
        input1 = make_tensor_value_info("input1", TensorProto.FLOAT, [1, 3, 2, 4])
        input2 = make_tensor_value_info("input2", TensorProto.FLOAT, [1, 3, 2, 5])
        output = make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 2, 9])
        concat = make_node(
            "Concat", ["input1", "input2"], ["output"], axis=3, name="concat"
        )
        make_and_import_model(
            make_graph([concat], "concat", [input1, input2], [output])
        )

    def test_gather(self):
        """Import a ``Gather`` node with ``axis=1``."""
        data = make_tensor_value_info("data", TensorProto.FLOAT, [1, 3, 4, 4])
        indices = make_tensor_value_info("indices", TensorProto.INT64, [2, 1, 2])
        output = make_tensor_value_info("output", TensorProto.FLOAT, [1, 2, 1, 2, 4, 4])
        gather = make_node(
            "Gather", ["data", "indices"], ["output"], axis=1, name="gather"
        )
        make_and_import_model(make_graph([gather], "gather", [data, indices], [output]))

    def test_gather_elements(self):
        """Import a ``GatherElements`` node with ``axis=1``."""
        data = make_tensor_value_info("data", TensorProto.FLOAT, [2, 3, 2])
        indices = make_tensor_value_info("indices", TensorProto.INT64, [2, 1, 2])
        output = make_tensor_value_info("output", TensorProto.FLOAT, [2, 1, 2])
        gather_elements = make_node(
            "GatherElements",
            ["data", "indices"],
            ["output"],
            axis=1,
            name="gatherElements",
        )
        make_and_import_model(
            make_graph([gather_elements], "gatherElements", [data, indices], [output])
        )

    def test_reduce_mean(self):
        """Import a ``ReduceMean`` node with ``keepdims=1`` and no explicit axes."""
        data = make_tensor_value_info("data", TensorProto.FLOAT, [2, 3, 3, 4])
        reduced = make_tensor_value_info("reduced", TensorProto.FLOAT, [1, 1, 1, 1])
        reduce_mean = make_node(
            "ReduceMean", ["data"], ["reduced"], keepdims=1, name="reduceMean"
        )
        make_and_import_model(
            make_graph([reduce_mean], "reduceMean", [data], [reduced])
        )

    def test_reduce_sum(self):
        """Import a ``ReduceSum`` node with ``keepdims=1`` and no explicit axes."""
        data = make_tensor_value_info("data", TensorProto.FLOAT, [2, 3, 3, 4])
        reduced = make_tensor_value_info("reduced", TensorProto.FLOAT, [1, 1, 1, 1])
        reduce_sum = make_node(
            "ReduceSum", ["data"], ["reduced"], keepdims=1, name="reduceSum"
        )
        make_and_import_model(make_graph([reduce_sum], "reduceSum", [data], [reduced]))

    def test_slice(self):
        """Import a ``Slice`` node whose starts/ends are graph initializers."""
        data = make_tensor_value_info("data", TensorProto.UINT32, [10, 64, 162, 162])
        output = make_tensor_value_info("output", TensorProto.UINT32, [1, 1, 99, 95])
        starts = make_tensor("starts", TensorProto.INT64, [4], [2, 9, 1, 5])
        ends = make_tensor("ends", TensorProto.INT64, [4], [3, 10, 100, 100])
        slice_node = make_node(
            "Slice", ["data", "starts", "ends"], ["output"], name="slice"
        )
        make_and_import_model(
            make_graph(
                [slice_node],
                "slice",
                [data],
                [output],
                [starts, ends],
            )
        )

    def test_pad(self):
        """Import a ``Pad`` node whose pads are a graph initializer."""
        data = make_tensor_value_info("data", TensorProto.UINT32, [1, 64, 162, 162])
        output = make_tensor_value_info("output", TensorProto.UINT32, [3, 84, 164, 172])
        pads = make_tensor_value_info("pads", TensorProto.INT64, [8])
        pads_data = make_tensor(
            "pads", TensorProto.INT64, [8], [2, 10, 1, 5, 0, 10, 1, 5]
        )
        pad = make_node("Pad", ["data", "pads"], ["output"], name="pad")
        make_and_import_model(
            make_graph(
                [pad],
                "pad",
                [data, pads],
                [output],
                [pads_data],
            )
        )

    # ------------------------------------------------------------------
    # Communication operators (imported without checker validation).
    # ------------------------------------------------------------------

    def test_allReduceSum(self):
        """Import an ``AllReduceSum`` node."""
        self._import_all_reduce("AllReduceSum", "allReduceSum")

    def test_allReduceProd(self):
        """Import an ``AllReduceProd`` node."""
        self._import_all_reduce("AllReduceProd", "allReduceProd")

    def test_allReduceMin(self):
        """Import an ``AllReduceMin`` node."""
        self._import_all_reduce("AllReduceMin", "allReduceMin")

    def test_allReduceMax(self):
        """Import an ``AllReduceMax`` node."""
        self._import_all_reduce("AllReduceMax", "allReduceMax")

    def test_allReduceAvg(self):
        """Import an ``AllReduceAvg`` node."""
        self._import_all_reduce("AllReduceAvg", "allReduceAvg")

    def test_split(self):
        """Import a ``Split`` node with a single output along axis 0."""
        data = make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 2, 4])
        output = make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 2, 4])
        split = make_node("Split", ["input"], ["output"], name="split", axis=0)
        make_and_import_model(make_graph([split], "split", [data], [output]))

    def test_split1(self):
        """Import a ``Split`` node with explicit part sizes [2, 1] along axis 1."""
        data = make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 2, 4])
        split_sizes = make_tensor("split", TensorProto.INT64, [2], [2, 1])
        output1 = make_tensor_value_info("output1", TensorProto.FLOAT, [1, 2, 2, 4])
        output2 = make_tensor_value_info("output2", TensorProto.FLOAT, [1, 1, 2, 4])
        split = make_node(
            "Split", ["input", "split"], ["output1", "output2"], name="split", axis=1
        )
        make_and_import_model(
            make_graph([split], "split", [data], [output1, output2], [split_sizes])
        )

    def test_allBroadcast(self):
        """Import a ``Broadcast`` node with ``root=1``."""
        tensor_in = make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 2, 4])
        tensor_out = make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 2, 4])
        broadcast = make_node(
            "Broadcast", ["input"], ["output"], name="broadcast", root=1
        )
        self._import_unchecked(
            make_graph([broadcast], "broadcast", [tensor_in], [tensor_out])
        )

    def test_allGather(self):
        """Import an ``AllGather`` node; the graph declares no outputs."""
        tensor_in = make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 2, 4])
        world_size = make_tensor_value_info("world_size", TensorProto.INT32, [1])
        all_gather = make_node(
            "AllGather", ["input", "world_size"], ["output"], name="allGather"
        )
        self._import_unchecked(
            make_graph([all_gather], "allGather", [tensor_in, world_size], [])
        )

    # see <https://onnx.ai/onnx/intro/python.html#a-simple-example-a-linear-regression>
    def test_linear(self):
        """Import a two-node graph computing ``y = x @ a + b``."""
        x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 2, 3])
        a = make_tensor_value_info("a", TensorProto.FLOAT, [1, 3, 4])
        b = make_tensor_value_info("b", TensorProto.FLOAT, [1, 2, 4])
        y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 2, 4])
        matmul = make_node("MatMul", ["x", "a"], ["xa"], name="matmul")
        add = make_node("Add", ["xa", "b"], ["y"], name="add")
        graph = make_graph([matmul, add], "lr", [x, a, b], [y])
        model = make_model(graph)
        check_model(model)
        from_onnx(model, backend.cpu_runtime())

    def test_frontend(self):
        """Build a chain of four adds followed by a reshape directly via ``GraphHandler``."""
        handler = backend.GraphHandler(backend.cpu_runtime())
        # 12 is the data-type code passed to ``handler.tensor``; it equals
        # TensorProto.UINT32 -- presumably the codes follow ONNX numbering.
        operands = [handler.tensor([1, 2, 3], 12) for _ in range(5)]

        # Left fold: (((a + b) + c) + d) + e, letting the handler create
        # each output tensor (third argument is None).
        x = operands[0]
        for operand in operands[1:]:
            x = handler.add(x, operand, None)

        y = handler.tensor([3, 2, 1], 12)
        handler.reshape(x, y, [3, 2, 1])

    def test_cast(self):
        """Import a ``Cast`` node converting FLOAT to FLOAT16."""
        input1 = make_tensor_value_info("input1", TensorProto.FLOAT, [1, 3, 2, 4])
        output = make_tensor_value_info("output", TensorProto.FLOAT16, [1, 3, 2, 4])
        cast = make_node(
            "Cast", ["input1"], ["output"], to=TensorProto.FLOAT16, name="cast"
        )
        make_and_import_model(make_graph([cast], "cast", [input1], [output]))

    def test_expand(self):
        """Import an ``Expand`` node whose target shape is a graph initializer."""
        data = make_tensor_value_info("data", TensorProto.FLOAT, [3, 1])
        dim = make_tensor_value_info("dim", TensorProto.INT64, [3])
        dim_data = make_tensor("dim", TensorProto.INT64, [3], [2, 1, 6])
        output = make_tensor_value_info("output", TensorProto.FLOAT, [2, 3, 6])
        expand = make_node("Expand", ["data", "dim"], ["output"], name="expand")
        make_and_import_model(
            make_graph([expand], "expand", [data, dim], [output], [dim_data])
        )

    def test_where(self):
        """Import a ``Where`` node."""
        x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 3, 5, 7])
        y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 3, 5, 7])
        con = make_tensor_value_info("con", TensorProto.BOOL, [1, 3, 5, 7])
        output = make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 5, 7])
        # ONNX defines the inputs of Where as (condition, X, Y): the BOOL
        # tensor must come first, followed by the two value tensors.
        where = make_node("Where", ["con", "x", "y"], ["output"], name="where")
        make_and_import_model(make_graph([where], "where", [x, y, con], [output]))

    def test_send(self):
        """Import a ``Send`` node (one input, no outputs)."""
        send_input = make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 5, 7])
        send = make_node("Send", ["input"], [], name="send", source=0, destination=1)
        self._import_unchecked(make_graph([send], "send", [send_input], []))

    def test_recv(self):
        """Import a ``Recv`` node (no inputs, one output)."""
        recv_output = make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 5, 7])
        recv = make_node(
            "Recv",
            [],
            ["output"],
            name="recv",
            source=0,
            destination=1,
            shape=[1, 3, 5, 7],
            dataType=1,
        )
        self._import_unchecked(make_graph([recv], "recv", [], [recv_output]))


class TestDynamicTensor(unittest.TestCase):
    """Checks that changing a model's input shape changes its reported output shape."""

    def test_dynamic_tensor(self):
        """Change the batch size of ``resnet18-v2-7.onnx`` and compare output shapes.

        The model file is searched for recursively below the current working
        directory and the first match is used.  When the file is not present
        the test is skipped with an explicit message instead of failing
        inside ``onnx.load`` on an empty path.
        """
        filename = "resnet18-v2-7.onnx"
        search_root = os.getcwd()
        # Lazily walk the tree and stop at the first directory holding the file.
        model_file = next(
            (
                os.path.join(root, filename)
                for root, _dirs, files in os.walk(search_root)
                if filename in files
            ),
            None,
        )
        if model_file is None:
            self.skipTest(f"{filename} not found under {search_root}")

        model = OnnxStub(onnx.load(model_file), backend.cpu_runtime())
        output_key = next(iter(model.outputs.keys()))

        # Shape as loaded from the file.
        old_output_shape = model.getShape(output_key)
        self.assertEqual(old_output_shape, [1, 1000])

        # After setting a batch of 5 inputs the output shape must follow.
        model.set_input([[5, 3, 224, 224]])
        new_output_shape = model.getShape(output_key)
        self.assertEqual(new_output_shape, [5, 1000])


# Run every test case defined in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
-												feat: 创建 pyinfinitensor 前端

- python 前端项目结构及打包和安装脚本
- 后端编译出 so 改名为 backend，增加 GraphHandler 修改图结构
- ci 支持测试这些功能

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-12 08:23:49 +08:00
+								import os, onnx, unittest
 								from onnx import TensorProto
-												feat: 补充 DataType 类型

- 增加了 6 个代数类型，与 onnx 的序号对应
- 现在可以导入 reshape 了

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-14 11:27:57 +08:00
+								from onnx.helper import (
 								    make_model,
 								    make_node,
 								    make_tensor,
 								    make_graph,
 								    make_tensor_value_info,
 								)
-												Dev for 202303ddl (#66)

* add activation operations relu, tanh, sigmoid on mlu

* commit for format

* add activation backward operation

* add test for activation_backward

* add test

* add convbpfilter

* fix

* add transpose code and test

* add trigon function operation on mlu: sin,cos,tan,asin,sinh,asinh

* add copy operation on mlu

* add ceil operation and floor operation

* add operation clip

* add operation cnnl div, test and test for divdemo bangc kernel

* add divnonan operation and test

* add erf operation

* add exp operation

* add operation fill

* add log operation

* add log1p operation

* add l2loss operation

* add maximum and minimum operation

* add mseloss operation

* add negTensor operation

* add power operation

* add reciprocal operation

* add sqrt and rsqrt operation

* add transform operation

* add addn operation

* add muln operation

* cherry-pick some operations

* add floordiv operation and floordivtrunc operation

* add floormod operation

* add cumsum operation

* add det operation

* add pad operation

* format

* add concat operation

* format

* add split operation

* fix concat and split operation

* add round operation

* add pooling operation

* add square operation

* add squaredDifference operation

* code format fix

* add flip operation

* code format fix

* add hardtanh operation

* add logic operation

* add addcdiv and addcmul operation

* add arange operation

* add bitcompute operation

* add net test

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: rename

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 用 NativeCpuRuntime 替换 CpuRuntime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code

* fix code

* fix code by review suggestion

* remove operation which is not the onnx operation

* fix format

* clang format

* refactor: tensor 的 print 加一层模板的 dataToString

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: onnx 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 增加计算图优化接口

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add clip operation

* feat: 支持导入 clip

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* test: 导入导出测试加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix batch norm

* feat: 增加 Shape 算子

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入 unsqueeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正 clip 接口

feat: 支持导入 transpose
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add broadcast operation

* fix elementwise-broadcast

* fix elementwise broadcast

* add broadcast for gpu elementwise

* feat: pad 支持 axes 负数

feat: 不支持的 padding 导出为独立的 pad 算子

feat: 支持导入 onnxsim 过的 inception
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正池化的测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 pads，支持 inception 导入导出，已加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 densenet 导入导出，并加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 squeeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix softmax

* feat: 导出 clip 和 transpose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 Conv 的 bias

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv group

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: matmul 的 bias 没有放在输入里，修正

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix example

* fix: 改正 reduce_mean 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* refactor: 修改 slice 实现与 onnx 一致

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: 不导出两个 runtime 函数

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 中文使用指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 补全指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修复导入数据的问题

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 添加 Dropout 基本结构，但不支持两个输出是不同的类型

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 重新导出优化接口

feat: dropout 导入
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* build: BANG 选项加入 Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code: the change to test/kernels/bang/test* is to use NativeCpuRuntime.
The change to include/bang/bang_runtime is for the cntoolkit upgrade.

* feat: 导出 bang runtime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add USE_BANG=1

* fix matmul

* fix reshape

* fix

* fix activation

* fix transpose

* format

* format

* update Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add prelu on mlu

* fix: ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 PRelu

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add convtrans on mlu

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* docs: 更新 README_CN.md

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code by review suggestions

* style

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: Softmax 的 axis 可以用默认值？感觉是 onnx 不标准

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix cuda & intelcpu bugs after merging

---------

Signed-off-by: YdrMaster <ydrml@hotmail.com>
Co-authored-by: wanghailu <wanghailu0717@163.com>
Co-authored-by: wanghailu <wanghailu@qiyuanlab.com>
Co-authored-by: whjthu <haojie0429@gmail.com>
											
										
										
											2023-04-18 15:10:33 +08:00
+								from onnx.checker import check_model, check_graph
 								from onnx.shape_inference import infer_shapes
-												Issue 107: Add copyin Numpy and covertion to Numpy (#126)

* Add copyin_numpy and to_numpy for pybind TensorObj

* fix copyin size assertion

* fix size calculation for scalar (rank = 0) tensor

* Use pybind buffer instead of returning array

* fix format
											
										
										
											2023-09-01 11:20:26 +08:00
+								from pyinfinitensor.onnx import from_onnx, OnnxStub, backend, _parse_data_fp16
 								import numpy as np
-												feat: 创建 pyinfinitensor 前端

- python 前端项目结构及打包和安装脚本
- 后端编译出 so 改名为 backend，增加 GraphHandler 修改图结构
- ci 支持测试这些功能

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-12 08:23:49 +08:00
-												feat: 前端支持 flatten 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 13:50:07 +08:00
+								def make_and_import_model(graph: onnx.GraphProto):
-												Dev for 202303ddl (#66)

* add activation operatiopn relu, tanh, sigmoid on mlu

* commit for format

* add activation backward operation

* add test for activation_backward

* add test

* add convbpfilter

* fix

* add transpsoe code and test

* add trigon function operation on mlu: sin,cos,tan,asin,sinh,asinh

* add copy operation on mlu

* add ceil operation and floor operation

* add operation clip

* add operation cnnl div, test and test for divdemo bangc kernel

* add divnonan operation and test

* add erf operation

* add exp operation

* add operation fill

* add log operation

* add log1p operation

* add l2loss operation

* add maximum and minimum operation

* add mseloss operation

* add negTensor operation

* add power operation

* add reciprocal operation

* add sqrt and rsqrt operation

* add transform operation

* add addn operation

* add muln operation

* cherrry pick some operation

* add floordiv operation and floordivtrunc operation

* add floormod operation

* add cumsum operation

* add det operation

* add pad operation

* format

* add concat operation

* format

* add split operation

* fix concat and split operation

* add round operation

* add pooling operation

* add square operation

* add squaredDifference operation

* code format fix

* add flip operation

* code format fix

* add hardtanh operation

* add logic operation

* add addcdiv and addcmul operation

* add arange operation

* add bitcompute operation

* add net test

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: rename

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 用 NativeCpuRuntime 替换 CpuRuntime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code

* fix code

* fix code by review suggestion

* remove operation which is not the onnx operation

* fix format

* clang format

* refactor: tensor 的 print 加一层模板的 dataToString

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: onnx 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 增加计算图优化接口

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add clip operation

* feat: 支持导入 clip

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* test: 导入导出测试加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix batch norm

* feat: 增加 Shape 算子

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入 unsqueeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正 clip 接口

feat: 支持导入 transpose
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add broadcast operation

* fix elementwise-broadcast

* fix elementwise broadcast

* add broadcast for gpu elementsie

* feat: pad 支持 axes 负数

feat: 不支持的 padding 导出为独立的 pad 算子

feat: 支持导入 onnxsim 过的 inception
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正池化的测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 pads，支持 inception 导入导出，已加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 densenet 导入导出，并加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 squeeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix softmax

* feat: 导出 clip 和 transpose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 Conv 的 bias

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv group

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: matmul 的 bias 没有放在输入里，修正

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix exmaple

* fix: 改正 reduce_mean 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* refactor: 修改 slice 实现与 onnx 一致

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: 不导出两个 runtime 函数

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 中文使用指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 补全指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修复导入数据的问题

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 添加 Dropout 基本结构，但不支持两个输出是不同的类型

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 重新导出优化接口

feat: dropout 导入
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* build: BANG 选项加入 Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fxi code, change of test/kernels/bang/test* is use NativeCpuRuntime.
chaneg of include/bang/bang_runtime is for the cntoolkit upgrade.

* feat: 导出 bang runtime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add USE_BANG=1

* fix matmul

* fix reshape

* fix

* fix activation

* fix transpose

* format

* format

* update Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add prelu on mlu

* fix: ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 PRelu

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add convtrans on mlu

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* docs: 更新 README_CN.md

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code by review suggestions

* style

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: Softmax 的 axis 可以用默认值？感觉是 onnx 不标准

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix cuda & intelcpu bugs after merging

---------

Signed-off-by: YdrMaster <ydrml@hotmail.com>
Co-authored-by: wanghailu <wanghailu0717@163.com>
Co-authored-by: wanghailu <wanghailu@qiyuanlab.com>
Co-authored-by: whjthu <haojie0429@gmail.com>
											
										
										
											2023-04-18 15:10:33 +08:00
+								    check_graph(graph)
-												feat: 前端支持 flatten 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 13:50:07 +08:00
+								    model = make_model(graph)
 								    check_model(model)
-												Dev for 202303ddl (#66)

* add activation operatiopn relu, tanh, sigmoid on mlu

* commit for format

* add activation backward operation

* add test for activation_backward

* add test

* add convbpfilter

* fix

* add transpsoe code and test

* add trigon function operation on mlu: sin,cos,tan,asin,sinh,asinh

* add copy operation on mlu

* add ceil operation and floor operation

* add operation clip

* add operation cnnl div, test and test for divdemo bangc kernel

* add divnonan operation and test

* add erf operation

* add exp operation

* add operation fill

* add log operation

* add log1p operation

* add l2loss operation

* add maximum and minimum operation

* add mseloss operation

* add negTensor operation

* add power operation

* add reciprocal operation

* add sqrt and rsqrt operation

* add transform operation

* add addn operation

* add muln operation

* cherrry pick some operation

* add floordiv operation and floordivtrunc operation

* add floormod operation

* add cumsum operation

* add det operation

* add pad operation

* format

* add concat operation

* format

* add split operation

* fix concat and split operation

* add round operation

* add pooling operation

* add square operation

* add squaredDifference operation

* code format fix

* add flip operation

* code format fix

* add hardtanh operation

* add logic operation

* add addcdiv and addcmul operation

* add arange operation

* add bitcompute operation

* add net test

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: rename

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 用 NativeCpuRuntime 替换 CpuRuntime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code

* fix code

* fix code by review suggestion

* remove operation which is not the onnx operation

* fix format

* clang format

* refactor: tensor 的 print 加一层模板的 dataToString

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: onnx 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 增加计算图优化接口

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add clip operation

* feat: 支持导入 clip

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* test: 导入导出测试加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix batch norm

* feat: 增加 Shape 算子

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入 unsqueeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正 clip 接口

feat: 支持导入 transpose
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add broadcast operation

* fix elementwise-broadcast

* fix elementwise broadcast

* add broadcast for gpu elementsie

* feat: pad 支持 axes 负数

feat: 不支持的 padding 导出为独立的 pad 算子

feat: 支持导入 onnxsim 过的 inception
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正池化的测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 pads，支持 inception 导入导出，已加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 densenet 导入导出，并加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 squeeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix softmax

* feat: 导出 clip 和 transpose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 Conv 的 bias

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv group

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: matmul 的 bias 没有放在输入里，修正

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix exmaple

* fix: 改正 reduce_mean 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* refactor: 修改 slice 实现与 onnx 一致

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: 不导出两个 runtime 函数

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 中文使用指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 补全指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修复导入数据的问题

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 添加 Dropout 基本结构，但不支持两个输出是不同的类型

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 重新导出优化接口

feat: dropout 导入
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* build: BANG 选项加入 Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fxi code, change of test/kernels/bang/test* is use NativeCpuRuntime.
chaneg of include/bang/bang_runtime is for the cntoolkit upgrade.

* feat: 导出 bang runtime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add USE_BANG=1

* fix matmul

* fix reshape

* fix

* fix activation

* fix transpose

* format

* format

* update Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add prelu on mlu

* fix: ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 PRelu

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add convtrans on mlu

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* docs: 更新 README_CN.md

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code by review suggestions

* style

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: Softmax 的 axis 可以用默认值？感觉是 onnx 不标准

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix cuda & intelcpu bugs after merging

---------

Signed-off-by: YdrMaster <ydrml@hotmail.com>
Co-authored-by: wanghailu <wanghailu0717@163.com>
Co-authored-by: wanghailu <wanghailu@qiyuanlab.com>
Co-authored-by: whjthu <haojie0429@gmail.com>
											
										
										
											2023-04-18 15:10:33 +08:00
+								    from_onnx(model, backend.cpu_runtime())
-												feat: 前端支持 flatten 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 13:50:07 +08:00
-												feat: 创建 pyinfinitensor 前端

- python 前端项目结构及打包和安装脚本
- 后端编译出 so 改名为 backend，增加 GraphHandler 修改图结构
- ci 支持测试这些功能

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-12 08:23:49 +08:00
+								class TestStringMethods(unittest.TestCase):
-												Dev for 202303ddl (#66)

* add activation operatiopn relu, tanh, sigmoid on mlu

* commit for format

* add activation backward operation

* add test for activation_backward

* add test

* add convbpfilter

* fix

* add transpsoe code and test

* add trigon function operation on mlu: sin,cos,tan,asin,sinh,asinh

* add copy operation on mlu

* add ceil operation and floor operation

* add operation clip

* add operation cnnl div, test and test for divdemo bangc kernel

* add divnonan operation and test

* add erf operation

* add exp operation

* add operation fill

* add log operation

* add log1p operation

* add l2loss operation

* add maximum and minimum operation

* add mseloss operation

* add negTensor operation

* add power operation

* add reciprocal operation

* add sqrt and rsqrt operation

* add transform operation

* add addn operation

* add muln operation

* cherrry pick some operation

* add floordiv operation and floordivtrunc operation

* add floormod operation

* add cumsum operation

* add det operation

* add pad operation

* format

* add concat operation

* format

* add split operation

* fix concat and split operation

* add round operation

* add pooling operation

* add square operation

* add squaredDifference operation

* code format fix

* add flip operation

* code format fix

* add hardtanh operation

* add logic operation

* add addcdiv and addcmul operation

* add arange operation

* add bitcompute operation

* add net test

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: rename

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 用 NativeCpuRuntime 替换 CpuRuntime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code

* fix code

* fix code by review suggestion

* remove operation which is not the onnx operation

* fix format

* clang format

* refactor: tensor 的 print 加一层模板的 dataToString

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: onnx 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 增加计算图优化接口

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add clip operation

* feat: 支持导入 clip

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* test: 导入导出测试加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix batch norm

* feat: 增加 Shape 算子

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入 unsqueeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正 clip 接口

feat: 支持导入 transpose
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add broadcast operation

* fix elementwise-broadcast

* fix elementwise broadcast

* add broadcast for gpu elementsie

* feat: pad 支持 axes 负数

feat: 不支持的 padding 导出为独立的 pad 算子

feat: 支持导入 onnxsim 过的 inception
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正池化的测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 pads，支持 inception 导入导出，已加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 densenet 导入导出，并加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 squeeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix softmax

* feat: 导出 clip 和 transpose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 Conv 的 bias

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv group

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: matmul 的 bias 没有放在输入里，修正

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix exmaple

* fix: 改正 reduce_mean 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* refactor: 修改 slice 实现与 onnx 一致

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: 不导出两个 runtime 函数

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 中文使用指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 补全指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修复导入数据的问题

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 添加 Dropout 基本结构，但不支持两个输出是不同的类型

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 重新导出优化接口

feat: dropout 导入
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* build: BANG 选项加入 Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fxi code, change of test/kernels/bang/test* is use NativeCpuRuntime.
chaneg of include/bang/bang_runtime is for the cntoolkit upgrade.

* feat: 导出 bang runtime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add USE_BANG=1

* fix matmul

* fix reshape

* fix

* fix activation

* fix transpose

* format

* format

* update Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add prelu on mlu

* fix: ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 PRelu

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add convtrans on mlu

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* docs: 更新 README_CN.md

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code by review suggestions

* style

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: Softmax 的 axis 可以用默认值？感觉是 onnx 不标准

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix cuda & intelcpu bugs after merging

---------

Signed-off-by: YdrMaster <ydrml@hotmail.com>
Co-authored-by: wanghailu <wanghailu0717@163.com>
Co-authored-by: wanghailu <wanghailu@qiyuanlab.com>
Co-authored-by: whjthu <haojie0429@gmail.com>
											
										
										
											2023-04-18 15:10:33 +08:00
+								    # def test_run(self):
-												Cpu backend2 (#77)

fix review

change Device::MKL to Device::INTELCPU

fix mkl linkage

fix errors according to merge from master

now can call mkl backend

fix softmax/flatten with axis from onnx.

modify README.md

fix memory refree

add env_lotus_intelcpu.sh

fix compile

merge from branch cpu_backend

fix something add gather

fix something

FIX: directory rename from "mkl" to "intelcpu"

ADD: use oneMKL dpcpp interface to implement matmul kernel.

ADD: add dpcpp as compiler for mkl, and fix warnings for clang compiling.
add dpcpp kernel for pow.

ADD: mkl kernel for pad.

ADD: slice mkl kernel.

ADD: reshape/flatten/identity mkl kernel.

ADD: split mkl kernel.

fix compile error

FIX: fix flattenObj with axis.

ADD reduce_mean mkl kernel.

Add concat mkl kernel.

bathNorm for mkl kernel.

sigmoid mkl kernel.

ADD：add mkl kernel for pooling

add more tests for softmax

Now softmax cuda kernel supports any axises.

mkl kernel for softmax

softmax

add axis to softmax operator

add mkl kernel for abs tanh

ADD: relu kernel for mkl

fix binary mkl primitives.

add mkl kernel for binary operators

fix compiler error

move stream to runtime

clang format

add MemoryFormat for tensorObj.

use post_ops for fused conv/deconv

Distinguish mkl  op_timer from cuda op timer.

add act optype to conv and deconv

add operator timer

add mkl kernel for convTransposed

minor fix for group conv

do not use cblas_sgemm_batch

CpuRuntimeObj->NativeCpuRuntimeObj

add  matmul op for mkl
											
										
										
											2023-04-17 12:15:23 +08:00
+								    #    model_file = next(
 								    #        (name for name in os.listdir() if name.endswith(".onnx")), None
 								    #    )
 								    #    if model_file != None:
 								    #        print(
 								    #            "model: {file}({size:.2f} MiB)".format(
 								    #                file=model_file, size=os.path.getsize(model_file) / 1024 / 1024
 								    #            )
 								    #        )
 								    #        run_onnx(onnx.load(model_file), runtime)
-												feat: 创建 pyinfinitensor 前端

- python 前端项目结构及打包和安装脚本
- 后端编译出 so 改名为 backend，增加 GraphHandler 修改图结构
- ci 支持测试这些功能

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-12 08:23:49 +08:00
+								    def test_load(self):
-												Dev for 202303ddl (#66)

* add activation operatiopn relu, tanh, sigmoid on mlu

* commit for format

* add activation backward operation

* add test for activation_backward

* add test

* add convbpfilter

* fix

* add transpsoe code and test

* add trigon function operation on mlu: sin,cos,tan,asin,sinh,asinh

* add copy operation on mlu

* add ceil operation and floor operation

* add operation clip

* add operation cnnl div, test and test for divdemo bangc kernel

* add divnonan operation and test

* add erf operation

* add exp operation

* add operation fill

* add log operation

* add log1p operation

* add l2loss operation

* add maximum and minimum operation

* add mseloss operation

* add negTensor operation

* add power operation

* add reciprocal operation

* add sqrt and rsqrt operation

* add transform operation

* add addn operation

* add muln operation

* cherrry pick some operation

* add floordiv operation and floordivtrunc operation

* add floormod operation

* add cumsum operation

* add det operation

* add pad operation

* format

* add concat operation

* format

* add split operation

* fix concat and split operation

* add round operation

* add pooling operation

* add square operation

* add squaredDifference operation

* code format fix

* add flip operation

* code format fix

* add hardtanh operation

* add logic operation

* add addcdiv and addcmul operation

* add arange operation

* add bitcompute operation

* add net test

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: rename

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 用 NativeCpuRuntime 替换 CpuRuntime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code

* fix code

* fix code by review suggestion

* remove operation which is not the onnx operation

* fix format

* clang format

* refactor: tensor 的 print 加一层模板的 dataToString

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: onnx 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 增加计算图优化接口

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add clip operation

* feat: 支持导入 clip

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* test: 导入导出测试加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix batch norm

* feat: 增加 Shape 算子

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入 unsqueeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正 clip 接口

feat: 支持导入 transpose
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add broadcast operation

* fix elementwise-broadcast

* fix elementwise broadcast

* add broadcast for gpu elementsie

* feat: pad 支持 axes 负数

feat: 不支持的 padding 导出为独立的 pad 算子

feat: 支持导入 onnxsim 过的 inception
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正池化的测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 pads，支持 inception 导入导出，已加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 densenet 导入导出，并加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 squeeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix softmax

* feat: 导出 clip 和 transpose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 Conv 的 bias

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv group

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: matmul 的 bias 没有放在输入里，修正

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix exmaple

* fix: 改正 reduce_mean 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* refactor: 修改 slice 实现与 onnx 一致

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: 不导出两个 runtime 函数

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 中文使用指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 补全指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修复导入数据的问题

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 添加 Dropout 基本结构，但不支持两个输出是不同的类型

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 重新导出优化接口

feat: dropout 导入
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* build: BANG 选项加入 Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fxi code, change of test/kernels/bang/test* is use NativeCpuRuntime.
chaneg of include/bang/bang_runtime is for the cntoolkit upgrade.

* feat: 导出 bang runtime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add USE_BANG=1

* fix matmul

* fix reshape

* fix

* fix activation

* fix transpose

* format

* format

* update Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add prelu on mlu

* fix: ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 PRelu

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add convtrans on mlu

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* docs: 更新 README_CN.md

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code by review suggestions

* style

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: Softmax 的 axis 可以用默认值？感觉是 onnx 不标准

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix cuda & intelcpu bugs after merging

---------

Signed-off-by: YdrMaster <ydrml@hotmail.com>
Co-authored-by: wanghailu <wanghailu0717@163.com>
Co-authored-by: wanghailu <wanghailu@qiyuanlab.com>
Co-authored-by: whjthu <haojie0429@gmail.com>
											
										
										
											2023-04-18 15:10:33 +08:00
+								        for model_file in os.listdir():
 								            if model_file.endswith(".onnx"):
 								                print(
 								                    "model: {file}({size:.2f} MiB)".format(
 								                        file=model_file, size=os.path.getsize(model_file) / 1024 / 1024
 								                    )
-												feat: 创建 pyinfinitensor 前端

- python 前端项目结构及打包和安装脚本
- 后端编译出 so 改名为 backend，增加 GraphHandler 修改图结构
- ci 支持测试这些功能

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-12 08:23:49 +08:00
+								                )
-												Dev for 202303ddl (#66)

* add activation operatiopn relu, tanh, sigmoid on mlu

* commit for format

* add activation backward operation

* add test for activation_backward

* add test

* add convbpfilter

* fix

* add transpsoe code and test

* add trigon function operation on mlu: sin,cos,tan,asin,sinh,asinh

* add copy operation on mlu

* add ceil operation and floor operation

* add operation clip

* add operation cnnl div, test and test for divdemo bangc kernel

* add divnonan operation and test

* add erf operation

* add exp operation

* add operation fill

* add log operation

* add log1p operation

* add l2loss operation

* add maximum and minimum operation

* add mseloss operation

* add negTensor operation

* add power operation

* add reciprocal operation

* add sqrt and rsqrt operation

* add transform operation

* add addn operation

* add muln operation

* cherrry pick some operation

* add floordiv operation and floordivtrunc operation

* add floormod operation

* add cumsum operation

* add det operation

* add pad operation

* format

* add concat operation

* format

* add split operation

* fix concat and split operation

* add round operation

* add pooling operation

* add square operation

* add squaredDifference operation

* code format fix

* add flip operation

* code format fix

* add hardtanh operation

* add logic operation

* add addcdiv and addcmul operation

* add arange operation

* add bitcompute operation

* add net test

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: rename

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 用 NativeCpuRuntime 替换 CpuRuntime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code

* fix code

* fix code by review suggestion

* remove operation which is not the onnx operation

* fix format

* clang format

* refactor: tensor 的 print 加一层模板的 dataToString

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: onnx 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 增加计算图优化接口

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add clip operation

* feat: 支持导入 clip

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* test: 导入导出测试加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix batch norm

* feat: 增加 Shape 算子

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入 unsqueeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正 clip 接口

feat: 支持导入 transpose
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add broadcast operation

* fix elementwise-broadcast

* fix elementwise broadcast

* add broadcast for gpu elementsie

* feat: pad 支持 axes 负数

feat: 不支持的 padding 导出为独立的 pad 算子

feat: 支持导入 onnxsim 过的 inception
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正池化的测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 pads，支持 inception 导入导出，已加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 densenet 导入导出，并加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 squeeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix softmax

* feat: 导出 clip 和 transpose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 Conv 的 bias

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv group

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: matmul 的 bias 没有放在输入里，修正

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix exmaple

* fix: 改正 reduce_mean 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* refactor: 修改 slice 实现与 onnx 一致

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: 不导出两个 runtime 函数

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 中文使用指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 补全指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修复导入数据的问题

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 添加 Dropout 基本结构，但不支持两个输出是不同的类型

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 重新导出优化接口

feat: dropout 导入
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* build: BANG 选项加入 Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fxi code, change of test/kernels/bang/test* is use NativeCpuRuntime.
chaneg of include/bang/bang_runtime is for the cntoolkit upgrade.

* feat: 导出 bang runtime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add USE_BANG=1

* fix matmul

* fix reshape

* fix

* fix activation

* fix transpose

* format

* format

* update Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add prelu on mlu

* fix: ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 PRelu

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add convtrans on mlu

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* docs: 更新 README_CN.md

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code by review suggestions

* style

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: Softmax 的 axis 可以用默认值？感觉是 onnx 不标准

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix cuda & intelcpu bugs after merging

---------

Signed-off-by: YdrMaster <ydrml@hotmail.com>
Co-authored-by: wanghailu <wanghailu0717@163.com>
Co-authored-by: wanghailu <wanghailu@qiyuanlab.com>
Co-authored-by: whjthu <haojie0429@gmail.com>
											
										
										
											2023-04-18 15:10:33 +08:00
+								                model = OnnxStub(onnx.load(model_file), backend.cpu_runtime()).to_onnx(
 								                    "new"
 								                )
 								                model = infer_shapes(model)
-												feat: 创建 pyinfinitensor 前端

- python 前端项目结构及打包和安装脚本
- 后端编译出 so 改名为 backend，增加 GraphHandler 修改图结构
- ci 支持测试这些功能

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-12 08:23:49 +08:00
-												feat: 增加 add sub mul div pow 前端

- 添加每个算子的单元测试
- 添加线性回归模型导入测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 11:25:54 +08:00
+								    def test_tensor(self):
 								        x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 2, 3])
-												feat: 前端支持 flatten 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 13:50:07 +08:00
+								        make_and_import_model(make_graph([], "tensor", [x], [x]))
-												feat: 增加 add sub mul div pow 前端

- 添加每个算子的单元测试
- 添加线性回归模型导入测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 11:25:54 +08:00
-												feat: 前端支持 Conv 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-22 15:05:44 +08:00
+								    def test_conv(self):
 								        i = make_tensor_value_info("i", TensorProto.FLOAT, [1, 3, 4, 4])
 								        w = make_tensor_value_info("w", TensorProto.FLOAT, [2, 3, 3, 3])
 								        o = make_tensor_value_info("o", TensorProto.FLOAT, [1, 2, 2, 2])
 								        conv = make_node(
 								            "Conv",
 								            ["i", "w"],
 								            ["o"],
 								            "conv",
-												Dev for 202303ddl (#66)

* add activation operatiopn relu, tanh, sigmoid on mlu

* commit for format

* add activation backward operation

* add test for activation_backward

* add test

* add convbpfilter

* fix

* add transpsoe code and test

* add trigon function operation on mlu: sin,cos,tan,asin,sinh,asinh

* add copy operation on mlu

* add ceil operation and floor operation

* add operation clip

* add operation cnnl div, test and test for divdemo bangc kernel

* add divnonan operation and test

* add erf operation

* add exp operation

* add operation fill

* add log operation

* add log1p operation

* add l2loss operation

* add maximum and minimum operation

* add mseloss operation

* add negTensor operation

* add power operation

* add reciprocal operation

* add sqrt and rsqrt operation

* add transform operation

* add addn operation

* add muln operation

* cherrry pick some operation

* add floordiv operation and floordivtrunc operation

* add floormod operation

* add cumsum operation

* add det operation

* add pad operation

* format

* add concat operation

* format

* add split operation

* fix concat and split operation

* add round operation

* add pooling operation

* add square operation

* add squaredDifference operation

* code format fix

* add flip operation

* code format fix

* add hardtanh operation

* add logic operation

* add addcdiv and addcmul operation

* add arange operation

* add bitcompute operation

* add net test

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: rename

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 用 NativeCpuRuntime 替换 CpuRuntime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code

* fix code

* fix code by review suggestion

* remove operation which is not the onnx operation

* fix format

* clang format

* refactor: tensor 的 print 加一层模板的 dataToString

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: onnx 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 增加计算图优化接口

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add clip operation

* feat: 支持导入 clip

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* test: 导入导出测试加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix batch norm

* feat: 增加 Shape 算子

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入 unsqueeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正 clip 接口

feat: 支持导入 transpose
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add broadcast operation

* fix elementwise-broadcast

* fix elementwise broadcast

* add broadcast for gpu elementsie

* feat: pad 支持 axes 负数

feat: 不支持的 padding 导出为独立的 pad 算子

feat: 支持导入 onnxsim 过的 inception
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正池化的测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 pads，支持 inception 导入导出，已加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 densenet 导入导出，并加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 squeeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix softmax

* feat: 导出 clip 和 transpose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 Conv 的 bias

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv group

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: matmul 的 bias 没有放在输入里，修正

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix exmaple

* fix: 改正 reduce_mean 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* refactor: 修改 slice 实现与 onnx 一致

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: 不导出两个 runtime 函数

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 中文使用指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 补全指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修复导入数据的问题

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 添加 Dropout 基本结构，但不支持两个输出是不同的类型

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 重新导出优化接口

feat: dropout 导入
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* build: BANG 选项加入 Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fxi code, change of test/kernels/bang/test* is use NativeCpuRuntime.
chaneg of include/bang/bang_runtime is for the cntoolkit upgrade.

* feat: 导出 bang runtime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add USE_BANG=1

* fix matmul

* fix reshape

* fix

* fix activation

* fix transpose

* format

* format

* update Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add prelu on mlu

* fix: ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 PRelu

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add convtrans on mlu

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* docs: 更新 README_CN.md

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code by review suggestions

* style

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: Softmax 的 axis 可以用默认值？感觉是 onnx 不标准

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix cuda & intelcpu bugs after merging

---------

Signed-off-by: YdrMaster <ydrml@hotmail.com>
Co-authored-by: wanghailu <wanghailu0717@163.com>
Co-authored-by: wanghailu <wanghailu@qiyuanlab.com>
Co-authored-by: whjthu <haojie0429@gmail.com>
											
										
										
											2023-04-18 15:10:33 +08:00
+								            pads=[1, 1, 1, 1],
-												feat: 前端支持 Conv 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-22 15:05:44 +08:00
+								            strides=[2, 1],
 								            dilations=[1, 2],
 								        )
 								        make_and_import_model(make_graph([conv], "conv", [i, w], [o]))
-												支持fp16 dtype (#96)

* add conv_half kernel

* Conv Kernel FP16

* dcj:
replace "DataType::Float32" with "op->getDType()" to support more DataType

* feat: support Float16 dtype

* fix: set default clang-format to 14 version

* fix: 按照review意见修改

* fix: add data convert to convfp16 kernel test

* test: add conv_fp16 kernel test

---------

Co-authored-by: zhangyue207 <zhangyue@qiyuanlab.com>
Co-authored-by: kilinchange <kilinchange@163.com>
											
										
										
											2023-08-02 16:38:16 +08:00
+								    def test_conv_fp16(self):
 								        i = make_tensor_value_info("i", TensorProto.FLOAT16, [1, 3, 4, 4])
 								        w = make_tensor_value_info("w", TensorProto.FLOAT16, [2, 3, 3, 3])
 								        o = make_tensor_value_info("o", TensorProto.FLOAT16, [1, 2, 2, 2])
 								        conv = make_node(
 								            "Conv",
 								            ["i", "w"],
 								            ["o"],
 								            "conv",
 								            pads=[1, 1, 1, 1],
 								            strides=[2, 1],
 								            dilations=[1, 2],
 								        )
 								        make_and_import_model(make_graph([conv], "conv_fp16", [i, w], [o]))
-												support mixed dtype (#102)

* feat: support mixed dtype

* feat: support cast op

* test: add test for cast op

* feat: support datatype BFloat16

* feat: support data convert fp32 <-> bfp16

* fix: fix all op's infershape func

* fix as review comment
											
										
										
											2023-08-16 21:49:43 +08:00
+								    def test_conv_bfp16(self):
 								        i = make_tensor_value_info("i", TensorProto.BFLOAT16, [1, 3, 4, 4])
 								        w = make_tensor_value_info("w", TensorProto.BFLOAT16, [2, 3, 3, 3])
 								        o = make_tensor_value_info("o", TensorProto.BFLOAT16, [1, 2, 2, 2])
 								        conv = make_node(
 								            "Conv",
 								            ["i", "w"],
 								            ["o"],
 								            "conv",
 								            pads=[1, 1, 1, 1],
 								            strides=[2, 1],
 								            dilations=[1, 2],
 								        )
 								        make_and_import_model(make_graph([conv], "conv_bfp16", [i, w], [o]))
-												feat: 增加 add sub mul div pow 前端

- 添加每个算子的单元测试
- 添加线性回归模型导入测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 11:25:54 +08:00
+								    def test_matmul(self):
 								        x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 2, 3])
 								        a = make_tensor_value_info("a", TensorProto.FLOAT, [1, 3, 4])
-												feat: 前端支持 gemm 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-15 13:20:34 +08:00
+								        xa = make_tensor_value_info("xa", TensorProto.FLOAT, [1, 2, 4])
-												feat: 增加 add sub mul div pow 前端

- 添加每个算子的单元测试
- 添加线性回归模型导入测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 11:25:54 +08:00
+								        matmul = make_node("MatMul", ["x", "a"], ["xa"], name="matmul")
-												feat: 前端支持 flatten 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 13:50:07 +08:00
+								        make_and_import_model(make_graph([matmul], "matmul", [x, a], [xa]))
-												feat: 增加 add sub mul div pow 前端

- 添加每个算子的单元测试
- 添加线性回归模型导入测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 11:25:54 +08:00
-												feat: 前端支持 gemm 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-15 13:20:34 +08:00
+								    def test_gemm(self):
 								        a = make_tensor_value_info("a", TensorProto.FLOAT, [1, 2, 3])
 								        b = make_tensor_value_info("b", TensorProto.FLOAT, [1, 4, 3])
 								        c = make_tensor_value_info("c", TensorProto.FLOAT, [1, 2, 4])
 								        y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 2, 4])
 								        gemm = make_node("Gemm", ["a", "b", "c"], ["y"], transB=1, name="gemm")
 								        make_and_import_model(make_graph([gemm], "gemm", [a, b, c], [y]))
-												test: batchNorm 单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-14 08:54:58 +08:00
+								    def test_batch_norm(self):
 								        x = make_tensor_value_info("x", TensorProto.UINT32, [1, 3, 2, 2])
-												fix: 修正 batchNorm 实现

- onnx 和 pytorch 认为 batchNorm 的 4 个参数是 [c] 形状的，cuDNN 可能认为是 [1,c,1,...]。
优化已改为 [c]，但 cuDNN 推理没有改；

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-23 11:48:28 +08:00
+								        scale = make_tensor_value_info("scale", TensorProto.FLOAT, [3])
 								        b = make_tensor_value_info("b", TensorProto.FLOAT, [3])
 								        mean = make_tensor_value_info("mean", TensorProto.FLOAT, [3])
 								        var = make_tensor_value_info("var", TensorProto.FLOAT, [3])
-												test: batchNorm 单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-14 08:54:58 +08:00
+								        y = make_tensor_value_info("y", TensorProto.UINT32, [1, 3, 2, 2])
 								        batch_norm = make_node(
 								            "BatchNormalization",
 								            ["x", "scale", "b", "mean", "var"],
 								            ["y"],
 								            name="batchNormalization",
 								        )
 								        make_and_import_model(
-												refactor(core): 添加新的 `OpType` 定义 (#99)

* feat: 添加新的 OpType 定义

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* refactor: 使用新的 OpType 替换原来的，修改整个项目

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: onnx 导入

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正 cuda 和 bang kernel 的问题

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 过滤 bang test

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 过滤 bang test

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix bang code.

* fix code on bang

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 删除指定文件

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 删两个没用的文件，去掉一个不知道为什么的注释

Signed-off-by: YdrMaster <ydrml@hotmail.com>

---------

Signed-off-by: YdrMaster <ydrml@hotmail.com>
Co-authored-by: wanghailu <wanghailu@qiyuanlab.com>
											
										
										
											2023-08-07 11:17:05 +08:00
+								            make_graph([batch_norm], "batchNormalzation", [x, scale, b, mean, var], [y])
-												test: batchNorm 单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-14 08:54:58 +08:00
+								        )
-												feat: 前端支持 pool 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-14 16:26:47 +08:00
+								    def test_max_pool(self):
 								        x = make_tensor_value_info("x", TensorProto.UINT32, [1, 64, 162, 162])
 								        y = make_tensor_value_info("y", TensorProto.UINT32, [1, 64, 80, 80])
 								        pool = make_node(
 								            "MaxPool",
 								            ["x"],
 								            ["y"],
 								            kernel_shape=[3, 3],
 								            dilations=[1, 1],
-												Dev for 202303ddl (#66)

* add activation operatiopn relu, tanh, sigmoid on mlu

* commit for format

* add activation backward operation

* add test for activation_backward

* add test

* add convbpfilter

* fix

* add transpsoe code and test

* add trigon function operation on mlu: sin,cos,tan,asin,sinh,asinh

* add copy operation on mlu

* add ceil operation and floor operation

* add operation clip

* add operation cnnl div, test and test for divdemo bangc kernel

* add divnonan operation and test

* add erf operation

* add exp operation

* add operation fill

* add log operation

* add log1p operation

* add l2loss operation

* add maximum and minimum operation

* add mseloss operation

* add negTensor operation

* add power operation

* add reciprocal operation

* add sqrt and rsqrt operation

* add transform operation

* add addn operation

* add muln operation

* cherrry pick some operation

* add floordiv operation and floordivtrunc operation

* add floormod operation

* add cumsum operation

* add det operation

* add pad operation

* format

* add concat operation

* format

* add split operation

* fix concat and split operation

* add round operation

* add pooling operation

* add square operation

* add squaredDifference operation

* code format fix

* add flip operation

* code format fix

* add hardtanh operation

* add logic operation

* add addcdiv and addcmul operation

* add arange operation

* add bitcompute operation

* add net test

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: rename

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 用 NativeCpuRuntime 替换 CpuRuntime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code

* fix code

* fix code by review suggestion

* remove operation which is not the onnx operation

* fix format

* clang format

* refactor: tensor 的 print 加一层模板的 dataToString

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: onnx 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 增加计算图优化接口

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add clip operation

* feat: 支持导入 clip

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* test: 导入导出测试加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix batch norm

* feat: 增加 Shape 算子

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入 unsqueeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正 clip 接口

feat: 支持导入 transpose
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add broadcast operation

* fix elementwise-broadcast

* fix elementwise broadcast

* add broadcast for gpu elementsie

* feat: pad 支持 axes 负数

feat: 不支持的 padding 导出为独立的 pad 算子

feat: 支持导入 onnxsim 过的 inception
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正池化的测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 pads，支持 inception 导入导出，已加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 densenet 导入导出，并加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 squeeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix softmax

* feat: 导出 clip 和 transpose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 Conv 的 bias

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv group

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: matmul 的 bias 没有放在输入里，修正

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix exmaple

* fix: 改正 reduce_mean 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* refactor: 修改 slice 实现与 onnx 一致

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: 不导出两个 runtime 函数

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 中文使用指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 补全指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修复导入数据的问题

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 添加 Dropout 基本结构，但不支持两个输出是不同的类型

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 重新导出优化接口

feat: dropout 导入
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* build: BANG 选项加入 Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fxi code, change of test/kernels/bang/test* is use NativeCpuRuntime.
chaneg of include/bang/bang_runtime is for the cntoolkit upgrade.

* feat: 导出 bang runtime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add USE_BANG=1

* fix matmul

* fix reshape

* fix

* fix activation

* fix transpose

* format

* format

* update Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add prelu on mlu

* fix: ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 PRelu

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add convtrans on mlu

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* docs: 更新 README_CN.md

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code by review suggestions

* style

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: Softmax 的 axis 可以用默认值？感觉是 onnx 不标准

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix cuda & intelcpu bugs after merging

---------

Signed-off-by: YdrMaster <ydrml@hotmail.com>
Co-authored-by: wanghailu <wanghailu0717@163.com>
Co-authored-by: wanghailu <wanghailu@qiyuanlab.com>
Co-authored-by: whjthu <haojie0429@gmail.com>
											
										
										
											2023-04-18 15:10:33 +08:00
+								            pads=[0, 0, 0, 0],
-												feat: 前端支持 pool 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-14 16:26:47 +08:00
+								            strides=[2, 2],
 								            name="maxPool",
 								        )
 								        make_and_import_model(make_graph([pool], "maxPool", [x], [y]))
 								    def test_avg_pool(self):
 								        x = make_tensor_value_info("x", TensorProto.UINT32, [1, 64, 162, 162])
 								        y = make_tensor_value_info("y", TensorProto.UINT32, [1, 64, 80, 80])
 								        pool = make_node(
 								            "AveragePool",
 								            ["x"],
 								            ["y"],
 								            kernel_shape=[3, 3],
-												Dev for 202303ddl (#66)

* add activation operatiopn relu, tanh, sigmoid on mlu

* commit for format

* add activation backward operation

* add test for activation_backward

* add test

* add convbpfilter

* fix

* add transpsoe code and test

* add trigon function operation on mlu: sin,cos,tan,asin,sinh,asinh

* add copy operation on mlu

* add ceil operation and floor operation

* add operation clip

* add operation cnnl div, test and test for divdemo bangc kernel

* add divnonan operation and test

* add erf operation

* add exp operation

* add operation fill

* add log operation

* add log1p operation

* add l2loss operation

* add maximum and minimum operation

* add mseloss operation

* add negTensor operation

* add power operation

* add reciprocal operation

* add sqrt and rsqrt operation

* add transform operation

* add addn operation

* add muln operation

* cherrry pick some operation

* add floordiv operation and floordivtrunc operation

* add floormod operation

* add cumsum operation

* add det operation

* add pad operation

* format

* add concat operation

* format

* add split operation

* fix concat and split operation

* add round operation

* add pooling operation

* add square operation

* add squaredDifference operation

* code format fix

* add flip operation

* code format fix

* add hardtanh operation

* add logic operation

* add addcdiv and addcmul operation

* add arange operation

* add bitcompute operation

* add net test

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: rename

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 用 NativeCpuRuntime 替换 CpuRuntime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code

* fix code

* fix code by review suggestion

* remove operation which is not the onnx operation

* fix format

* clang format

* refactor: tensor 的 print 加一层模板的 dataToString

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: onnx 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 增加计算图优化接口

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add clip operation

* feat: 支持导入 clip

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* test: 导入导出测试加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix batch norm

* feat: 增加 Shape 算子

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入 unsqueeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正 clip 接口

feat: 支持导入 transpose
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add broadcast operation

* fix elementwise-broadcast

* fix elementwise broadcast

* add broadcast for gpu elementsie

* feat: pad 支持 axes 负数

feat: 不支持的 padding 导出为独立的 pad 算子

feat: 支持导入 onnxsim 过的 inception
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正池化的测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 pads，支持 inception 导入导出，已加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 densenet 导入导出，并加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 squeeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix softmax

* feat: 导出 clip 和 transpose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 Conv 的 bias

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv group

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: matmul 的 bias 没有放在输入里，修正

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix exmaple

* fix: 改正 reduce_mean 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* refactor: 修改 slice 实现与 onnx 一致

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: 不导出两个 runtime 函数

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 中文使用指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 补全指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修复导入数据的问题

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 添加 Dropout 基本结构，但不支持两个输出是不同的类型

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 重新导出优化接口

feat: dropout 导入
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* build: BANG 选项加入 Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fxi code, change of test/kernels/bang/test* is use NativeCpuRuntime.
chaneg of include/bang/bang_runtime is for the cntoolkit upgrade.

* feat: 导出 bang runtime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add USE_BANG=1

* fix matmul

* fix reshape

* fix

* fix activation

* fix transpose

* format

* format

* update Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add prelu on mlu

* fix: ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 PRelu

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add convtrans on mlu

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* docs: 更新 README_CN.md

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code by review suggestions

* style

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: Softmax 的 axis 可以用默认值？感觉是 onnx 不标准

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix cuda & intelcpu bugs after merging

---------

Signed-off-by: YdrMaster <ydrml@hotmail.com>
Co-authored-by: wanghailu <wanghailu0717@163.com>
Co-authored-by: wanghailu <wanghailu@qiyuanlab.com>
Co-authored-by: whjthu <haojie0429@gmail.com>
											
										
										
											2023-04-18 15:10:33 +08:00
+								            pads=[0, 0, 0, 0],
-												feat: 前端支持 pool 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-14 16:26:47 +08:00
+								            strides=[2, 2],
 								            name="avgPool",
 								        )
 								        make_and_import_model(make_graph([pool], "avgPool", [x], [y]))
-												feat: 前端支持 GlobalAveragePool 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-16 10:33:24 +08:00
+								    def test_global_avg_pool(self):
 								        x = make_tensor_value_info("x", TensorProto.UINT32, [30, 30, 30, 30])
 								        y = make_tensor_value_info("y", TensorProto.UINT32, [30, 30, 1, 1])
 								        pool = make_node(
 								            "GlobalAveragePool",
 								            ["x"],
 								            ["y"],
 								            name="globalAvgPool",
 								        )
 								        make_and_import_model(make_graph([pool], "avgPool", [x], [y]))
-												feat: 增加 add sub mul div pow 前端

- 添加每个算子的单元测试
- 添加线性回归模型导入测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 11:25:54 +08:00
+								    def test_add(self):
 								        a = make_tensor_value_info("a", TensorProto.FLOAT, [1, 3, 5, 7])
 								        b = make_tensor_value_info("b", TensorProto.FLOAT, [1, 3, 5, 7])
 								        c = make_tensor_value_info("c", TensorProto.FLOAT, [1, 3, 5, 7])
 								        add = make_node("Add", ["a", "b"], ["c"], name="add")
-												feat: 前端支持 flatten 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 13:50:07 +08:00
+								        make_and_import_model(make_graph([add], "add", [a, b], [c]))
-												feat: 增加 add sub mul div pow 前端

- 添加每个算子的单元测试
- 添加线性回归模型导入测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 11:25:54 +08:00
 								    def test_sub(self):
 								        a = make_tensor_value_info("a", TensorProto.FLOAT, [1, 3, 5, 7])
 								        b = make_tensor_value_info("b", TensorProto.FLOAT, [1, 3, 5, 7])
 								        c = make_tensor_value_info("c", TensorProto.FLOAT, [1, 3, 5, 7])
 								        sub = make_node("Sub", ["a", "b"], ["c"], name="sub")
-												feat: 前端支持 flatten 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 13:50:07 +08:00
+								        make_and_import_model(make_graph([sub], "sub", [a, b], [c]))
-												feat: 增加 add sub mul div pow 前端

- 添加每个算子的单元测试
- 添加线性回归模型导入测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 11:25:54 +08:00
 								    def test_mul(self):
 								        a = make_tensor_value_info("a", TensorProto.FLOAT, [1, 3, 5, 7])
 								        b = make_tensor_value_info("b", TensorProto.FLOAT, [1, 3, 5, 7])
 								        c = make_tensor_value_info("c", TensorProto.FLOAT, [1, 3, 5, 7])
 								        mul = make_node("Mul", ["a", "b"], ["c"], name="mul")
-												feat: 前端支持 flatten 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 13:50:07 +08:00
+								        make_and_import_model(make_graph([mul], "mul", [a, b], [c]))
-												feat: 增加 add sub mul div pow 前端

- 添加每个算子的单元测试
- 添加线性回归模型导入测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 11:25:54 +08:00
 								    def test_div(self):
 								        a = make_tensor_value_info("a", TensorProto.FLOAT, [1, 3, 5, 7])
 								        b = make_tensor_value_info("b", TensorProto.FLOAT, [1, 3, 5, 7])
 								        c = make_tensor_value_info("c", TensorProto.FLOAT, [1, 3, 5, 7])
 								        div = make_node("Div", ["a", "b"], ["c"], name="div")
-												feat: 前端支持 flatten 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 13:50:07 +08:00
+								        make_and_import_model(make_graph([div], "div", [a, b], [c]))
-												feat: 增加 add sub mul div pow 前端

- 添加每个算子的单元测试
- 添加线性回归模型导入测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 11:25:54 +08:00
 								    def test_pow(self):
 								        a = make_tensor_value_info("a", TensorProto.FLOAT, [1, 3, 5, 7])
 								        b = make_tensor_value_info("b", TensorProto.FLOAT, [1, 3, 5, 7])
 								        c = make_tensor_value_info("c", TensorProto.FLOAT, [1, 3, 5, 7])
 								        pow = make_node("Pow", ["a", "b"], ["c"], name="pow")
-												feat: 前端支持 flatten 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 13:50:07 +08:00
+								        make_and_import_model(make_graph([pow], "pow", [a, b], [c]))
-												feat: 增加 add sub mul div pow 前端

- 添加每个算子的单元测试
- 添加线性回归模型导入测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 11:25:54 +08:00
-												feat: 前端支持 relu sigmoid tanh softmax abs 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 11:54:54 +08:00
+								    def test_relu(self):
 								        x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 3, 5, 7])
 								        y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 3, 5, 7])
 								        relu = make_node("Relu", ["x"], ["y"], name="relu")
-												feat: 前端支持 flatten 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 13:50:07 +08:00
+								        make_and_import_model(make_graph([relu], "relu", [x], [y]))
-												feat: 前端支持 relu sigmoid tanh softmax abs 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 11:54:54 +08:00
-												Xpu (#82)

* support kunlun xpu and add an operator named Add

* add sub, mul, div, pow, maximum, minimum

* add code

* add xpu code

* add code

* add matmul

* add transpose

* add unary operator

* add unary operator

* add some operator

* add code

* support run resnet18 on xpu

* add code

* add max pool2d

* fix xpu code, let it can run.

* 添加XPU算子 (#120)

* add floordiv for xpu

* add batchnorm for xpu

* add more cast types for xpu

* add conv_trans for xpu

* add pad for xpu

* add logical ops for xpu

* fix format for xpu src and include

* fix format for xpu test

* fix format for xpu src

---------

Co-authored-by: Bolun <bolunz@u.nus.edu>

* Xpu abs (#121)

* add: unary kernel for xpu

* formatting

* format

* format

* format

* fix: pointer jump

* fix optype comments

* fix bug introduced while resolving conflict

* change cmake option for kunlunxin xpu from 'xpu' to 'kunlun'; fix bug after merging distributed infrastructure

* Add doc support for xpu (#141)

* fix

* fix

* fix pooling test

* format

* format

* fix

* fix

* set cmake version requirement

* fix cmakelists

* rename xpu to kunlun

* fix

* fix format

* fix format

* fix format

* fix change name to kunlun

* format

* fix format

* clang format

* fix format

---------

Co-authored-by: root <root@localhost.localdomain>
Co-authored-by: wanghailu <wanghailu@qiyuanlab.com>
Co-authored-by: wanghailu <wanghailu0717@163.com>
Co-authored-by: Bolun Zhang <48948016+Chamberlain0w0@users.noreply.github.com>
Co-authored-by: Bolun <bolunz@u.nus.edu>
Co-authored-by: zhangyue207 <138768300+zhangyue207@users.noreply.github.com>
Co-authored-by: Haojie Wang <haojie0429@gmail.com>
Co-authored-by: baominghelly <41820386+baominghelly@users.noreply.github.com>
Co-authored-by: Bolun <chamberlain0w0@gmail.com>
											
										
										
											2023-10-16 10:57:08 +08:00
+								    """Gelu operator is not supported by onnx 14.1 currently."""
-												support Dynamic tensor infer shape and fix memory pool (#176)

* feat: support dynamic tensor part1

* feat: support dynamic-tensor part2

* feat: support dynamic tensor part 3

* fix: fix some ..

* - add kvcache example

* feat: support concat to identity kernel

* add a simple memory pool for allocator

* fix: rebase to master

* fix bug after merging

* - remove outdated script

* fix: fix as review

---------

Co-authored-by: kilinchange <kilinchange@163.com>
Co-authored-by: Haojie Wang <haojie0429@gmail.com>
											
										
										
											2023-11-23 13:11:50 +08:00
-												【Hackathon No.108】Add Gelu operator, ffi, kernel for cpu and gpu. (#148)

feat: Add Gelu kernel, operator, ffi.
											
										
										
											2023-10-10 15:21:13 +08:00
+								    def test_gelu(self):
 								        pass
 								        # x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 3, 5, 7])
 								        # y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 3, 5, 7])
 								        # gelu = make_node("Gelu", ["x"], ["y"], name="gelu")
 								        # make_and_import_model(make_graph([gelu], "gelu", [x], [y]))
-												框架支持bert/gpt2模型构图 (#94)

* feat: support to sqrt op

* feat: support to erf op

* feat: support to expand op

* feat: support to where op

* fix: gather op index can be int64_t(hard coding)

* fix: some wrong use

* style: fix the format style

* test: add test for change op

* fix: rebase to master

* fix: fix matmul b compute wrong

* add expand and where kernel

* Add int64 support for cuda gather kernel

* add test_where.cc

* add "expand.(cu/cc,test,cuda),modified where.cu"

* Separate initialization of datatypes to avoid compile error

* modify where.(cu/cc/h,test), expand and clip

* Format fix

* Format fix

---------

Co-authored-by: xgqdut2016 <kenan_gewei@163.com>
Co-authored-by: panzezhong <panzezhong@qiyuanlab.com>
Co-authored-by: Haojie Wang <haojie0429@gmail.com>
											
										
										
											2023-08-29 16:06:52 +08:00
+								    def test_erf(self):
 								        x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 3, 5, 7])
 								        y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 3, 5, 7])
 								        erf = make_node("Erf", ["x"], ["y"], name="erf")
 								        make_and_import_model(make_graph([erf], "erf", [x], [y]))
 								    def test_sqrt(self):
 								        x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 3, 5, 7])
 								        y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 3, 5, 7])
 								        sqrt = make_node("Sqrt", ["x"], ["y"], name="sqrt")
 								        make_and_import_model(make_graph([sqrt], "sqrt", [x], [y]))
-												feat: 前端支持 relu sigmoid tanh softmax abs 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 11:54:54 +08:00
+								    def test_sigmoid(self):
 								        x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 3, 5, 7])
 								        y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 3, 5, 7])
 								        sigmoid = make_node("Sigmoid", ["x"], ["y"], name="sigmoid")
-												feat: 前端支持 flatten 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 13:50:07 +08:00
+								        make_and_import_model(make_graph([sigmoid], "sigmoid", [x], [y]))
-												feat: 前端支持 relu sigmoid tanh softmax abs 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 11:54:54 +08:00
 								    def test_tanh(self):
 								        x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 3, 5, 7])
 								        y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 3, 5, 7])
 								        tanh = make_node("Tanh", ["x"], ["y"], name="tanh")
-												feat: 前端支持 flatten 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 13:50:07 +08:00
+								        make_and_import_model(make_graph([tanh], "tanh", [x], [y]))
-												Xpu (#82)

* support kunlun xpu and add an operator named Add

* add sub, mul, div, pow, maximum, minimum

* add code

* add xpu code

* add code

* add matmul

* add transpose

* add unary operator

* add unary operator

* add some operator

* add code

* support run resnet18 on xpu

* add code

* add max pool2d

* fix xpu code, let it can run.

* 添加XPU算子 (#120)

* add floordiv for xpu

* add batchnorm for xpu

* add more cast types for xpu

* add conv_trans for xpu

* add pad for xpu

* add logical ops for xpu

* fix format for xpu src and include

* fix format for xpu test

* fix format for xpu src

---------

Co-authored-by: Bolun <bolunz@u.nus.edu>

* Xpu abs (#121)

* add: unary kernel for xpu

* formatting

* format

* format

* format

* fix: pointer jump

* fix optype comments

* fix bug introduced while resolving conflict

* change cmake option for kunlunxin xpu from 'xpu' to 'kunlun'; fix bug after merging distributed infrastructure

* Add doc support for xpu (#141)

* fix

* fix

* fix pooling test

* format

* format

* fix

* fix

* set cmake version requirement

* fix cmakelists

* rename xpu to kunlun

* fix

* fix format

* fix format

* fix format

* fix change name to kunlun

* format

* fix format

* clang format

* fix format

---------

Co-authored-by: root <root@localhost.localdomain>
Co-authored-by: wanghailu <wanghailu@qiyuanlab.com>
Co-authored-by: wanghailu <wanghailu0717@163.com>
Co-authored-by: Bolun Zhang <48948016+Chamberlain0w0@users.noreply.github.com>
Co-authored-by: Bolun <bolunz@u.nus.edu>
Co-authored-by: zhangyue207 <138768300+zhangyue207@users.noreply.github.com>
Co-authored-by: Haojie Wang <haojie0429@gmail.com>
Co-authored-by: baominghelly <41820386+baominghelly@users.noreply.github.com>
Co-authored-by: Bolun <chamberlain0w0@gmail.com>
											
										
										
											2023-10-16 10:57:08 +08:00
-												Add HardSigmoid and HardSwish (#156)

* Add HardSigmoid and HardSwish

* fix format
											
										
										
											2023-10-10 22:41:06 +08:00
+								    def test_hard_sigmoid(self):
 								        x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 3, 5, 7])
 								        y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 3, 5, 7])
 								        hardSigmoid = make_node("HardSigmoid", ["x"], ["y"], name="hardSigmoid")
 								        make_and_import_model(make_graph([hardSigmoid], "hardSigmoid", [x], [y]))
 								    def test_hard_swish(self):
 								        x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 3, 5, 7])
 								        y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 3, 5, 7])
 								        hardSwish = make_node("HardSwish", ["x"], ["y"], name="hardSwish")
 								        make_and_import_model(make_graph([hardSwish], "hardSwish", [x], [y]))
-												feat: 前端支持 relu sigmoid tanh softmax abs 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 11:54:54 +08:00
 								    def test_softmax(self):
 								        x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 3, 5, 7])
 								        y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 3, 5, 7])
-												Cpu backend2 (#77)

fix review

change Device::MKL to Device::INTELCPU

fix mkl linkage

fix errors according to merge from master

now can call mkl backend

fix softmax/flatten with axis from onnx.

modify README.md

fix memory refree

add env_lotus_intelcpu.sh

fix compile

merge from branch cpu_backend

fix something add gather

fix something

FIX: directory rename from "mkl" to "intelcpu"

ADD: use oneMKL dpcpp interface to implement matmul kernel.

ADD: add dpcpp as compiler for mkl, and fix warnings for clang compiling.
add dpcpp kernel for pow.

ADD: mkl kernel for pad.

ADD: slice mkl kernel.

ADD: reshape/flatten/identity mkl kernel.

ADD: split mkl kernel.

fix compile error

FIX: fix flattenObj with axis.

ADD reduce_mean mkl kernel.

Add concat mkl kernel.

batchNorm for mkl kernel.

sigmoid mkl kernel.

ADD：add mkl kernel for pooling

add more tests for softmax

Now softmax cuda kernel supports any axises.

mkl kernel for softmax

softmax

add axis to softmax operator

add mkl kernel for abs tanh

ADD: relu kernel for mkl

fix binary mkl primitives.

add mkl kernel for binary operators

fix compiler error

move stream to runtime

clang format

add MemoryFormat for tensorObj.

use post_ops for fused conv/deconv

Distinguish mkl  op_timer from cuda op timer.

add act optype to conv and deconv

add operator timer

add mkl kernel for convTransposed

minor fix for group conv

do not use cblas_sgemm_batch

CpuRuntimeObj->NativeCpuRuntimeObj

add  matmul op for mkl
											
										
										
											2023-04-17 12:15:23 +08:00
+								        softmax = make_node("Softmax", ["x"], ["y"], axis=2, name="softmax")
-												feat: 前端支持 flatten 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 13:50:07 +08:00
+								        make_and_import_model(make_graph([softmax], "softmax", [x], [y]))
-												feat: 前端支持 relu sigmoid tanh softmax abs 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 11:54:54 +08:00
 								    def test_abs(self):
 								        x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 3, 5, 7])
 								        y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 3, 5, 7])
 								        abs = make_node("Abs", ["x"], ["y"], name="abs")
-												feat: 前端支持 flatten 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 13:50:07 +08:00
+								        make_and_import_model(make_graph([abs], "abs", [x], [y]))
-												Xpu (#82)

* support kunlun xpu and add an operator named Add

* add sub, mul, div, pow, maximum, minimum

* add code

* add xpu code

* add code

* add matmul

* add transpose

* add unary operator

* add unary operator

* add some operator

* add code

* support run resnet18 on xpu

* add code

* add max pool2d

* fix xpu code, let it can run.

* 添加XPU算子 (#120)

* add floordiv for xpu

* add batchnorm for xpu

* add more cast types for xpu

* add conv_trans for xpu

* add pad for xpu

* add logical ops for xpu

* fix format for xpu src and include

* fix format for xpu test

* fix format for xpu src

---------

Co-authored-by: Bolun <bolunz@u.nus.edu>

* Xpu abs (#121)

* add: unary kernel for xpu

* formatting

* format

* format

* format

* fix: pointer jump

* fix optype comments

* fix bug introduced while resolving conflict

* change cmake option for kunlunxin xpu from 'xpu' to 'kunlun'; fix bug after merging distributed infrastructure

* Add doc support for xpu (#141)

* fix

* fix

* fix pooling test

* format

* format

* fix

* fix

* set cmake version requirement

* fix cmakelists

* rename xpu to kunlun

* fix

* fix format

* fix format

* fix format

* fix change name to kunlun

* format

* fix format

* clang format

* fix format

---------

Co-authored-by: root <root@localhost.localdomain>
Co-authored-by: wanghailu <wanghailu@qiyuanlab.com>
Co-authored-by: wanghailu <wanghailu0717@163.com>
Co-authored-by: Bolun Zhang <48948016+Chamberlain0w0@users.noreply.github.com>
Co-authored-by: Bolun <bolunz@u.nus.edu>
Co-authored-by: zhangyue207 <138768300+zhangyue207@users.noreply.github.com>
Co-authored-by: Haojie Wang <haojie0429@gmail.com>
Co-authored-by: baominghelly <41820386+baominghelly@users.noreply.github.com>
Co-authored-by: Bolun <chamberlain0w0@gmail.com>
											
										
										
											2023-10-16 10:57:08 +08:00
-												Add Neg operator and kernel (#152)

* Add Neg operator and kernel

* handle neg in to_onnx

---------

Co-authored-by: Haojie Wang <haojie0429@gmail.com>
											
										
										
											2023-10-10 10:54:56 +08:00
+								    def test_neg(self):
 								        x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 3, 5, 7])
 								        y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 3, 5, 7])
 								        neg = make_node("Neg", ["x"], ["y"], name="neg")
 								        make_and_import_model(make_graph([neg], "neg", [x], [y]))
-												feat: 前端支持 flatten 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 13:50:07 +08:00
 								    def test_identity(self):
 								        x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 3, 5, 7])
 								        y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 3, 5, 7])
 								        identity = make_node("Identity", ["x"], ["y"], name="identity")
 								        make_and_import_model(make_graph([identity], "identity", [x], [y]))
 								    def test_flatten(self):
 								        x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 3, 5, 7])
-												Dev for 202303ddl (#66)

* add activation operation relu, tanh, sigmoid on mlu

* commit for format

* add activation backward operation

* add test for activation_backward

* add test

* add convbpfilter

* fix

* add transpose code and test

* add trigon function operation on mlu: sin,cos,tan,asin,sinh,asinh

* add copy operation on mlu

* add ceil operation and floor operation

* add operation clip

* add operation cnnl div, test and test for divdemo bangc kernel

* add divnonan operation and test

* add erf operation

* add exp operation

* add operation fill

* add log operation

* add log1p operation

* add l2loss operation

* add maximum and minimum operation

* add mseloss operation

* add negTensor operation

* add power operation

* add reciprocal operation

* add sqrt and rsqrt operation

* add transform operation

* add addn operation

* add muln operation

* cherry pick some operation

* add floordiv operation and floordivtrunc operation

* add floormod operation

* add cumsum operation

* add det operation

* add pad operation

* format

* add concat operation

* format

* add split operation

* fix concat and split operation

* add round operation

* add pooling operation

* add square operation

* add squaredDifference operation

* code format fix

* add flip operation

* code format fix

* add hardtanh operation

* add logic operation

* add addcdiv and addcmul operation

* add arange operation

* add bitcompute operation

* add net test

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: rename

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 用 NativeCpuRuntime 替换 CpuRuntime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code

* fix code

* fix code by review suggestion

* remove operation which is not the onnx operation

* fix format

* clang format

* refactor: tensor 的 print 加一层模板的 dataToString

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: onnx 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 增加计算图优化接口

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add clip operation

* feat: 支持导入 clip

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* test: 导入导出测试加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix batch norm

* feat: 增加 Shape 算子

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入 unsqueeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正 clip 接口

feat: 支持导入 transpose
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add broadcast operation

* fix elementwise-broadcast

* fix elementwise broadcast

* add broadcast for gpu elementwise

* feat: pad 支持 axes 负数

feat: 不支持的 padding 导出为独立的 pad 算子

feat: 支持导入 onnxsim 过的 inception
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正池化的测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 pads，支持 inception 导入导出，已加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 densenet 导入导出，并加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 squeeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix softmax

* feat: 导出 clip 和 transpose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 Conv 的 bias

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv group

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: matmul 的 bias 没有放在输入里，修正

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix example

* fix: 改正 reduce_mean 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* refactor: 修改 slice 实现与 onnx 一致

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: 不导出两个 runtime 函数

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 中文使用指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 补全指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修复导入数据的问题

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 添加 Dropout 基本结构，但不支持两个输出是不同的类型

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 重新导出优化接口

feat: dropout 导入
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* build: BANG 选项加入 Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code, change of test/kernels/bang/test* is to use NativeCpuRuntime.
change of include/bang/bang_runtime is for the cntoolkit upgrade.

* feat: 导出 bang runtime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add USE_BANG=1

* fix matmul

* fix reshape

* fix

* fix activation

* fix transpose

* format

* format

* update Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add prelu on mlu

* fix: ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 PRelu

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add convtrans on mlu

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* docs: 更新 README_CN.md

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code by review suggestions

* style

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: Softmax 的 axis 可以用默认值？感觉是 onnx 不标准

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix cuda & intelcpu bugs after merging

---------

Signed-off-by: YdrMaster <ydrml@hotmail.com>
Co-authored-by: wanghailu <wanghailu0717@163.com>
Co-authored-by: wanghailu <wanghailu@qiyuanlab.com>
Co-authored-by: whjthu <haojie0429@gmail.com>
											
										
										
											2023-04-18 15:10:33 +08:00
+								        y = make_tensor_value_info("y", TensorProto.FLOAT, [1 * 3, 5 * 7])
-												Cpu backend2 (#77)

fix review

change Device::MKL to Device::INTELCPU

fix mkl linkage

fix errors according to merge from master

now can call mkl backend

fix softmax/flatten with axis from onnx.

modify README.md

fix memory refree

add env_lotus_intelcpu.sh

fix compile

merge from branch cpu_backend

fix something add gather

fix something

FIX: directory rename from "mkl" to "intelcpu"

ADD: use oneMKL dpcpp interface to implement matmul kernel.

ADD: add dpcpp as compiler for mkl, and fix warnings for clang compiling.
add dpcpp kernel for pow.

ADD: mkl kernel for pad.

ADD: slice mkl kernel.

ADD: reshape/flatten/identity mkl kernel.

ADD: split mkl kernel.

fix compile error

FIX: fix flattenObj with axis.

ADD reduce_mean mkl kernel.

Add concat mkl kernel.

batchNorm for mkl kernel.

sigmoid mkl kernel.

ADD：add mkl kernel for pooling

add more tests for softmax

Now softmax cuda kernel supports any axises.

mkl kernel for softmax

softmax

add axis to softmax operator

add mkl kernel for abs tanh

ADD: relu kernel for mkl

fix binary mkl primitives.

add mkl kernel for binary operators

fix compiler error

move stream to runtime

clang format

add MemoryFormat for tensorObj.

use post_ops for fused conv/deconv

Distinguish mkl  op_timer from cuda op timer.

add act optype to conv and deconv

add operator timer

add mkl kernel for convTransposed

minor fix for group conv

do not use cblas_sgemm_batch

CpuRuntimeObj->NativeCpuRuntimeObj

add  matmul op for mkl
											
										
										
											2023-04-17 12:15:23 +08:00
+								        flatten = make_node("Flatten", ["x"], ["y"], axis=2, name="flatten")
-												support mixed dtype (#102)

* feat: support mixed dtype

* feat: support cast op

* test: add test for cast op

* feat: support datatype BFloat16

* feat: support data convert fp32 <-> bfp16

* fix: fix all op's infershape func

* fix as review comment
											
										
										
											2023-08-16 21:49:43 +08:00
+								        make_and_import_model(make_graph([flatten], "flatten", [x], [y]))
-												feat: 前端支持 relu sigmoid tanh softmax abs 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 11:54:54 +08:00
-												fix: 改正 reshap 导入

- 从 initializer 拿到 reshape 的 shape 值
- 但 reshape 仍然无法导入，因为无法分辨 shape 其实不是一个后端张量

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-14 10:14:55 +08:00
+								    def test_reshape(self):
-												feat: 前端支持 concat 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-14 13:42:35 +08:00
+								        data = make_tensor_value_info("data", TensorProto.FLOAT, [2, 3, 4, 5])
 								        shape = make_tensor_value_info("shape", TensorProto.INT64, [3])
 								        shape_data = make_tensor("shape", TensorProto.INT64, [3], [5, 3, 8])
-												feat: 补充 DataType 类型

- 增加了 6 个代数类型，与 onnx 的序号对应
- 现在可以导入 reshape 了

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-14 11:27:57 +08:00
+								        reshaped = make_tensor_value_info(
 								            "reshaped", TensorProto.FLOAT, shape_data.int64_data
 								        )
-												fix: 改正 reshap 导入

- 从 initializer 拿到 reshape 的 shape 值
- 但 reshape 仍然无法导入，因为无法分辨 shape 其实不是一个后端张量

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-14 10:14:55 +08:00
+								        reshape = make_node("Reshape", ["data", "shape"], ["reshaped"], name="reshape")
-												feat: 补充 DataType 类型

- 增加了 6 个代数类型，与 onnx 的序号对应
- 现在可以导入 reshape 了

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-14 11:27:57 +08:00
+								        make_and_import_model(
 								            make_graph([reshape], "reshape", [data, shape], [reshaped], [shape_data])
 								        )
-												feat: 前端支持 reshape

- 无法测试，因为后端不支持 shape 的 INT64 类型

opt: ReshapeObj 构造改为全部传值并在内部 move
Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-14 09:50:32 +08:00
-												add frontend resize kernel (#194)

* - add frontend resize kernel

* - fix resize test

* - fix bug
- add onnx test for resize

* fix: modify codes as reviewer suggested

---------

Co-authored-by: Haojie Wang <haojie0429@gmail.com>
											
										
										
											2023-12-29 13:32:56 +08:00
+								    def test_resize(self):
 								        x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 128, 40, 40])
 								        roi = make_tensor("roi", TensorProto.FLOAT, [0], [])
 								        scales = make_tensor("scales", TensorProto.FLOAT, [4], [1, 1, 2, 2])
 								        y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 128, 80, 80])
 								        reshape = make_node("Resize", ["x", "roi", "scales"], ["y"], name="resize")
 								        make_and_import_model(make_graph([reshape], "resize", [x], [y], [roi, scales]))
-												解除前端对onnx infershape功能的依赖 (#206)

* feat: SqueezeOp lift the dependency of onnx infershape.

* feat: UnsqueezeOp lift the dependency of onnx infershape.

* feat: lift the dependency of onnx infershape

* fix: fix Makefile off nccl
											
										
										
											2024-01-12 14:54:27 +08:00
+								    def test_squeeze(self):
 								        input = make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 1, 5])
 								        axes = make_tensor_value_info("axes", TensorProto.INT64, [2])
 								        axes_data = make_tensor("axes", TensorProto.INT64, [2], [0, 2])
 								        output = make_tensor_value_info("output", TensorProto.FLOAT, [3, 5])
 								        squeeze = make_node("Squeeze", ["input", "axes"], ["output"], name="squeeze")
 								        make_and_import_model(
 								            make_graph([squeeze], "squeeze", [input, axes], [output], [axes_data])
 								        )
 								    def test_unsqueeze(self):
 								        """Import an Unsqueeze node: [2, 3, 4, 5] with axes [0, 2] -> rank 6."""
 								        node = make_node("Unsqueeze", ["input", "axes"], ["output"], name="unsqueeze")
 								        graph_inputs = [
 								            make_tensor_value_info("input", TensorProto.FLOAT, [2, 3, 4, 5]),
 								            make_tensor_value_info("axes", TensorProto.INT64, [2]),
 								        ]
 								        # `axes` is declared as an input and also given a value by an initializer.
 								        axes_init = make_tensor("axes", TensorProto.INT64, [2], [0, 2])
 								        expanded = make_tensor_value_info(
 								            "output", TensorProto.FLOAT, [1, 2, 1, 3, 4, 5]
 								        )
 								        graph = make_graph([node], "unsqueeze", graph_inputs, [expanded], [axes_init])
 								        make_and_import_model(graph)
-												feat: 前端支持 concat 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-14 13:42:35 +08:00
+								    def test_concat(self):
 								        input1 = make_tensor_value_info("input1", TensorProto.FLOAT, [1, 3, 2, 4])
 								        input2 = make_tensor_value_info("input2", TensorProto.FLOAT, [1, 3, 2, 5])
 								        output = make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 2, 9])
 								        concat = make_node(
 								            "Concat", ["input1", "input2"], ["output"], axis=3, name="concat"
 								        )
 								        make_and_import_model(
 								            make_graph([concat], "concat", [input1, input2], [output])
 								        )
-												feat: 前端支持 gather 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-14 14:16:01 +08:00
+								    def test_gather(self):
 								        data = make_tensor_value_info("data", TensorProto.FLOAT, [1, 3, 4, 4])
-												Add GatherElements op and cuda kernel (#149)

* Add GatherElements op and cuda kernel

* fix format

* remove print

* remove unused var

* fix spacing

* fix format

---------

Co-authored-by: panzezhong@qiyuanlab.com <panzezhong@zezhongpan>
Co-authored-by: Haojie Wang <haojie0429@gmail.com>
											
										
										
											2023-10-12 09:18:12 +08:00
+								        indices = make_tensor_value_info("indices", TensorProto.INT64, [2, 1, 2])
-												feat: 前端支持 gather 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-14 14:16:01 +08:00
+								        output = make_tensor_value_info("output", TensorProto.FLOAT, [1, 2, 1, 2, 4, 4])
 								        gather = make_node(
 								            "Gather", ["data", "indices"], ["output"], axis=1, name="gather"
 								        )
 								        make_and_import_model(make_graph([gather], "gather", [data, indices], [output]))
-												Add GatherElements op and cuda kernel (#149)

* Add GatherElements op and cuda kernel

* fix format

* remove print

* remove unused var

* fix spacing

* fix format

---------

Co-authored-by: panzezhong@qiyuanlab.com <panzezhong@zezhongpan>
Co-authored-by: Haojie Wang <haojie0429@gmail.com>
											
										
										
											2023-10-12 09:18:12 +08:00
+								    def test_gather_elements(self):
 								        data = make_tensor_value_info("data", TensorProto.FLOAT, [2, 3, 2])
 								        indices = make_tensor_value_info("indices", TensorProto.INT64, [2, 1, 2])
 								        output = make_tensor_value_info("output", TensorProto.FLOAT, [2, 1, 2])
 								        gatherElements = make_node(
-												Xpu (#82)

* support kunlun xpu and add an operator named Add

* add sub, mul, div, pow, maximum, minimum

* add code

* add xpu code

* add code

* add matmul

* add transpose

* add unary operator

* add unary operator

* add some operator

* add code

* support run resnet18 on xpu

* add code

* add max pool2d

* fix xpu code, let it can run.

* 添加XPU算子 (#120)

* add floordiv for xpu

* add batchnorm for xpu

* add more cast types for xpu

* add conv_trans for xpu

* add pad for xpu

* add logical ops for xpu

* fix format for xpu src and include

* fix format for xpu test

* fix format for xpu src

---------

Co-authored-by: Bolun <bolunz@u.nus.edu>

* Xpu abs (#121)

* add: unary kernel for xpu

* formatting

* format

* format

* format

* fix: pointer jump

* fix optype comments

* fix bug introduced while resolving conflict

* change cmake option for kunlunxin xpu from 'xpu' to 'kunlun'; fix bug after merging distributed infrastructure

* Add doc support for xpu (#141)

* fix

* fix

* fix pooling test

* format

* format

* fix

* fix

* set cmake version requirement

* fix cmakelists

* rename xpu to kunlun

* fix

* fix format

* fix format

* fix format

* fix change name to kunlun

* format

* fix format

* clang format

* fix format

---------

Co-authored-by: root <root@localhost.localdomain>
Co-authored-by: wanghailu <wanghailu@qiyuanlab.com>
Co-authored-by: wanghailu <wanghailu0717@163.com>
Co-authored-by: Bolun Zhang <48948016+Chamberlain0w0@users.noreply.github.com>
Co-authored-by: Bolun <bolunz@u.nus.edu>
Co-authored-by: zhangyue207 <138768300+zhangyue207@users.noreply.github.com>
Co-authored-by: Haojie Wang <haojie0429@gmail.com>
Co-authored-by: baominghelly <41820386+baominghelly@users.noreply.github.com>
Co-authored-by: Bolun <chamberlain0w0@gmail.com>
											
										
										
											2023-10-16 10:57:08 +08:00
+								            "GatherElements",
 								            ["data", "indices"],
 								            ["output"],
 								            axis=1,
 								            name="gatherElements",
 								        )
 								        make_and_import_model(
 								            make_graph([gatherElements], "gatherElements", [data, indices], [output])
-												Add GatherElements op and cuda kernel (#149)

* Add GatherElements op and cuda kernel

* fix format

* remove print

* remove unused var

* fix spacing

* fix format

---------

Co-authored-by: panzezhong@qiyuanlab.com <panzezhong@zezhongpan>
Co-authored-by: Haojie Wang <haojie0429@gmail.com>
											
										
										
											2023-10-12 09:18:12 +08:00
+								        )
-												feat: 前端支持 reduceMean 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-14 15:35:01 +08:00
+								    def test_reduce_mean(self):
 								        data = make_tensor_value_info("data", TensorProto.FLOAT, [2, 3, 3, 4])
 								        reduced = make_tensor_value_info("reduced", TensorProto.FLOAT, [1, 1, 1, 1])
 								        reduceMean = make_node(
 								            "ReduceMean", ["data"], ["reduced"], keepdims=1, name="reduceMean"
 								        )
 								        make_and_import_model(make_graph([reduceMean], "reduceMean", [data], [reduced]))
-												Add send and recv operators based on NCCL (#182)

* baseline sendrecv, bug

* success sendrecv

* get rank from comm

* set output shape

* successful:set output shape equal to input shape

* shape as attribute

* success:shape as attribute

* success send recv, output 0

* add onnx test

* split send and recv

* success split send and recv

* test-onnx bug

* success test-onnx

* modified onnx.py

* solve review
											
										
										
											2023-12-14 16:38:03 +08:00
-												Add ReduceSum op and kernel (#160)

* Add reduceSum op and kernel

* fix merge and format

* Reduce: reuse cat macro, add doc string

---------

Co-authored-by: Haojie Wang <haojie0429@gmail.com>
											
										
										
											2023-11-24 09:29:58 +08:00
+								    def test_reduce_sum(self):
 								        data = make_tensor_value_info("data", TensorProto.FLOAT, [2, 3, 3, 4])
 								        reduced = make_tensor_value_info("reduced", TensorProto.FLOAT, [1, 1, 1, 1])
 								        reduceSum = make_node(
 								            "ReduceSum", ["data"], ["reduced"], keepdims=1, name="reduceSum"
 								        )
 								        make_and_import_model(make_graph([reduceSum], "reduceSum", [data], [reduced]))
-												feat: 前端支持 reduceMean 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-14 15:35:01 +08:00
-												feat: 前端支持 slice 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-14 17:35:18 +08:00
+								    def test_slice(self):
 								        data = make_tensor_value_info("data", TensorProto.UINT32, [10, 64, 162, 162])
-												Dev for 202303ddl (#66)

* add activation operation relu, tanh, sigmoid on mlu

* commit for format

* add activation backward operation

* add test for activation_backward

* add test

* add convbpfilter

* fix

* add transpose code and test

* add trigon function operation on mlu: sin,cos,tan,asin,sinh,asinh

* add copy operation on mlu

* add ceil operation and floor operation

* add operation clip

* add operation cnnl div, test and test for divdemo bangc kernel

* add divnonan operation and test

* add erf operation

* add exp operation

* add operation fill

* add log operation

* add log1p operation

* add l2loss operation

* add maximum and minimum operation

* add mseloss operation

* add negTensor operation

* add power operation

* add reciprocal operation

* add sqrt and rsqrt operation

* add transform operation

* add addn operation

* add muln operation

* cherry pick some operation

* add floordiv operation and floordivtrunc operation

* add floormod operation

* add cumsum operation

* add det operation

* add pad operation

* format

* add concat operation

* format

* add split operation

* fix concat and split operation

* add round operation

* add pooling operation

* add square operation

* add squaredDifference operation

* code format fix

* add flip operation

* code format fix

* add hardtanh operation

* add logic operation

* add addcdiv and addcmul operation

* add arange operation

* add bitcompute operation

* add net test

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: rename

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 用 NativeCpuRuntime 替换 CpuRuntime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code

* fix code

* fix code by review suggestion

* remove operation which is not the onnx operation

* fix format

* clang format

* refactor: tensor 的 print 加一层模板的 dataToString

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: onnx 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 增加计算图优化接口

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add clip operation

* feat: 支持导入 clip

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* test: 导入导出测试加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix batch norm

* feat: 增加 Shape 算子

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入 unsqueeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正 clip 接口

feat: 支持导入 transpose
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add broadcast operation

* fix elementwise-broadcast

* fix elementwise broadcast

* add broadcast for gpu elementwise

* feat: pad 支持 axes 负数

feat: 不支持的 padding 导出为独立的 pad 算子

feat: 支持导入 onnxsim 过的 inception
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正池化的测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 pads，支持 inception 导入导出，已加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 densenet 导入导出，并加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 squeeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix softmax

* feat: 导出 clip 和 transpose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 Conv 的 bias

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv group

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: matmul 的 bias 没有放在输入里，修正

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix example

* fix: 改正 reduce_mean 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* refactor: 修改 slice 实现与 onnx 一致

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: 不导出两个 runtime 函数

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 中文使用指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 补全指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修复导入数据的问题

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 添加 Dropout 基本结构，但不支持两个输出是不同的类型

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 重新导出优化接口

feat: dropout 导入
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* build: BANG 选项加入 Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code: the change to test/kernels/bang/test* is to use NativeCpuRuntime.
The change to include/bang/bang_runtime is for the cntoolkit upgrade.

* feat: 导出 bang runtime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add USE_BANG=1

* fix matmul

* fix reshape

* fix

* fix activation

* fix transpose

* format

* format

* update Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add prelu on mlu

* fix: ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 PRelu

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add convtrans on mlu

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* docs: 更新 README_CN.md

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code by review suggestions

* style

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: Softmax 的 axis 可以用默认值？感觉是 onnx 不标准

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix cuda & intelcpu bugs after merging

---------

Signed-off-by: YdrMaster <ydrml@hotmail.com>
Co-authored-by: wanghailu <wanghailu0717@163.com>
Co-authored-by: wanghailu <wanghailu@qiyuanlab.com>
Co-authored-by: whjthu <haojie0429@gmail.com>
											
										
										
											2023-04-18 15:10:33 +08:00
+								        output = make_tensor_value_info("output", TensorProto.UINT32, [1, 1, 99, 95])
 								        starts = make_tensor("starts", TensorProto.INT64, [4], [2, 9, 1, 5])
 								        ends = make_tensor("ends", TensorProto.INT64, [4], [3, 10, 100, 100])
-												feat: 前端支持 pad 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-15 11:41:06 +08:00
+								        slice = make_node("Slice", ["data", "starts", "ends"], ["output"], name="slice")
-												Dev for 202303ddl (#66)

* add activation operation relu, tanh, sigmoid on mlu

* commit for format

* add activation backward operation

* add test for activation_backward

* add test

* add convbpfilter

* fix

* add transpose code and test

* add trigon function operation on mlu: sin,cos,tan,asin,sinh,asinh

* add copy operation on mlu

* add ceil operation and floor operation

* add operation clip

* add operation cnnl div, test and test for divdemo bangc kernel

* add divnonan operation and test

* add erf operation

* add exp operation

* add operation fill

* add log operation

* add log1p operation

* add l2loss operation

* add maximum and minimum operation

* add mseloss operation

* add negTensor operation

* add power operation

* add reciprocal operation

* add sqrt and rsqrt operation

* add transform operation

* add addn operation

* add muln operation

* cherry pick some operation

* add floordiv operation and floordivtrunc operation

* add floormod operation

* add cumsum operation

* add det operation

* add pad operation

* format

* add concat operation

* format

* add split operation

* fix concat and split operation

* add round operation

* add pooling operation

* add square operation

* add squaredDifference operation

* code format fix

* add flip operation

* code format fix

* add hardtanh operation

* add logic operation

* add addcdiv and addcmul operation

* add arange operation

* add bitcompute operation

* add net test

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: rename

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 用 NativeCpuRuntime 替换 CpuRuntime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code

* fix code

* fix code by review suggestion

* remove operation which is not the onnx operation

* fix format

* clang format

* refactor: tensor 的 print 加一层模板的 dataToString

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: onnx 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 增加计算图优化接口

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add clip operation

* feat: 支持导入 clip

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* test: 导入导出测试加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix batch norm

* feat: 增加 Shape 算子

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入 unsqueeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正 clip 接口

feat: 支持导入 transpose
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add broadcast operation

* fix elementwise-broadcast

* fix elementwise broadcast

* add broadcast for gpu elementwise

* feat: pad 支持 axes 负数

feat: 不支持的 padding 导出为独立的 pad 算子

feat: 支持导入 onnxsim 过的 inception
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正池化的测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 pads，支持 inception 导入导出，已加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 densenet 导入导出，并加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 squeeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix softmax

* feat: 导出 clip 和 transpose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 Conv 的 bias

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv group

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: matmul 的 bias 没有放在输入里，修正

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix example

* fix: 改正 reduce_mean 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* refactor: 修改 slice 实现与 onnx 一致

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: 不导出两个 runtime 函数

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 中文使用指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 补全指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修复导入数据的问题

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 添加 Dropout 基本结构，但不支持两个输出是不同的类型

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 重新导出优化接口

feat: dropout 导入
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* build: BANG 选项加入 Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code: the change to test/kernels/bang/test* is to use NativeCpuRuntime.
The change to include/bang/bang_runtime is for the cntoolkit upgrade.

* feat: 导出 bang runtime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add USE_BANG=1

* fix matmul

* fix reshape

* fix

* fix activation

* fix transpose

* format

* format

* update Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add prelu on mlu

* fix: ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 PRelu

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add convtrans on mlu

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* docs: 更新 README_CN.md

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code by review suggestions

* style

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: Softmax 的 axis 可以用默认值？感觉是 onnx 不标准

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix cuda & intelcpu bugs after merging

---------

Signed-off-by: YdrMaster <ydrml@hotmail.com>
Co-authored-by: wanghailu <wanghailu0717@163.com>
Co-authored-by: wanghailu <wanghailu@qiyuanlab.com>
Co-authored-by: whjthu <haojie0429@gmail.com>
											
										
										
											2023-04-18 15:10:33 +08:00
+								        make_and_import_model(
 								            make_graph(
 								                [slice],
 								                "slice",
 								                [data],
 								                [output],
 								                [starts, ends],
 								            )
-												feat: 前端支持 slice 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-14 17:35:18 +08:00
+								        )
-												feat: 前端支持 pad 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-15 11:41:06 +08:00
+								    def test_pad(self):
 								        data = make_tensor_value_info("data", TensorProto.UINT32, [1, 64, 162, 162])
 								        output = make_tensor_value_info("output", TensorProto.UINT32, [3, 84, 164, 172])
 								        pads = make_tensor_value_info("pads", TensorProto.INT64, [8])
 								        pads_data = make_tensor(
 								            "pads", TensorProto.INT64, [8], [2, 10, 1, 5, 0, 10, 1, 5]
 								        )
 								        pad = make_node("Pad", ["data", "pads"], ["output"], name="pad")
 								        make_and_import_model(
 								            make_graph(
 								                [pad],
 								                "pad",
 								                [data, pads],
 								                [output],
 								                [pads_data],
 								            )
 								        )
-												tensor parallel for transformer (#125)

* add cmake bits about NCCL

* move example to examples/NNmodel

* impl NCCL communicator

* add comm related function to Runtime

* export runtime interface

* add launch.py

* use unique name to distinguish the NCCL ID file

* add timeout to communicator init

* expose communicator obj from runtime obj, add unit test for nccl communicator

* reformat files

* Add allReduce operator and cuda nccl allReduce kernel

* impl model parallel for resnet

* add allGather nccl kernel and operator

* Add allreduce allgather operator tests, change allgather kernel to output list of tensor, fix shape infer, handle nullptr output

* fix format of onnx.py

* use concat following AllGather

* get tensor parallel for resnet

* fix format of graph_handler.cc

* change BUILD_DIST default to OFF

* polish code of communicator

* update .gitignore

* export min/max to python

* fix MatMul

* modify launch.py to run opt

* hack to treat ReduceSum as AllReduceSum

* throw exception in cuda error

* fix parallel_opt.py

* improve the error prompt and cuda error check

* fix GatherObj::GatherObj member init

* fix size calculation for scalar (rank = 0) tensor

* MatMul supports bias

* fix add bias for row parallel gemm

* add --gen_std to launch.py

* fix AllReduceNCCL

* update launch.py

* less log

* update parallel_opt

* update launch.py

* add __eq__ for Placement sub-classes

* less benchmark run

* fix placement infer for matmul

* fix vocabulary size

* fix Exception

* Add shard tensor with group to support gpt2

* Add find successor function to find split op at different depth

* recover CommunicatorObj

* improve error message

* optimize parallel_opt.py

* optimize launch.py

* recover docs for all_reduce and all_gather

* Fix API

* fix format

---------

Co-authored-by: panzezhong <panzezhong@qiyuanlab.com>
Co-authored-by: Haojie Wang <haojie0429@gmail.com>
											
										
										
											2023-09-14 14:19:45 +08:00
-												impl distributed launch with NCCL  (#106)

* add cmake bits about NCCL

* move example to examples/NNmodel

* impl NCCL communicator

* add comm related function to Runtime

* export runtime interface

* add launch.py

* use unique name to distinguish the NCCL ID file

* add timeout to communicator init

* expose communicator obj from runtime obj, add unit test for nccl communicator

* reformat files

* Add allReduce operator and cuda nccl allReduce kernel

* impl model parallel for resnet

* add allGather nccl kernel and operator

* Add allreduce allgather operator tests, change allgather kernel to output list of tensor, fix shape infer, handle nullptr output

* fix format of onnx.py

* use concat following AllGather

* get tensor parallel for resnet

* fix format of graph_handler.cc

* change BUILD_DIST default to OFF

* polish code of communicator

* update .gitignore

* Add broadcast operator and cuda kernel

* Add comments for operators

* remove const of class member

* move communicator to CudaRuntimeObj

* Add an empty line at EOF.

---------

Co-authored-by: panzezhong <panzezhong@qiyuanlab.com>
Co-authored-by: Haojie Wang <haojie0429@gmail.com>
											
										
										
											2023-09-05 09:47:35 +08:00
+								    def test_allReduceSum(self):
 								        input = make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 2, 4])
 								        output = make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 2, 4])
 								        allReduceSum = make_node(
 								            "AllReduceSum", ["input"], ["output"], name="allReduceSum"
 								        )
 								        graph = make_graph([allReduceSum], "allReduceSum", [input], [output])
 								        model = make_model(graph)
 								        from_onnx(model, backend.cpu_runtime())
 								    def test_allReduceProd(self):
 								        """Import a lone AllReduceProd node via `make_model` + `from_onnx` directly."""
 								        node = make_node("AllReduceProd", ["input"], ["output"], name="allReduceProd")
 								        src = make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 2, 4])
 								        dst = make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 2, 4])
 								        # Unlike most tests here, check_graph/check_model are not invoked.
 								        graph = make_graph([node], "allReduceProd", [src], [dst])
 								        from_onnx(make_model(graph), backend.cpu_runtime())
-												tensor parallel for transformer (#125)

* add cmake bits about NCCL

* move example to examples/NNmodel

* impl NCCL communicator

* add comm related function to Runtime

* export runtime interface

* add launch.py

* use unique name to distinguish the NCCL ID file

* add timeout to communicator init

* expose communicator obj from runtime obj, add unit test for nccl communicator

* reformat files

* Add allReduce operator and cuda nccl allReduce kernel

* impl model parallel for resnet

* add allGather nccl kernel and operator

* Add allreduce allgather operator tests, change allgather kernel to output list of tensor, fix shape infer, handle nullptr output

* fix format of onnx.py

* use concat following AllGather

* get tensor parallel for resnet

* fix format of graph_handler.cc

* change BUILD_DIST default to OFF

* polish code of communicator

* update .gitignore

* export min/max to python

* fix MatMul

* modify launch.py to run opt

* hack to treat ReduceSum as AllReduceSum

* throw exception in cuda error

* fix parallel_opt.py

* improve the error prompt and cuda error check

* fix GatherObj::GatherObj member init

* fix size calculation for scalar (rank = 0) tensor

* MatMul supports bias

* fix add bias for row parallel gemm

* add --gen_std to launch.py

* fix AllReduceNCCL

* update launch.py

* less log

* update parallel_opt

* update launch.py

* add __eq__ for Placement sub-classes

* less benchmark run

* fix placement infer for matmul

* fix vocabulary size

* fix Exception

* Add shard tensor with group to support gpt2

* Add find successor function to find split op at different depth

* recover CommunicatorObj

* improve error message

* optimize parallel_opt.py

* optimize launch.py

* recover docs for all_reduce and all_gather

* Fix API

* fix format

---------

Co-authored-by: panzezhong <panzezhong@qiyuanlab.com>
Co-authored-by: Haojie Wang <haojie0429@gmail.com>
											
										
										
											2023-09-14 14:19:45 +08:00
-												impl distributed launch with NCCL  (#106)

* add cmake bits about NCCL

* move example to examples/NNmodel

* impl NCCL communicator

* add comm related function to Runtime

* export runtime interface

* add launch.py

* use unique name to distinguish the NCCL ID file

* add timeout to communicator init

* expose communicator obj from runtime obj, add unit test for nccl communicator

* reformat files

* Add allReduce operator and cuda nccl allReduce kernel

* impl model parallel for resnet

* add allGather nccl kernel and operator

* Add allreduce allgather operator tests, change allgather kernel to output list of tensor, fix shape infer, handle nullptr output

* fix format of onnx.py

* use concat following AllGather

* get tensor parallel for resnet

* fix format of graph_handler.cc

* change BUILD_DIST default to OFF

* polish code of communicator

* update .gitignore

* Add broadcast operator and cuda kernel

* Add comments for operators

* remove const of class member

* move communicator to CudaRuntimeObj

* Add an empty line at EOF.

---------

Co-authored-by: panzezhong <panzezhong@qiyuanlab.com>
Co-authored-by: Haojie Wang <haojie0429@gmail.com>
											
										
										
											2023-09-05 09:47:35 +08:00
+								    def test_allReduceMin(self):
 								        input = make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 2, 4])
 								        output = make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 2, 4])
 								        allReduceMin = make_node(
 								            "AllReduceMin", ["input"], ["output"], name="allReduceMin"
 								        )
 								        graph = make_graph([allReduceMin], "allReduceMin", [input], [output])
 								        model = make_model(graph)
 								        from_onnx(model, backend.cpu_runtime())
 								    def test_allReduceMax(self):
 								        """Import a single-node ``AllReduceMax`` graph on the CPU runtime."""
 								        # The model is handed to from_onnx without check_model; presumably
 								        # because AllReduceMax is not a standard ONNX operator -- TODO confirm.
 								        shape = [1, 3, 2, 4]
 								        graph_inputs = [make_tensor_value_info("input", TensorProto.FLOAT, shape)]
 								        graph_outputs = [make_tensor_value_info("output", TensorProto.FLOAT, shape)]
 								        node = make_node(
 								            "AllReduceMax", ["input"], ["output"], name="allReduceMax"
 								        )
 								        from_onnx(
 								            make_model(make_graph([node], "allReduceMax", graph_inputs, graph_outputs)),
 								            backend.cpu_runtime(),
 								        )
 								    def test_allReduceAvg(self):
 								        """Import a single-node ``AllReduceAvg`` graph on the CPU runtime."""
 								        # The model is handed to from_onnx without check_model; presumably
 								        # because AllReduceAvg is not a standard ONNX operator -- TODO confirm.
 								        shape = [1, 3, 2, 4]
 								        graph_inputs = [make_tensor_value_info("input", TensorProto.FLOAT, shape)]
 								        graph_outputs = [make_tensor_value_info("output", TensorProto.FLOAT, shape)]
 								        node = make_node(
 								            "AllReduceAvg", ["input"], ["output"], name="allReduceAvg"
 								        )
 								        from_onnx(
 								            make_model(make_graph([node], "allReduceAvg", graph_inputs, graph_outputs)),
 								            backend.cpu_runtime(),
 								        )
-												tensor parallel for transformer (#125)

* add cmake bits about NCCL

* move example to examples/NNmodel

* impl NCCL communicator

* add comm related function to Runtime

* export runtime interface

* add launch.py

* use unique name to distinguish the NCCL ID file

* add timeout to communicator init

* expose communicator obj from runtime obj, add unit test for nccl communicator

* reformat files

* Add allReduce operator and cuda nccl allReduce kernel

* impl model parallel for resnet

* add allGather nccl kernel and operator

* Add allreduce allgather operator tests, change allgather kernel to output list of tensor, fix shape infer, handle nullptr output

* fix format of onnx.py

* use concat following AllGather

* get tensor parallel for resnet

* fix format of graph_handler.cc

* change BUILD_DIST default to OFF

* polish code of communicator

* update .gitignore

* export min/max to python

* fix MatMul

* modify launch.py to run opt

* hack to treat ReduceSum as AllReduceSum

* throw exception in cuda error

* fix parallel_opt.py

* improve the error prompt and cuda error check

* fix GatherObj::GatherObj member init

* fix size calculation for scalar (rank = 0) tensor

* MatMul supports bias

* fix add bias for row parallel gemm

* add --gen_std to launch.py

* fix AllReduceNCCL

* update launch.py

* less log

* update parallel_opt

* update launch.py

* add __eq__ for Placement sub-classes

* less benchmark run

* fix placement infer for matmul

* fix vocabulary size

* fix Exception

* Add shard tensor with group to support gpt2

* Add find successor function to find split op at different depth

* recover CommunicatorObj

* improve error message

* optimize parallel_opt.py

* optimize launch.py

* recover docs for all_reduce and all_gather

* Fix API

* fix format

---------

Co-authored-by: panzezhong <panzezhong@qiyuanlab.com>
Co-authored-by: Haojie Wang <haojie0429@gmail.com>
											
										
										
											2023-09-14 14:19:45 +08:00
-												impl distributed launch with NCCL  (#106)

* add cmake bits about NCCL

* move example to examples/NNmodel

* impl NCCL communicator

* add comm related function to Runtime

* export runtime interface

* add launch.py

* use unique name to distinguish the NCCL ID file

* add timeout to communicator init

* expose communicator obj from runtime obj, add unit test for nccl communicator

* reformat files

* Add allReduce operator and cuda nccl allReduce kernel

* impl model parallel for resnet

* add allGather nccl kernel and operator

* Add allreduce allgather operator tests, change allgather kernel to output list of tensor, fix shape infer, handle nullptr output

* fix format of onnx.py

* use concat following AllGather

* get tensor parallel for resnet

* fix format of graph_handler.cc

* change BUILD_DIST default to OFF

* polish code of communicator

* update .gitignore

* Add broadcast operator and cuda kernel

* Add comments for operators

* remove const of class member

* move communicator to CudaRuntimeObj

* Add an empty line at EOF.

---------

Co-authored-by: panzezhong <panzezhong@qiyuanlab.com>
Co-authored-by: Haojie Wang <haojie0429@gmail.com>
											
										
										
											2023-09-05 09:47:35 +08:00
+								    def test_split(self):
 								        input = make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 2, 4])
-												tensor parallel for transformer (#125)

* add cmake bits about NCCL

* move example to examples/NNmodel

* impl NCCL communicator

* add comm related function to Runtime

* export runtime interface

* add launch.py

* use unique name to distinguish the NCCL ID file

* add timeout to communicator init

* expose communicator obj from runtime obj, add unit test for nccl communicator

* reformat files

* Add allReduce operator and cuda nccl allReduce kernel

* impl model parallel for resnet

* add allGather nccl kernel and operator

* Add allreduce allgather operator tests, change allgather kernel to output list of tensor, fix shape infer, handle nullptr output

* fix format of onnx.py

* use concat following AllGather

* get tensor parallel for resnet

* fix format of graph_handler.cc

* change BUILD_DIST default to OFF

* polish code of communicator

* update .gitignore

* export min/max to python

* fix MatMul

* modify launch.py to run opt

* hack to treat ReduceSum as AllReduceSum

* throw exception in cuda error

* fix parallel_opt.py

* improve the error prompt and cuda error check

* fix GatherObj::GatherObj member init

* fix size calculation for scalar (rank = 0) tensor

* MatMul supports bias

* fix add bias for row parallel gemm

* add --gen_std to launch.py

* fix AllReduceNCCL

* update launch.py

* less log

* update parallel_opt

* update launch.py

* add __eq__ for Placement sub-classes

* less benchmark run

* fix placement infer for matmul

* fix vocabulary size

* fix Exception

* Add shard tensor with group to support gpt2

* Add find successor function to find split op at different depth

* recover CommunicatorObj

* improve error message

* optimize parallel_opt.py

* optimize launch.py

* recover docs for all_reduce and all_gather

* Fix API

* fix format

---------

Co-authored-by: panzezhong <panzezhong@qiyuanlab.com>
Co-authored-by: Haojie Wang <haojie0429@gmail.com>
											
										
										
											2023-09-14 14:19:45 +08:00
+								        split = make_node("Split", ["input"], ["output"], name="split", axis=0)
-												fix mlu some kernel registration & gather op (#210)

* fix: fix bang build/kernel registration | test_onnx

* delete assert float

* fix gather

* fix CMakeLists and Reshape

* fix cncl ops

* add hardsigmoid/hardswish

* fix

* add invalid datatype exception

* fix gather

* fix gather indices type

* fix gather/prelu/hardsigmoid on mlu

* fix format

* fix

---------

Co-authored-by: Bolun Zhang <48948016+Chamberlain0w0@users.noreply.github.com>
Co-authored-by: Haojie Wang <haojie0429@gmail.com>
Co-authored-by: Zhang Bolun <Chamberlain0w0@gmail.com>
											
										
										
											2024-02-01 15:02:02 +08:00
+								        output = make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 2, 4])
 								        make_and_import_model(make_graph([split], "split", [input], [output]))
-												tensor parallel for transformer (#125)

* add cmake bits about NCCL

* move example to examples/NNmodel

* impl NCCL communicator

* add comm related function to Runtime

* export runtime interface

* add launch.py

* use unique name to distinguish the NCCL ID file

* add timeout to communicator init

* expose communicator obj from runtime obj, add unit test for nccl communicator

* reformat files

* Add allReduce operator and cuda nccl allReduce kernel

* impl model parallel for resnet

* add allGather nccl kernel and operator

* Add allreduce allgather operator tests, change allgather kernel to output list of tensor, fix shape infer, handle nullptr output

* fix format of onnx.py

* use concat following AllGather

* get tensor parallel for resnet

* fix format of graph_handler.cc

* change BUILD_DIST default to OFF

* polish code of communicator

* update .gitignore

* export min/max to python

* fix MatMul

* modify launch.py to run opt

* hack to treat ReduceSum as AllReduceSum

* throw exception in cuda error

* fix parallel_opt.py

* improve the error prompt and cuda error check

* fix GatherObj::GatherObj member init

* fix size calculation for scalar (rank = 0) tensor

* MatMul supports bias

* fix add bias for row parallel gemm

* add --gen_std to launch.py

* fix AllReduceNCCL

* update launch.py

* less log

* update parallel_opt

* update launch.py

* add __eq__ for Placement sub-classes

* less benchmark run

* fix placement infer for matmul

* fix vocabulary size

* fix Exception

* Add shard tensor with group to support gpt2

* Add find successor function to find split op at different depth

* recover CommunicatorObj

* improve error message

* optimize parallel_opt.py

* optimize launch.py

* recover docs for all_reduce and all_gather

* Fix API

* fix format

---------

Co-authored-by: panzezhong <panzezhong@qiyuanlab.com>
Co-authored-by: Haojie Wang <haojie0429@gmail.com>
											
										
										
											2023-09-14 14:19:45 +08:00
-												fix Issue 187 split infershape wrong (#197)

* fix: fix splitOp to support unequal portions

* fix: fix as review comment

---------

Co-authored-by: Haojie Wang <haojie0429@gmail.com>
											
										
										
											2023-12-28 21:39:24 +08:00
+								    def test_split1(self):
 								        input = make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 2, 4])
-												fix mlu some kernel registration & gather op (#210)

* fix: fix bang build/kernel registration | test_onnx

* delete assert float

* fix gather

* fix CMakeLists and Reshape

* fix cncl ops

* add hardsigmoid/hardswish

* fix

* add invalid datatype exception

* fix gather

* fix gather indices type

* fix gather/prelu/hardsigmoid on mlu

* fix format

* fix

---------

Co-authored-by: Bolun Zhang <48948016+Chamberlain0w0@users.noreply.github.com>
Co-authored-by: Haojie Wang <haojie0429@gmail.com>
Co-authored-by: Zhang Bolun <Chamberlain0w0@gmail.com>
											
										
										
											2024-02-01 15:02:02 +08:00
+								        splitAttr = make_tensor("split", TensorProto.INT64, [2], [2, 1])
 								        output1 = make_tensor_value_info("output1", TensorProto.FLOAT, [1, 2, 2, 4])
 								        output2 = make_tensor_value_info("output2", TensorProto.FLOAT, [1, 1, 2, 4])
 								        split = make_node(
 								            "Split", ["input", "split"], ["output1", "output2"], name="split", axis=1
 								        )
 								        make_and_import_model(
 								            make_graph([split], "split", [input], [output1, output2], [splitAttr])
 								        )
-												fix Issue 187 split infershape wrong (#197)

* fix: fix splitOp to support unequal portions

* fix: fix as review comment

---------

Co-authored-by: Haojie Wang <haojie0429@gmail.com>
											
										
										
											2023-12-28 21:39:24 +08:00
-												impl distributed launch with NCCL  (#106)

* add cmake bits about NCCL

* move example to examples/NNmodel

* impl NCCL communicator

* add comm related function to Runtime

* export runtime interface

* add launch.py

* use unique name to distinguish the NCCL ID file

* add timeout to communicator init

* expose communicator obj from runtime obj, add unit test for nccl communicator

* reformat files

* Add allReduce operator and cuda nccl allReduce kernel

* impl model parallel for resnet

* add allGather nccl kernel and operator

* Add allreduce allgather operator tests, change allgather kernel to output list of tensor, fix shape infer, handle nullptr output

* fix format of onnx.py

* use concat following AllGather

* get tensor parallel for resnet

* fix format of graph_handler.cc

* change BUILD_DIST default to OFF

* polish code of communicator

* update .gitignore

* Add broadcast operator and cuda kernel

* Add comments for operators

* remove const of class member

* move communicator to CudaRuntimeObj

* Add an empty line at EOF.

---------

Co-authored-by: panzezhong <panzezhong@qiyuanlab.com>
Co-authored-by: Haojie Wang <haojie0429@gmail.com>
											
										
										
											2023-09-05 09:47:35 +08:00
+								    def test_allBroadcast(self):
 								        input = make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 2, 4])
 								        output = make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 2, 4])
 								        broadcast = make_node(
 								            "Broadcast", ["input"], ["output"], name="broadcast", root=1
 								        )
 								        graph = make_graph([broadcast], "broadcast", [input], [output])
 								        model = make_model(graph)
 								        from_onnx(model, backend.cpu_runtime())
 								    def test_allGather(self):
 								        """Import a single-node ``AllGather`` graph on the CPU runtime."""
 								        graph_inputs = [
 								            make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 2, 4]),
 								            make_tensor_value_info("world_size", TensorProto.INT32, [1]),
 								        ]
 								        node = make_node(
 								            "AllGather", ["input", "world_size"], ["output"], name="allGather"
 								        )
 								        # The graph declares no outputs: the node's "output" is deliberately
 								        # left out of the graph output list, exactly as the test was written.
 								        model = make_model(make_graph([node], "allGather", graph_inputs, []))
 								        from_onnx(model, backend.cpu_runtime())
-												feat: 前端支持 pad 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-15 11:41:06 +08:00
-												feat: 增加 add sub mul div pow 前端

- 添加每个算子的单元测试
- 添加线性回归模型导入测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 11:25:54 +08:00
+								    # see <https://onnx.ai/onnx/intro/python.html#a-simple-example-a-linear-regression>
 								    def test_linear(self):
 								        x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 2, 3])
 								        a = make_tensor_value_info("a", TensorProto.FLOAT, [1, 3, 4])
 								        b = make_tensor_value_info("b", TensorProto.FLOAT, [1, 2, 4])
-												feat: 前端支持 identity 及单元测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 12:13:01 +08:00
+								        y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 2, 4])
-												feat: 增加 add sub mul div pow 前端

- 添加每个算子的单元测试
- 添加线性回归模型导入测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-13 11:25:54 +08:00
+								        matmul = make_node("MatMul", ["x", "a"], ["xa"], name="matmul")
 								        add = make_node("Add", ["xa", "b"], ["y"], name="add")
 								        graph = make_graph([matmul, add], "lr", [x, a, b], [y])
-												feat: 创建 pyinfinitensor 前端

- python 前端项目结构及打包和安装脚本
- 后端编译出 so 改名为 backend，增加 GraphHandler 修改图结构
- ci 支持测试这些功能

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-12 08:23:49 +08:00
+								        model = make_model(graph)
 								        check_model(model)
-												Dev for 202303ddl (#66)

* add activation operation relu, tanh, sigmoid on mlu

* commit for format

* add activation backward operation

* add test for activation_backward

* add test

* add convbpfilter

* fix

* add transpose code and test

* add trigon function operation on mlu: sin,cos,tan,asin,sinh,asinh

* add copy operation on mlu

* add ceil operation and floor operation

* add operation clip

* add operation cnnl div, test and test for divdemo bangc kernel

* add divnonan operation and test

* add erf operation

* add exp operation

* add operation fill

* add log operation

* add log1p operation

* add l2loss operation

* add maximum and minimum operation

* add mseloss operation

* add negTensor operation

* add power operation

* add reciprocal operation

* add sqrt and rsqrt operation

* add transform operation

* add addn operation

* add muln operation

* cherry pick some operation

* add floordiv operation and floordivtrunc operation

* add floormod operation

* add cumsum operation

* add det operation

* add pad operation

* format

* add concat operation

* format

* add split operation

* fix concat and split operation

* add round operation

* add pooling operation

* add square operation

* add squaredDifference operation

* code format fix

* add flip operation

* code format fix

* add hardtanh operation

* add logic operation

* add addcdiv and addcmul operation

* add arange operation

* add bitcompute operation

* add net test

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: rename

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 用 NativeCpuRuntime 替换 CpuRuntime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code

* fix code

* fix code by review suggestion

* remove operation which is not the onnx operation

* fix format

* clang format

* refactor: tensor 的 print 加一层模板的 dataToString

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: onnx 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 增加计算图优化接口

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add clip operation

* feat: 支持导入 clip

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* test: 导入导出测试加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix batch norm

* feat: 增加 Shape 算子

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入 unsqueeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正 clip 接口

feat: 支持导入 transpose
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add broadcast operation

* fix elementwise-broadcast

* fix elementwise broadcast

* add broadcast for gpu elementwise

* feat: pad 支持 axes 负数

feat: 不支持的 padding 导出为独立的 pad 算子

feat: 支持导入 onnxsim 过的 inception
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正池化的测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 pads，支持 inception 导入导出，已加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 densenet 导入导出，并加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 squeeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix softmax

* feat: 导出 clip 和 transpose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 Conv 的 bias

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv group

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: matmul 的 bias 没有放在输入里，修正

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix example

* fix: 改正 reduce_mean 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* refactor: 修改 slice 实现与 onnx 一致

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: 不导出两个 runtime 函数

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 中文使用指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 补全指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修复导入数据的问题

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 添加 Dropout 基本结构，但不支持两个输出是不同的类型

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 重新导出优化接口

feat: dropout 导入
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* build: BANG 选项加入 Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code: the change to test/kernels/bang/test* is to use NativeCpuRuntime;
the change to include/bang/bang_runtime is for the cntoolkit upgrade.

* feat: 导出 bang runtime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add USE_BANG=1

* fix matmul

* fix reshape

* fix

* fix activation

* fix transpose

* format

* format

* update Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add prelu on mlu

* fix: ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 PRelu

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add convtrans on mlu

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* docs: 更新 README_CN.md

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code by review suggestions

* style

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: Softmax 的 axis 可以用默认值？感觉是 onnx 不标准

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix cuda & intelcpu bugs after merging

---------

Signed-off-by: YdrMaster <ydrml@hotmail.com>
Co-authored-by: wanghailu <wanghailu0717@163.com>
Co-authored-by: wanghailu <wanghailu@qiyuanlab.com>
Co-authored-by: whjthu <haojie0429@gmail.com>
											
										
										
											2023-04-18 15:10:33 +08:00
+								        from_onnx(model, backend.cpu_runtime())
-												feat: 创建 pyinfinitensor 前端

- python 前端项目结构及打包和安装脚本
- 后端编译出 so 改名为 backend，增加 GraphHandler 修改图结构
- ci 支持测试这些功能

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-12 08:23:49 +08:00
 								    def test_frontend(self):
-												Dev for 202303ddl (#66)

* add activation operation relu, tanh, sigmoid on mlu

* commit for format

* add activation backward operation

* add test for activation_backward

* add test

* add convbpfilter

* fix

* add transpose code and test

* add trigon function operation on mlu: sin,cos,tan,asin,sinh,asinh

* add copy operation on mlu

* add ceil operation and floor operation

* add operation clip

* add operation cnnl div, test and test for divdemo bangc kernel

* add divnonan operation and test

* add erf operation

* add exp operation

* add operation fill

* add log operation

* add log1p operation

* add l2loss operation

* add maximum and minimum operation

* add mseloss operation

* add negTensor operation

* add power operation

* add reciprocal operation

* add sqrt and rsqrt operation

* add transform operation

* add addn operation

* add muln operation

* cherry pick some operation

* add floordiv operation and floordivtrunc operation

* add floormod operation

* add cumsum operation

* add det operation

* add pad operation

* format

* add concat operation

* format

* add split operation

* fix concat and split operation

* add round operation

* add pooling operation

* add square operation

* add squaredDifference operation

* code format fix

* add flip operation

* code format fix

* add hardtanh operation

* add logic operation

* add addcdiv and addcmul operation

* add arange operation

* add bitcompute operation

* add net test

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: rename

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 用 NativeCpuRuntime 替换 CpuRuntime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code

* fix code

* fix code by review suggestion

* remove operation which is not the onnx operation

* fix format

* clang format

* refactor: tensor 的 print 加一层模板的 dataToString

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: onnx 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 增加计算图优化接口

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add clip operation

* feat: 支持导入 clip

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* test: 导入导出测试加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix batch norm

* feat: 增加 Shape 算子

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入 unsqueeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正 clip 接口

feat: 支持导入 transpose
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add broadcast operation

* fix elementwise-broadcast

* fix elementwise broadcast

* add broadcast for gpu elementwise

* feat: pad 支持 axes 负数

feat: 不支持的 padding 导出为独立的 pad 算子

feat: 支持导入 onnxsim 过的 inception
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修正池化的测试

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 pads，支持 inception 导入导出，已加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 densenet 导入导出，并加入 ci

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 squeeze

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix softmax

* feat: 导出 clip 和 transpose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持 Conv 的 bias

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: bias of conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导入 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 导出 split

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: conv group

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: matmul 的 bias 没有放在输入里，修正

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix example

* fix: 改正 reduce_mean 导出

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* refactor: 修改 slice 实现与 onnx 一致

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* style: 不导出两个 runtime 函数

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 中文使用指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* doc: 补全指南

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: 修复导入数据的问题

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 添加 Dropout 基本结构，但不支持两个输出是不同的类型

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 重新导出优化接口

feat: dropout 导入
Signed-off-by: YdrMaster <ydrml@hotmail.com>

* build: BANG 选项加入 Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code: the change to test/kernels/bang/test* is to use NativeCpuRuntime;
the change to include/bang/bang_runtime is for the cntoolkit upgrade.

* feat: 导出 bang runtime

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add USE_BANG=1

* fix matmul

* fix reshape

* fix

* fix activation

* fix transpose

* format

* format

* update Makefile

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add prelu on mlu

* fix: ConvTranspose

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* feat: 支持导入导出 PRelu

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* add convtrans on mlu

* fmt

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* docs: 更新 README_CN.md

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix code by review suggestions

* style

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix: Softmax 的 axis 可以用默认值？感觉是 onnx 不标准

Signed-off-by: YdrMaster <ydrml@hotmail.com>

* fix cuda & intelcpu bugs after merging

---------

Signed-off-by: YdrMaster <ydrml@hotmail.com>
Co-authored-by: wanghailu <wanghailu0717@163.com>
Co-authored-by: wanghailu <wanghailu@qiyuanlab.com>
Co-authored-by: whjthu <haojie0429@gmail.com>
											
										
										
											2023-04-18 15:10:33 +08:00
+								        handler = backend.GraphHandler(backend.cpu_runtime())
-												feat: 导出加减乘除幂到 onnx

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-17 17:15:15 +08:00
+								        a = handler.tensor([1, 2, 3], 12)
 								        b = handler.tensor([1, 2, 3], 12)
 								        c = handler.tensor([1, 2, 3], 12)
 								        d = handler.tensor([1, 2, 3], 12)
 								        e = handler.tensor([1, 2, 3], 12)
-												feat: 导出 Reshape 到 onnx

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-21 14:30:06 +08:00
+								        x = handler.add(
 								            handler.add(handler.add(handler.add(a, b, None), c, None), d, None), e, None
 								        )
 								        y = handler.tensor([3, 2, 1], 12)
 								        handler.reshape(x, y, [3, 2, 1])
-												feat: 创建 pyinfinitensor 前端

- python 前端项目结构及打包和安装脚本
- 后端编译出 so 改名为 backend，增加 GraphHandler 修改图结构
- ci 支持测试这些功能

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-12 08:23:49 +08:00
-												support mixed dtype (#102)

* feat: support mixed dtype

* feat: support cast op

* test: add test for cast op

* feat: support datatype BFloat16

* feat: support data convert fp32 <-> bfp16

* fix: fix all op's infershape func

* fix as review comment
											
										
										
											2023-08-16 21:49:43 +08:00
+								    def test_cast(self):
 								        input1 = make_tensor_value_info("input1", TensorProto.FLOAT, [1, 3, 2, 4])
 								        output = make_tensor_value_info("output", TensorProto.FLOAT16, [1, 3, 2, 4])
 								        cast = make_node(
 								            "Cast", ["input1"], ["output"], to=TensorProto.FLOAT16, name="cast"
 								        )
 								        make_and_import_model(make_graph([cast], "cast", [input1], [output]))
-												框架支持bert/gpt2模型构图 (#94)

* feat: support to sqrt op

* feat: support to erf op

* feat: support to expand op

* feat: support to where op

* fix: gather op index can be int64_t(hard coding)

* fix: some wrong use

* style: fix the format style

* test: add test for change op

* fix: rebase to master

* fix: fix matmul b compute wrong

* add expand and where kernel

* Add int64 support for cuda gather kernel

* add test_where.cc

* add "expand.(cu/cc,test,cuda),modified where.cu"

* Separate initialization of datatypes to avoid compile error

* modify where.(cu/cc/h,test), expand and clip

* Format fix

* Format fix

---------

Co-authored-by: xgqdut2016 <kenan_gewei@163.com>
Co-authored-by: panzezhong <panzezhong@qiyuanlab.com>
Co-authored-by: Haojie Wang <haojie0429@gmail.com>
											
										
										
											2023-08-29 16:06:52 +08:00
+								    def test_expand(self):
 								        # Import a single-node "Expand" graph: `data` of shape [3, 1] is
 								        # expanded with the target shape [2, 1, 6]; the declared output
 								        # shape is [2, 3, 6].
 								        data = make_tensor_value_info("data", TensorProto.FLOAT, [3, 1])
 								        dim = make_tensor_value_info("dim", TensorProto.INT64, [3])
 								        # Constant values for "dim"; passed to make_graph below as an
 								        # initializer, so the target shape is known at import time.
 								        dim_data = make_tensor("dim", TensorProto.INT64, [3], [2, 1, 6])
 								        output = make_tensor_value_info("output", TensorProto.FLOAT, [2, 3, 6])
 								        expand = make_node("Expand", ["data", "dim"], ["output"], name="expand")
 								        # "dim" appears both as a graph input and as an initializer.
 								        make_and_import_model(
 								            make_graph([expand], "expand", [data, dim], [output], [dim_data])
 								        )
 								    def test_where(self):
 								        # Import a single-node "Where" graph that selects between two FLOAT
 								        # tensors `x` and `y` using the BOOL mask `con`; all tensors share
 								        # the shape [1, 3, 5, 7].
 								        x = make_tensor_value_info("x", TensorProto.FLOAT, [1, 3, 5, 7])
 								        y = make_tensor_value_info("y", TensorProto.FLOAT, [1, 3, 5, 7])
 								        con = make_tensor_value_info("con", TensorProto.BOOL, [1, 3, 5, 7])
 								        output = make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 5, 7])
 								        # ONNX defines the inputs of Where as (condition, X, Y), so the
 								        # BOOL mask must come first; otherwise the FLOAT tensor `x` would
 								        # be wired in as the condition and the mask as a value branch.
 								        where = make_node("Where", ["con", "x", "y"], ["output"], name="where")
 								        # Runs the ONNX checkers and then imports the model on the CPU runtime.
 								        make_and_import_model(make_graph([where], "where", [x, y, con], [output]))
-												Add send and recv operators based on NCCL (#182)

* baseline sendrecv, bug

* success sendrecv

* get rank from comm

* set output shape

* successful:set output shape equal to input shape

* shape as attribute

* success:shape as attribute

* success send recv, output 0

* add onnx test

* split send and recv

* success split send and recv

* test-onnx bug

* success test-onnx

* modified onnx.py

* solve review
											
										
										
											2023-12-14 16:38:03 +08:00
+								    def test_send(self):
 								        # Import a graph with a single "Send" node that consumes one FLOAT
 								        # input of shape [1, 3, 5, 7] and produces no outputs.
 								        sendInput = make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 5, 7])
 								        # `source` and `destination` are passed through as node attributes.
 								        # NOTE(review): presumably these are communicator ranks - confirm.
 								        send = make_node("Send", ["input"], [], name="send", source=0, destination=1)
 								        graph = make_graph([send], "send", [sendInput], [])
 								        # The model is built and imported directly, without calling
 								        # check_graph/check_model first.
 								        # NOTE(review): presumably because "Send" is not a standard ONNX
 								        # operator and the checker would reject it - confirm.
 								        model = make_model(graph)
 								        from_onnx(model, backend.cpu_runtime())
 								    def test_recv(self):
 								        # Import a graph with a single "Recv" node that takes no inputs and
 								        # produces one FLOAT output of shape [1, 3, 5, 7].
 								        recvOutput = make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 5, 7])
 								        # The output shape and element type are supplied as node
 								        # attributes, since the node has no input to derive them from.
 								        # NOTE(review): dataType=1 presumably is the numeric value of
 								        # TensorProto.FLOAT, matching the declared output - confirm.
 								        recv = make_node(
 								            "Recv",
 								            [],
 								            ["output"],
 								            name="recv",
 								            source=0,
 								            destination=1,
 								            shape=[1, 3, 5, 7],
 								            dataType=1,
 								        )
 								        graph = make_graph([recv], "recv", [], [recvOutput])
 								        # The model is built and imported directly, without calling
 								        # check_graph/check_model first.
 								        model = make_model(graph)
 								        from_onnx(model, backend.cpu_runtime())
-												tensor parallel for transformer (#125)

* add cmake bits about NCCL

* move example to examples/NNmodel

* impl NCCL communicator

* add comm related function to Runtime

* export runtime interface

* add launch.py

* use unique name to distinguish the NCCL ID file

* add timeout to communicator init

* expose communicator obj from runtime obj, add unit test for nccl communicator

* reformat files

* Add allReduce operator and cuda nccl allReduce kernel

* impl model parallel for resnet

* add allGather nccl kernel and operator

* Add allreduce allgather operator tests, change allgather kernel to output list of tensor, fix shape infer, handle nullptr output

* fix format of onnx.py

* use concat following AllGather

* get tensor parallel for resnet

* fix format of graph_handler.cc

* change BUILD_DIST default to OFF

* polish code of communicator

* update .gitignore

* export min/max to python

* fix MatMul

* modify launch.py to run opt

* hack to treat ReduceSum as AllReduceSum

* throw exception in cuda error

* fix parallel_opt.py

* improve the error prompt and cuda error check

* fix GatherObj::GatherObj member init

* fix size calculation for scalar (rank = 0) tensor

* MatMul supports bias

* fix add bias for row parallel gemm

* add --gen_std to launch.py

* fix AllReduceNCCL

* update launch.py

* less log

* update parallel_opt

* update launch.py

* add __eq__ for Placement sub-classes

* less benchmark run

* fix placement infer for matmul

* fix vocabulary size

* fix Exception

* Add shard tensor with group to support gpt2

* Add find successor function to find split op at different depth

* recover CommunicatorObj

* improve error message

* optimize parallel_opt.py

* optimize launch.py

* recover docs for all_reduce and all_gather

* Fix API

* fix format

---------

Co-authored-by: panzezhong <panzezhong@qiyuanlab.com>
Co-authored-by: Haojie Wang <haojie0429@gmail.com>
											
										
										
											2023-09-14 14:19:45 +08:00
-												support Dynamic tensor infer shape and fix memory pool (#176)

* feat: support dynamic tensor part1

* feat: support dynamic-tensor part2

* feat: support dynamic tensor part 3

* fix: fix some ..

* - add kvcache example

* feat: support concat to identity kernel

* add a simple memory pool for allocator

* fix: rebase to master

* fix bug after merging

* - remove outdated script

* fix: fix as review

---------

Co-authored-by: kilinchange <kilinchange@163.com>
Co-authored-by: Haojie Wang <haojie0429@gmail.com>
											
										
										
											2023-11-23 13:11:50 +08:00
+								class TestDynamicTensor(unittest.TestCase):
 								    # Tests that use a ResNet-18 ONNX model file located on disk.
 								    def test_dynamic_tensor(self):
 								        filename = r"resnet18-v2-7.onnx"
 								        current_path = os.getcwd()
 								        # Stays empty if the file is not found anywhere below the
 								        # current working directory.
 								        model_file = ""
 								        # Recursively search the working directory for the model file.
 								        # There is no early exit, so if several copies exist the last one
 								        # visited by os.walk is used.
 								        for root, dirs, files in os.walk(current_path):
 								            if filename in files:
 								                model_file = os.path.join(root, filename)
-												Add send and recv operators based on NCCL (#182)

* baseline sendrecv, bug

* success sendrecv

* get rank from comm

* set output shape

* successful:set output shape equal to input shape

* shape as attribute

* success:shape as attribute

* success send recv, output 0

* add onnx test

* split send and recv

* success split send and recv

* test-onnx bug

* success test-onnx

* modified onnx.py

* solve review
											
										
										
											2023-12-14 16:38:03 +08:00
-												support Dynamic tensor infer shape and fix memory pool (#176)

* feat: support dynamic tensor part1

* feat: support dynamic-tensor part2

* feat: support dynamic tensor part 3

* fix: fix some ..

* - add kvcache example

* feat: support concat to identity kernel

* add a simple memory pool for allocator

* fix: rebase to master

* fix bug after merging

* - remove outdated script

* fix: fix as review

---------

Co-authored-by: kilinchange <kilinchange@163.com>
Co-authored-by: Haojie Wang <haojie0429@gmail.com>
											
										
										
											2023-11-23 13:11:50 +08:00
+								        model = OnnxStub(onnx.load(model_file), backend.cpu_runtime())
 								        # Use the first output of the imported model for the shape checks.
 								        output_key = list(model.outputs.keys())[0]
 								        # As loaded, the model is expected to report an output of [1, 1000].
 								        old_output_shape = model.getShape(output_key)
 								        self.assertEqual(old_output_shape, ([1, 1000]))
 								        # Change the input shape to a batch of 5 and expect the output
 								        # shape to follow the new leading dimension.
 								        model.set_input([[5, 3, 224, 224]])
 								        new_output_shape = model.getShape(output_key)
 								        self.assertEqual(new_output_shape, ([5, 1000]))
-												feat: 创建 pyinfinitensor 前端

- python 前端项目结构及打包和安装脚本
- 后端编译出 so 改名为 backend，增加 GraphHandler 修改图结构
- ci 支持测试这些功能

Signed-off-by: YdrMaster <ydrml@hotmail.com>

											
										
										
											2023-02-12 08:23:49 +08:00
+								if __name__ == "__main__":
 								    # Run every TestCase in this module when executed as a script.
 								    unittest.main()