diff --git a/include/core/graph_handler.h b/include/core/graph_handler.h index a76197de..2ce4a4a6 100644 --- a/include/core/graph_handler.h +++ b/include/core/graph_handler.h @@ -80,6 +80,8 @@ class GraphHandlerObj { const optional> &steps); Tensor pad(Tensor input, Tensor output, const vector &pads, const optional> &axes); + Tensor constant(Tensor output); + Tensor unsqueeze(Tensor input, const vector &axes, Tensor output); //------ modifiers diff --git a/include/core/operator.h b/include/core/operator.h index f584b385..3ccfa3a4 100644 --- a/include/core/operator.h +++ b/include/core/operator.h @@ -25,6 +25,7 @@ enum class OpType { Mul, Div, Pow, + Sqrt, Gather, ReduceMean, Reshape, @@ -39,6 +40,8 @@ enum class OpType { Tanh, Abs, Resize, + Constant, + Unsqueeze, // MemBound = 300, }; diff --git a/include/core/tensor.h b/include/core/tensor.h index 72a3b007..47c9b964 100644 --- a/include/core/tensor.h +++ b/include/core/tensor.h @@ -104,7 +104,7 @@ class TensorObj : public TensorBaseObj { private: void printDataFloat() const; - void printDataUint32_t() const; + template void printDataInteger() const; template bool equalDataImpl(const T *a, const T *b, size_t size) const { diff --git a/include/operators/constant.h b/include/operators/constant.h new file mode 100644 index 00000000..c0bb336d --- /dev/null +++ b/include/operators/constant.h @@ -0,0 +1,29 @@ +#pragma once + +#include "core/operator.h" + +namespace infini { +class ConstantObj : public OperatorObj { + + public: + ConstantObj(GraphObj *graph, Tensor output) + : OperatorObj(OpType::Constant, {}, {output}) { + IT_ASSERT(output); + IT_ASSERT(checkValid(graph)); + } + OP_CLONE(ConstantObj); + + optional> inferShape(const TensorVec &inputs) const { + return {{outputs[0]->getDims()}}; + }; + + std::string toString() const override; + int numInputs() const override { return 0; } + int numOutputs() const override { return 1; } + void makeConstant() {} + + private: + vector getWorkloadVector() const override; + 
vector getOpAttrVector() const override; +}; +} // namespace infini diff --git a/include/operators/element_wise.h b/include/operators/element_wise.h index c3ffe9f1..82b9ed62 100644 --- a/include/operators/element_wise.h +++ b/include/operators/element_wise.h @@ -46,4 +46,5 @@ DEFINE_ELEMENT_WISE_OBJ(Sub, OpType::Sub) DEFINE_ELEMENT_WISE_OBJ(Mul, OpType::Mul) DEFINE_ELEMENT_WISE_OBJ(Div, OpType::Div) DEFINE_ELEMENT_WISE_OBJ(Pow, OpType::Pow) +DEFINE_ELEMENT_WISE_OBJ(Sqrt, OpType::Sqrt) }; // namespace infini diff --git a/include/operators/unsqueeze.h b/include/operators/unsqueeze.h new file mode 100644 index 00000000..4b9291ac --- /dev/null +++ b/include/operators/unsqueeze.h @@ -0,0 +1,23 @@ +#include "core/operator.h" + +namespace infini { +class UnsqueezeObj : public OperatorObj { + set axis; + + public: + UnsqueezeObj(GraphObj *graph, Tensor input, const vector &axis, + Tensor output); + OP_CLONE(UnsqueezeObj); + + optional> inferShape(const TensorVec &inputs) const override; + std::string toString() const override; + int numInputs() const override { return 1; } + int numOutputs() const override { return 1; } + + private: + bool parseAxis(const std::vector &index, std::set &axis) const; + vector getWorkloadVector() const override; + vector getOpAttrVector() const override; +}; + +} // namespace infini diff --git a/include/utils/data_generator.h b/include/utils/data_generator.h index 6a106d2e..0be2b09d 100644 --- a/include/utils/data_generator.h +++ b/include/utils/data_generator.h @@ -16,6 +16,8 @@ class DataGenerator { fill(reinterpret_cast(data), size); else if (dataType == DataType::Float32) fill(reinterpret_cast(data), size); + else if (dataType == DataType::Int64) + fill(reinterpret_cast(data), size); else IT_TODO_HALT(); } diff --git a/pyinfinitensor/src/pyinfinitensor/onnx.py b/pyinfinitensor/src/pyinfinitensor/onnx.py index 360f5aaa..856102f2 100644 --- a/pyinfinitensor/src/pyinfinitensor/onnx.py +++ b/pyinfinitensor/src/pyinfinitensor/onnx.py @@ -1,4 
+1,5 @@ import backend +import numpy as np from onnx import ( ModelProto, TensorProto, @@ -24,13 +25,16 @@ from onnx.checker import ( from onnx.shape_inference import infer_shapes from typing import Dict, List, Any, Tuple, Sequence, Union, Optional from functools import reduce +import struct +# TODO: do we need runtime here runtime = backend.runtime() class OnnxStub: - inputs: Dict[str, backend.Tensor] = {} - outputs: Dict[str, backend.Tensor] = {} - initializer: Dict[int, TensorProto] = {} + inputs: Dict[str, backend.Tensor] = {} # only the graph's original inputs, not the inputs of every layer + outputs: Dict[str, backend.Tensor] = {} # only the graph's final outputs + initializer: Dict[int, TensorProto] = {} # weights and parameters of every layer in the graph + tensors: Dict[int, TensorProto] = {} # all tensors handler: backend.GraphHandler def __init__(self, model: ModelProto, runtime): @@ -39,13 +43,13 @@ class OnnxStub: tensors: Dict[str, backend.Tensor] = dict() data: Dict[str, TensorProto] = dict() - + for input in model.graph.input: dims = _take_shape_dim(input.type.tensor_type.shape) tensors[input.name] = self.handler.tensor( dims, input.type.tensor_type.elem_type ) - + for output in model.graph.output: dims = _take_shape_dim(output.type.tensor_type.shape) tensors[output.name] = self.handler.tensor( @@ -54,6 +58,10 @@ class OnnxStub: for initializer in model.graph.initializer: data[initializer.name] = initializer + # weights and params are not in "model.graph.input" when .onnx is converted from .pth + tensors[initializer.name] = self.handler.tensor( + initializer.dims, initializer.data_type + ) for node in model.graph.node: if node.op_type == "Conv": @@ -331,9 +339,30 @@ class OnnxStub: _parse_data(data[node.input[1]]), _parse_data(data[node.input[3]]) if len(node.input) > 3 else None, ) + elif node.op_type == "Constant": + attr = next((attr for attr in node.attribute if attr.name == "value"), None) + if attr == None: + raise Exception("no value in constant nodeproto") + if attr.type == 4: #TENSOR + value_tensor = self.handler.tensor( +
[self.get_data_len(attr.t)], attr.t.data_type + ) + value_tensor.data_malloc() + self.fill_value(value_tensor, attr.t) + tensors[node.output[0]] = self.handler.constant( + value_tensor + ) + else : + raise Exception("TODO") + elif node.op_type == "Unsqueeze": + tensors[node.output[0]] = self.handler.unsqueeze( + tensors[node.input[0]], + _parse_data(data[node.input[1]]) if len(node.input) > 1 else next((attr.ints for attr in node.attribute if attr.name == "axes")), + tensors.get(node.output[0])) else: + print(node) raise Exception('Unsupported operator "{}"'.format(node.op_type)) - + self.handler.data_malloc() for name, obj in tensors.items(): @@ -341,19 +370,84 @@ class OnnxStub: if tensor == None: if any(input.name == name for input in model.graph.input): self.inputs[name] = obj - else: + if tensor != None: self.initializer[obj.fuid()] = tensor - if tensor.data_type == TensorProto.INT32: - obj.copyin_int32([int(i) for i in tensor.int32_data]) - elif tensor.data_type == TensorProto.INT64: - obj.copyin_int64([int(i) for i in tensor.int64_data]) - elif tensor.data_type == TensorProto.FLOAT: - obj.copyin_float([int(i) for i in tensor.float_data]) - else: - assert False, "Unsupported Tensor Type: {}".format(tensor.data_type) + self.fill_value(obj, tensor) for output in model.graph.output: - self.outputs[output.name] = tensors[output.name] + self.outputs[output.name] = tensors[output.name] + + self.tensors = tensors + + def get_data_len(self, tensor)-> int: + length = 0 + ele_size=1 + if tensor.data_type == TensorProto.INT32: + length = len(tensor.int32_data) + ele_size = 4 + elif tensor.data_type == TensorProto.INT64: + length = len(tensor.int64_data) + ele_size = 8 + elif tensor.data_type == TensorProto.FLOAT: + length = len(tensor.float_data) + ele_size = 4 + else: + assert False, "Unsupported Tensor Type: {}".format(tensor.data_type) + if length == 0: + length = len(tensor.raw_data) // ele_size + return length + + def fill_inputs(self, inputs: List[TensorProto]): + 
for tensor in inputs: + obj = self.inputs[tensor.name] + self.fill_value(obj, tensor) + + def fill_value(self, obj, tensor): + # Data may be stored in raw_data when .onnx is converted from .pth + # or when parsing Constant operator + if tensor.data_type == TensorProto.INT32: + int32_data = tensor.int32_data + if len(tensor.int32_data) == 0: + int32_data = self.parse_data(tensor.raw_data, tensor.data_type) + obj.copyin_int32([int(i) for i in int32_data]) + elif tensor.data_type == TensorProto.INT64: + int64_data = tensor.int64_data + if len(tensor.int64_data) == 0: + int64_data = self.parse_data(tensor.raw_data, tensor.data_type) + # TODO: int64 is narrowed to int32 here because the backend does not support int64 + obj.copyin_int32([np.int32(i) for i in int64_data]) + elif tensor.data_type == TensorProto.FLOAT: + float_data = tensor.float_data + if len(float_data) == 0: + float_data = self.parse_data(tensor.raw_data, tensor.data_type) + obj.copyin_float([i for i in float_data]) + else: + assert False, "Unsupported Tensor Type: {}".format(tensor.data_type) + + def parse_data(self, raw_data, dtype): + if dtype == TensorProto.FLOAT: + fmt = '<f' + elem_size = 4 + elif dtype == TensorProto.FLOAT16: + fmt = '<e' + elem_size = 2 + elif dtype == TensorProto.INT32: + fmt = '<i' + elem_size = 4 + elif dtype == TensorProto.INT64: + fmt = '<q' + elem_size = 8 + else: + raise ValueError('Unsupported data type') + num_elems = len(raw_data) // elem_size + data = [] + for i in range(num_elems): + start_idx = i * elem_size + end_idx = start_idx + elem_size + elem_bytes = raw_data[start_idx:end_idx] + elem_value = struct.unpack(fmt, elem_bytes)[0] + data.append(elem_value) + return data def to_onnx(self, name: str) -> ModelProto: class Context: @@ -576,9 +670,11 @@ def from_onnx(model: ModelProto, runtime): stub = OnnxStub(model, runtime) return stub.inputs, stub.outputs, stub.handler -def run_onnx(model: ModelProto, runtime): +def run_onnx(model: ModelProto, inputs: List[TensorProto]): stub = 
OnnxStub(model, runtime) + stub.fill_inputs(inputs) stub.run() + return stub.outputs def _parse_attribute(node: NodeProto, attrs: Dict[str, Any] = dict()) -> Dict[str, Any]: for attr in node.attribute: diff --git a/pyinfinitensor/tests/export_onnx.py b/pyinfinitensor/tests/export_onnx.py new file mode 100644 index 00000000..d4dc397a --- /dev/null +++ b/pyinfinitensor/tests/export_onnx.py @@ -0,0 +1,44 @@ +import torch +import torchvision +import os +from models.drn import drn_a_50, drn_c_26 + + +def export_model(modelfn, file_name): + files = os.listdir() + file = next( + (name for name in files if name.endswith(file_name)), None + ) + if file == None: + # Load the pretrained model + model = modelfn(pretrained = True) + # Set the model to evaluation mode + model.eval() + + # Create a sample input tensor + input_tensor = torch.randn(1, 3, 224, 224) + # Export the model to ONNX format + torch.onnx.export(model, input_tensor, file_name) + + + +from transformers import BertTokenizer, BertModel +def export_bert(): + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + model = BertModel.from_pretrained("bert-base-uncased") + text = "Replace me by any text you'd like." 
+ encoded_input = tokenizer(text, return_tensors='pt') + #output = model(**encoded_input) + #inputs = torch.randn(1, 128, dtype=torch.float32) + input_names = ["input_ids", "attention_mask", "token_type_ids"] + torch.onnx.export(model,tuple(encoded_input.values()), + f="bert.onnx", + input_names=input_names, + + output_names=["output"]) + # https://huggingface.co/blog/convert-transformers-to-onnx#4-how-can-i-convert-a-transformers-model-bert-to-onnx + +#export_model(torchvision.models.resnet18, "resnet18.onnx") +#export_model(torchvision.models.resnet50, "resnet50.onnx") +#export_model(drn_c_26, "drn_c_26.onnx") +export_model(torchvision.models.inception_v3, "inception_v3.onnx") diff --git a/pyinfinitensor/tests/models/drn.py b/pyinfinitensor/tests/models/drn.py new file mode 100644 index 00000000..0cb4e8ec --- /dev/null +++ b/pyinfinitensor/tests/models/drn.py @@ -0,0 +1,416 @@ +# source from https://github.com/fyu/drn/blob/master/drn.py + +import pdb + +import torch.nn as nn +import math +import torch.utils.model_zoo as model_zoo + +BatchNorm = nn.BatchNorm2d + + +# __all__ = ['DRN', 'drn26', 'drn42', 'drn58'] + + +webroot = 'http://dl.yf.io/drn/' + +model_urls = { + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', + 'drn-c-26': webroot + 'drn_c_26-ddedf421.pth', + 'drn-c-42': webroot + 'drn_c_42-9d336e8c.pth', + 'drn-c-58': webroot + 'drn_c_58-0a53a92c.pth', + 'drn-d-22': webroot + 'drn_d_22-4bd2f8ea.pth', + 'drn-d-38': webroot + 'drn_d_38-eebb45f0.pth', + 'drn-d-54': webroot + 'drn_d_54-0e0534ff.pth', + 'drn-d-105': webroot + 'drn_d_105-12b40979.pth' +} + + +def conv3x3(in_planes, out_planes, stride=1, padding=1, dilation=1): + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=padding, bias=False, dilation=dilation) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None, + dilation=(1, 1), residual=True): + super(BasicBlock, self).__init__() + 
self.conv1 = conv3x3(inplanes, planes, stride, + padding=dilation[0], dilation=dilation[0]) + self.bn1 = BatchNorm(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes, + padding=dilation[1], dilation=dilation[1]) + self.bn2 = BatchNorm(planes) + self.downsample = downsample + self.stride = stride + self.residual = residual + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + if self.residual: + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None, + dilation=(1, 1), residual=True): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = BatchNorm(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=dilation[1], bias=False, + dilation=dilation[1]) + self.bn2 = BatchNorm(planes) + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) + self.bn3 = BatchNorm(planes * 4) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class DRN(nn.Module): + + def __init__(self, block, layers, num_classes=1000, + channels=(16, 32, 64, 128, 256, 512, 512, 512), + out_map=False, out_middle=False, pool_size=28, arch='D'): + super(DRN, self).__init__() + self.inplanes = channels[0] + self.out_map = out_map + self.out_dim = channels[-1] + self.out_middle = out_middle 
+ self.arch = arch + + if arch == 'C': + self.conv1 = nn.Conv2d(3, channels[0], kernel_size=7, stride=1, + padding=3, bias=False) + self.bn1 = BatchNorm(channels[0]) + self.relu = nn.ReLU(inplace=True) + + self.layer1 = self._make_layer( + BasicBlock, channels[0], layers[0], stride=1) + self.layer2 = self._make_layer( + BasicBlock, channels[1], layers[1], stride=2) + elif arch == 'D': + self.layer0 = nn.Sequential( + nn.Conv2d(3, channels[0], kernel_size=7, stride=1, padding=3, + bias=False), + BatchNorm(channels[0]), + nn.ReLU(inplace=True) + ) + + self.layer1 = self._make_conv_layers( + channels[0], layers[0], stride=1) + self.layer2 = self._make_conv_layers( + channels[1], layers[1], stride=2) + + self.layer3 = self._make_layer(block, channels[2], layers[2], stride=2) + self.layer4 = self._make_layer(block, channels[3], layers[3], stride=2) + self.layer5 = self._make_layer(block, channels[4], layers[4], + dilation=2, new_level=False) + self.layer6 = None if layers[5] == 0 else \ + self._make_layer(block, channels[5], layers[5], dilation=4, + new_level=False) + + if arch == 'C': + self.layer7 = None if layers[6] == 0 else \ + self._make_layer(BasicBlock, channels[6], layers[6], dilation=2, + new_level=False, residual=False) + self.layer8 = None if layers[7] == 0 else \ + self._make_layer(BasicBlock, channels[7], layers[7], dilation=1, + new_level=False, residual=False) + elif arch == 'D': + self.layer7 = None if layers[6] == 0 else \ + self._make_conv_layers(channels[6], layers[6], dilation=2) + self.layer8 = None if layers[7] == 0 else \ + self._make_conv_layers(channels[7], layers[7], dilation=1) + + if num_classes > 0: + self.avgpool = nn.AvgPool2d(pool_size) + self.fc = nn.Conv2d(self.out_dim, num_classes, kernel_size=1, + stride=1, padding=0, bias=True) + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. 
/ n)) + elif isinstance(m, BatchNorm): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def _make_layer(self, block, planes, blocks, stride=1, dilation=1, + new_level=True, residual=True): + assert dilation == 1 or dilation % 2 == 0 + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + BatchNorm(planes * block.expansion), + ) + + layers = list() + layers.append(block( + self.inplanes, planes, stride, downsample, + dilation=(1, 1) if dilation == 1 else ( + dilation // 2 if new_level else dilation, dilation), + residual=residual)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes, residual=residual, + dilation=(dilation, dilation))) + + return nn.Sequential(*layers) + + def _make_conv_layers(self, channels, convs, stride=1, dilation=1): + modules = [] + for i in range(convs): + modules.extend([ + nn.Conv2d(self.inplanes, channels, kernel_size=3, + stride=stride if i == 0 else 1, + padding=dilation, bias=False, dilation=dilation), + BatchNorm(channels), + nn.ReLU(inplace=True)]) + self.inplanes = channels + return nn.Sequential(*modules) + + def forward(self, x): + y = list() + + if self.arch == 'C': + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + elif self.arch == 'D': + x = self.layer0(x) + + x = self.layer1(x) + y.append(x) + x = self.layer2(x) + y.append(x) + + x = self.layer3(x) + y.append(x) + + x = self.layer4(x) + y.append(x) + + x = self.layer5(x) + y.append(x) + + if self.layer6 is not None: + x = self.layer6(x) + y.append(x) + + if self.layer7 is not None: + x = self.layer7(x) + y.append(x) + + if self.layer8 is not None: + x = self.layer8(x) + y.append(x) + + if self.out_map: + x = self.fc(x) + else: + x = self.avgpool(x) + x = self.fc(x) + x = x.view(x.size(0), -1) + + if self.out_middle: + return x, y + else: + return x 
+ + +class DRN_A(nn.Module): + + def __init__(self, block, layers, num_classes=1000): + self.inplanes = 64 + super(DRN_A, self).__init__() + self.out_dim = 512 * block.expansion + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=1, + dilation=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=1, + dilation=4) + self.avgpool = nn.AvgPool2d(28, stride=1) + self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. / n)) + elif isinstance(m, BatchNorm): + m.weight.data.fill_(1) + m.bias.data.zero_() + + # for m in self.modules(): + # if isinstance(m, nn.Conv2d): + # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + # elif isinstance(m, nn.BatchNorm2d): + # nn.init.constant_(m.weight, 1) + # nn.init.constant_(m.bias, 0) + + def _make_layer(self, block, planes, blocks, stride=1, dilation=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes, + dilation=(dilation, dilation))) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x 
= self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.view(x.size(0), -1) + x = self.fc(x) + + return x + + +def drn_a_50(pretrained=False, **kwargs): + model = DRN_A(Bottleneck, [3, 4, 6, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) + return model + + +def drn_c_26(pretrained=False, **kwargs): + model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 1, 1], arch='C', **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['drn-c-26'])) + return model + + +def drn_c_42(pretrained=False, **kwargs): + model = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 1, 1], arch='C', **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['drn-c-42'])) + return model + + +def drn_c_58(pretrained=False, **kwargs): + model = DRN(Bottleneck, [1, 1, 3, 4, 6, 3, 1, 1], arch='C', **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['drn-c-58'])) + return model + + +def drn_d_22(pretrained=False, **kwargs): + model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 1, 1], arch='D', **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['drn-d-22'])) + return model + + +def drn_d_24(pretrained=False, **kwargs): + model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 2, 2], arch='D', **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['drn-d-24'])) + return model + + +def drn_d_38(pretrained=False, **kwargs): + model = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 1, 1], arch='D', **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['drn-d-38'])) + return model + + +def drn_d_40(pretrained=False, **kwargs): + model = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 2, 2], arch='D', **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['drn-d-40'])) + return model + + +def drn_d_54(pretrained=False, **kwargs): + model = DRN(Bottleneck, [1, 1, 3, 4, 6, 3, 1, 1], arch='D', **kwargs) + if 
pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['drn-d-54'])) + return model + + +def drn_d_56(pretrained=False, **kwargs): + model = DRN(Bottleneck, [1, 1, 3, 4, 6, 3, 2, 2], arch='D', **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['drn-d-56'])) + return model + + +def drn_d_105(pretrained=False, **kwargs): + model = DRN(Bottleneck, [1, 1, 3, 4, 23, 3, 1, 1], arch='D', **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['drn-d-105'])) + return model + + +def drn_d_107(pretrained=False, **kwargs): + model = DRN(Bottleneck, [1, 1, 3, 4, 23, 3, 2, 2], arch='D', **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['drn-d-107'])) + return model diff --git a/pyinfinitensor/tests/test_model.py b/pyinfinitensor/tests/test_model.py new file mode 100644 index 00000000..58042703 --- /dev/null +++ b/pyinfinitensor/tests/test_model.py @@ -0,0 +1,82 @@ +import os, onnx, unittest +from typing import Dict +import numpy as np +import torch +import torchvision +import torchvision.transforms as transforms +from PIL import Image +from onnx import TensorProto +from onnx.helper import ( + make_model, + make_node, + make_tensor, + make_graph, + make_tensor_value_info, +) +from onnx.checker import check_model +from pyinfinitensor.onnx import from_onnx, backend, run_onnx +import onnxruntime + +def pre_process(img) : + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + transfn = transforms.Compose([transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize, + ]) + #img = np.transpose(img,(1,2,0)) + return transfn(img) + +def model_run(onnx_file_name, input_data): + dir_path = "./" + files = os.listdir(dir_path) + model_file = next( + (name for name in files if name.endswith(onnx_file_name)), None + ) + + if model_file != None: + model_path = os.path.join(dir_path, model_file) + print( + "model: {file}({size:.2f} 
MiB)".format( + file=model_path, size=os.path.getsize(model_path) / 1024 / 1024 + ) + ) + model = onnx.load(model_path) + check_model(model) + + session = onnxruntime.InferenceSession(model.SerializeToString(), None) + input_name = session.get_inputs()[0].name + input_tensor = make_tensor(input_name, TensorProto.FLOAT, input_data.shape, input_data) + run_onnx(model, [input_tensor]) + +def get_img_data(): + dir_path = "./" + files = os.listdir(dir_path) + img_file = next( + (name for name in files if name.endswith(".jpg")), None + ) + if img_file != None: + img_path = os.path.join(dir_path, img_file) + try: + img = Image.open(img_path) + input_data = pre_process(img) + img.close() + except FileNotFoundError: + print(f"Image not found: {img_file}") + else: + return input_data.numpy().astype(np.float32) +class TestStringMethods(unittest.TestCase): + def test_model_run(self): + #input_data = get_img_data() + input_data = np.random.rand(1,3,224,224).astype(np.float32) + model_run("resnet18.onnx", input_data) + model_run("resnet50.onnx", input_data) + model_run("drn_c_26.onnx", input_data) + + + + + +if __name__ == "__main__": + unittest.main() diff --git a/pyinfinitensor/tests/test_onnx.py b/pyinfinitensor/tests/test_onnx.py index 99ad3b9d..9b651370 100644 --- a/pyinfinitensor/tests/test_onnx.py +++ b/pyinfinitensor/tests/test_onnx.py @@ -1,4 +1,7 @@ import os, onnx, unittest +from typing import Dict +import numpy as np +import onnxruntime from onnx import TensorProto from onnx.helper import ( make_model, @@ -16,20 +19,7 @@ def make_and_import_model(graph: onnx.GraphProto): check_model(model) from_onnx(model, runtime) - class TestStringMethods(unittest.TestCase): - #def test_run(self): - # model_file = next( - # (name for name in os.listdir() if name.endswith(".onnx")), None - # ) - # if model_file != None: - # print( - # "model: {file}({size:.2f} MiB)".format( - # file=model_file, size=os.path.getsize(model_file) / 1024 / 1024 - # ) - # ) - # 
run_onnx(onnx.load(model_file), runtime) - def test_load(self): model_file = next( (name for name in os.listdir() if name.endswith(".onnx")), None @@ -317,5 +307,6 @@ class TestStringMethods(unittest.TestCase): handler.reshape(x, y, [3, 2, 1]) + if __name__ == "__main__": unittest.main() diff --git a/src/core/graph_handler.cc b/src/core/graph_handler.cc index 424ca276..f8bf7677 100644 --- a/src/core/graph_handler.cc +++ b/src/core/graph_handler.cc @@ -1,6 +1,7 @@ #include "core/graph_handler.h" #include "operators/batch_norm.h" #include "operators/concat.h" +#include "operators/constant.h" #include "operators/conv.h" #include "operators/element_wise.h" #include "operators/gather.h" @@ -12,6 +13,7 @@ #include "operators/slice.h" #include "operators/softmax.h" #include "operators/unary.h" +#include "operators/unsqueeze.h" namespace infini { @@ -227,6 +229,24 @@ Tensor GraphHandlerObj::pad(Tensor input, Tensor output, } } +Tensor GraphHandlerObj::constant(Tensor output) { + if (output) { + g->addOpWithOutputs(output); + return output; + } else + return g->addOp(output)->getOutput(); +} + +Tensor GraphHandlerObj::unsqueeze(Tensor input, const vector &axes, + Tensor output) { + if (output) { + g->addOpWithOutputs(std::move(input), axes, output); + return output; + } else + return g->addOp(std::move(input), axes, output) + ->getOutput(); +} + static DataType dtype_repr_convert(int dtype) { switch ((OnnxDType)dtype) { case OnnxDType::FLOAT: @@ -243,8 +263,11 @@ static DataType dtype_repr_convert(int dtype) { return DataType::Int16; case OnnxDType::INT32: return DataType::Int32; + // TODO: conver Int64 to int32, because some backend like dnnl donot support + // int64 case OnnxDType::INT64: - return DataType::Int64; + // return DataType::Int64; + return DataType::Int32; default: IT_ASSERT(false, "Unsupported data type"); } diff --git a/src/core/tensor.cc b/src/core/tensor.cc index cdcd9e28..f7e710f9 100644 --- a/src/core/tensor.cc +++ b/src/core/tensor.cc @@ -62,7 +62,11 @@ 
void TensorObj::printData() const { if (dtype == DataType::Float32) printDataFloat(); else if (dtype == DataType::UInt32) - printDataUint32_t(); + printDataInteger(); + else if (dtype == DataType::Int32) + printDataInteger(); + else if (dtype == DataType::Int64) + printDataInteger(); else IT_TODO_HALT(); } @@ -94,12 +98,12 @@ void TensorObj::printDataFloat() const { } } -void TensorObj::printDataUint32_t() const { +template void TensorObj::printDataInteger() const { IT_ASSERT(data != nullptr); std::cout << "Tensor: " << guid << std::endl; auto numDims = shape.size(); auto dimSzVec = std::vector(numDims, 1); - auto ptr = data->getPtr(); + auto ptr = data->getPtr(); dimSzVec[numDims - 1] = shape[numDims - 1]; for (int i = numDims - 1; i != 0; --i) dimSzVec[i - 1] = dimSzVec[i] * shape[i - 1]; @@ -133,9 +137,15 @@ bool TensorObj::equalData(const Tensor &rhs) const { if (getDType() == DataType::UInt32) return equalDataImpl(getRawDataPtr(), rhs->getRawDataPtr(), size()); + if (getDType() == DataType::Int32) + return equalDataImpl(getRawDataPtr(), + rhs->getRawDataPtr(), size()); else if (getDType() == DataType::Float32) return equalDataImpl(getRawDataPtr(), rhs->getRawDataPtr(), size()); + else if (getDType() == DataType::Int64) + return equalDataImpl(getRawDataPtr(), + rhs->getRawDataPtr(), size()); else IT_TODO_HALT(); } diff --git a/src/ffi/ffi_infinitensor.cc b/src/ffi/ffi_infinitensor.cc index 1b58abd5..bed9ed4f 100644 --- a/src/ffi/ffi_infinitensor.cc +++ b/src/ffi/ffi_infinitensor.cc @@ -195,6 +195,7 @@ void init_graph_builder(py::module &m) { py::class_>(m, "Tensor") .def("fuid", &TensorObj::getFuid, policy::automatic) .def("shape", &TensorObj::getDims, policy::move) + .def("data_malloc", &TensorObj::dataMalloc, policy::move) .def("copyin_float", &TensorObj::copyin, policy::move) .def("copyin_int32", &TensorObj::copyin, policy::move) .def("copyin_int64", &TensorObj::copyin, policy::move) @@ -236,6 +237,8 @@ void init_graph_builder(py::module &m) { .def("gather", 
&Handler::gather, policy::move) .def("reduce_mean", &Handler::reduceMean, policy::move) .def("slice", &Handler::slice, policy::move) + .def("constant", &Handler::constant, policy::move) + .def("unsqueeze", &Handler::unsqueeze, policy::move) .def("pad", &Handler::pad, policy::move) .def("topo_sort", &Handler::topo_sort, policy::automatic) .def("operators", &Handler::operators, policy::move) diff --git a/src/kernels/intelcpu/constant.cc b/src/kernels/intelcpu/constant.cc new file mode 100644 index 00000000..69d5d39d --- /dev/null +++ b/src/kernels/intelcpu/constant.cc @@ -0,0 +1,15 @@ +#include "operators/constant.h" +#include "intelcpu/mkl_kernel_without_config.h" + +namespace infini { +class ConstantKernel : public MklKernelWithoutConfig { + void compute(const Operator &_op, + const RuntimeObj *context) const override { + ; + } +}; +REGISTER_KERNEL(Device::INTELCPU, OpType::Constant, DataType::Int32, + ConstantKernel, "Constant_Mkl_Int32"); +REGISTER_KERNEL(Device::INTELCPU, OpType::Constant, DataType::Float32, + ConstantKernel, "Constant_Mkl_Float32"); +} // namespace infini diff --git a/src/kernels/intelcpu/gather.cc b/src/kernels/intelcpu/gather.cc index a95ece4e..b5b40a50 100644 --- a/src/kernels/intelcpu/gather.cc +++ b/src/kernels/intelcpu/gather.cc @@ -38,12 +38,12 @@ class MklGather : public MklKernelWithoutConfig { sycl::queue q(sycl::cpu_selector{}); auto inDevice = sycl::malloc_device(iSize, q); - auto indexDevice = sycl::malloc_device(idxSize, q); + auto indexDevice = sycl::malloc_device(idxSize, q); auto outDevice = sycl::malloc_device(oSize, q); q.memcpy(inDevice, in->getRawDataPtr(), iSize * sizeof(float)); - q.memcpy(indexDevice, index->getRawDataPtr(), - idxSize * sizeof(uint32_t)); + q.memcpy(indexDevice, index->getRawDataPtr(), + idxSize * sizeof(int32_t)); q.wait(); q.parallel_for(sycl::range<1>(oSize), [=](sycl::id<1> index) { diff --git a/src/kernels/intelcpu/matmul_dpcpp.cc b/src/kernels/intelcpu/matmul_dpcpp.cc index fd77ee39..6d07f0d8 
100644 --- a/src/kernels/intelcpu/matmul_dpcpp.cc +++ b/src/kernels/intelcpu/matmul_dpcpp.cc @@ -53,9 +53,9 @@ template class MklDpcppMatmul : public CpuKernelWithoutConfig { // create execution queue and buffers of matrix data cl::sycl::queue main_queue(sycl::cpu_selector{}, exception_handler); - cl::sycl::buffer A_buffer(A, op->getInputs(0)->size()); - cl::sycl::buffer B_buffer(B, op->getInputs(1)->size()); - cl::sycl::buffer C_buffer(C, op->getOutput(0)->size()); + cl::sycl::buffer A_buffer(A, op->getInputs(0)->size()); + cl::sycl::buffer B_buffer(B, op->getInputs(1)->size()); + cl::sycl::buffer C_buffer(C, op->getOutput(0)->size()); // add oneapi::mkl::blas::gemm to execution queue try { diff --git a/src/kernels/intelcpu/reshape.cc b/src/kernels/intelcpu/reshape.cc index bddef40f..c85afd26 100644 --- a/src/kernels/intelcpu/reshape.cc +++ b/src/kernels/intelcpu/reshape.cc @@ -1,7 +1,7 @@ #include "operators/reshape.h" #include "intelcpu/mkl_kernel_without_config.h" #include "intelcpu/mkl_runtime.h" - +#include "operators/unsqueeze.h" namespace infini { class MklReshape : public MklKernelWithoutConfig { void compute(const Operator &op, @@ -47,4 +47,6 @@ REGISTER_KERNEL(Device::INTELCPU, OpType::Identity, DataType::Float32, MklReshape, "Identify_Mkl_Float32"); REGISTER_KERNEL(Device::INTELCPU, OpType::Flatten, DataType::Float32, MklReshape, "Flatten_Mkl_Float32"); +REGISTER_KERNEL(Device::INTELCPU, OpType::Unsqueeze, DataType::Float32, + MklReshape, "Unsqueeze_Mkl_Float32"); }; // namespace infini diff --git a/src/operators/constant.cc b/src/operators/constant.cc new file mode 100644 index 00000000..21368492 --- /dev/null +++ b/src/operators/constant.cc @@ -0,0 +1,21 @@ +#include "operators/constant.h" + +namespace infini { +std::string ConstantObj::toString() const { + std::ostringstream os; + os << "Constant[" << getGuid() << "]"; + os << "output = " << outputs[0]->getGuid() << ","; + return os.str(); +} + +vector ConstantObj::getWorkloadVector() const { + 
vector ret = outputs[0]->getDims(); + ret.emplace(ret.begin(), enum_to_underlying(type)); + return ret; +} + +// TODO(review): the question "need eps and momentum?" looks copied from another operator; Constant takes only an output tensor. +vector ConstantObj::getOpAttrVector() const { + return {enum_to_underlying(type)}; +} +} // namespace infini diff --git a/src/operators/gather.cc b/src/operators/gather.cc index afb4996e..3616db0b 100644 --- a/src/operators/gather.cc +++ b/src/operators/gather.cc @@ -28,7 +28,7 @@ optional> GatherObj::inferShape(const TensorVec &inputs) const { vector GatherObj::inferDataType(const TensorVec &inputs) const { IT_ASSERT(inputs.size() == 2); auto index = inputs[1]; - IT_ASSERT(index->getDType() == DataType::UInt32); + IT_ASSERT(index->getDType() == DataType::Int32); return {inputs[0]->getDType()}; } diff --git a/src/operators/unsqueeze.cc b/src/operators/unsqueeze.cc new file mode 100644 index 00000000..2bbc0bea --- /dev/null +++ b/src/operators/unsqueeze.cc @@ -0,0 +1,65 @@ +#include "operators/unsqueeze.h" + +namespace infini { +UnsqueezeObj::UnsqueezeObj(GraphObj *graph, Tensor in, + const std::vector &index, Tensor out) + : OperatorObj(OpType::Unsqueeze, {in}, {out}) { + IT_ASSERT(parseAxis(index, axis)); + IT_ASSERT(checkValid(graph)); +} + +bool UnsqueezeObj::parseAxis(const std::vector &index, + std::set &axis) const { + bool ret = true; + int nDim = inputs[0]->getDims().size() + index.size(); + for (size_t i = 0; i < index.size(); ++i) { + int data = index[i]; + if (data < 0) + data += nDim; + if (data >= nDim) { + ret = false; + break; + } + if (axis.find(data) != axis.end()) { + ret = false; + break; + } + axis.insert(data); + } + return ret; +} + +optional> +UnsqueezeObj::inferShape(const TensorVec &inputs) const { + Shape dims = inputs[0]->getDims(); + for (int i : axis) { + auto it = dims.begin(); + dims.insert(std::next(it, i), 1); + } + return {{dims}}; +} + +std::string UnsqueezeObj::toString() const { + std::ostringstream os; + os << "Unsqueeze[" << getGuid() << "]"; + os << "("; + os << "inputs="; + for (auto i = 0; 
i < numInputs(); i++) + os << inputs[i]->getGuid() << ","; + os << "output=" << outputs[0]->getGuid() << ","; + os << ")"; + + return os.str(); +} + +vector UnsqueezeObj::getWorkloadVector() const { + vector ret = inputs[0]->getDims(); + ret.emplace(ret.begin(), enum_to_underlying(type)); + for (auto i : axis) + ret.emplace_back(i); + return ret; +} +vector UnsqueezeObj::getOpAttrVector() const { + return {enum_to_underlying(type)}; +} +} // namespace infini diff --git a/test/kernels/intelcpu/test_mkl_unsqueeze.cc b/test/kernels/intelcpu/test_mkl_unsqueeze.cc new file mode 100644 index 00000000..bf7bf7a9 --- /dev/null +++ b/test/kernels/intelcpu/test_mkl_unsqueeze.cc @@ -0,0 +1,26 @@ +#include "core/graph.h" +#include "core/runtime.h" +#include "intelcpu/mkl_runtime.h" +#include "operators/unsqueeze.h" + +#include "test.h" + +namespace infini { + +TEST(Unsqueeze, Mkl) { + Runtime runtime = MklRuntimeObj::getInstance(); + Graph g = make_ref(runtime); + + auto input = g->addTensor({2, 3, 3}, DataType::Float32); + vector index{1, 0}; + auto op = g->addOp(input, index, nullptr); + g->dataMalloc(); + input->setData(IncrementalGenerator()); + + runtime->run(g); + + auto o = g->cloneTensor(op->getOutput(0)); + // check results + EXPECT_TRUE(o->equalData(input)); +} +} // namespace infini diff --git a/test/operators/test_unsqueeze.cc b/test/operators/test_unsqueeze.cc new file mode 100644 index 00000000..bc9e8db4 --- /dev/null +++ b/test/operators/test_unsqueeze.cc @@ -0,0 +1,29 @@ +#include "core/graph.h" +#include "core/runtime.h" +#include "operators/unsqueeze.h" + +#include "test.h" + +namespace infini { +TEST(Unsqueeze, ShapeInfer) { + { + Runtime runtime = NativeCpuRuntimeObj::getInstance(); + Graph g = make_ref(runtime); + auto input = g->addTensor({1, 3, 2, 15}, DataType::Float32); + vector index{1, 6, 0}; + auto op = g->addOp(input, index, nullptr); + + EXPECT_EQ(op->getOutput(0)->getDims(), (Shape{1, 1, 1, 3, 2, 15, 1})); + } + + { + Runtime runtime = 
NativeCpuRuntimeObj::getInstance(); + Graph g = make_ref(runtime); + auto input = g->addTensor({1, 3, 2, 15}, DataType::Float32); + vector index{-6, -1, -7}; + auto op = g->addOp(input, index, nullptr); + + EXPECT_EQ(op->getOutput(0)->getDims(), (Shape{1, 1, 1, 3, 2, 15, 1})); + } +} +} // namespace infini