ADD unsqueeze

fix some bugs

fix some issues in onnx_stub

add Constant operator

Fix ONNX data parsing and the run_onnx API, and add some tests for models.
This commit is contained in:
wangw 2023-04-17 15:07:21 +08:00
parent f2be122efc
commit f021afe199
24 changed files with 926 additions and 43 deletions
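
For context, a minimal sketch of how the reworked run_onnx API is meant to be driven after this commit (illustrative only; the "resnet18.onnx" file name is an assumption, and it mirrors the test added below):

import numpy as np
import onnx
from onnx import TensorProto
from onnx.helper import make_tensor
from pyinfinitensor.onnx import run_onnx

model = onnx.load("resnet18.onnx")  # any exported model
data = np.random.rand(1, 3, 224, 224).astype(np.float32)
name = model.graph.input[0].name  # fill_inputs matches inputs by name
inp = make_tensor(name, TensorProto.FLOAT, data.shape, data.flatten())
outputs = run_onnx(model, [inp])  # run_onnx now takes TensorProto inputs, not a runtime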

View File

@ -80,6 +80,8 @@ class GraphHandlerObj {
const optional<vector<int>> &steps);
Tensor pad(Tensor input, Tensor output, const vector<int> &pads,
const optional<vector<int>> &axes);
Tensor constant(Tensor output);
Tensor unsqueeze(Tensor input, const vector<int> &axes, Tensor output);
//------ modifiers

View File

@ -25,6 +25,7 @@ enum class OpType {
Mul,
Div,
Pow,
Sqrt,
Gather,
ReduceMean,
Reshape,
@ -39,6 +40,8 @@ enum class OpType {
Tanh,
Abs,
Resize,
Constant,
Unsqueeze,
//
MemBound = 300,
};

View File

@ -104,7 +104,7 @@ class TensorObj : public TensorBaseObj {
private:
void printDataFloat() const;
void printDataUint32_t() const;
template <typename T> void printDataInteger() const;
template <typename T>
bool equalDataImpl(const T *a, const T *b, size_t size) const {

View File

@ -0,0 +1,29 @@
#pragma once
#include "core/operator.h"
namespace infini {
class ConstantObj : public OperatorObj {
public:
ConstantObj(GraphObj *graph, Tensor output)
: OperatorObj(OpType::Constant, {}, {output}) {
IT_ASSERT(output);
IT_ASSERT(checkValid(graph));
}
OP_CLONE(ConstantObj);
optional<vector<Shape>> inferShape(const TensorVec &inputs) const override {
return {{outputs[0]->getDims()}};
}
std::string toString() const override;
int numInputs() const override { return 0; }
int numOutputs() const override { return 1; }
void makeConstant() {}
private:
vector<int> getWorkloadVector() const override;
vector<int> getOpAttrVector() const override;
};
} // namespace infini

View File

@ -46,4 +46,5 @@ DEFINE_ELEMENT_WISE_OBJ(Sub, OpType::Sub)
DEFINE_ELEMENT_WISE_OBJ(Mul, OpType::Mul)
DEFINE_ELEMENT_WISE_OBJ(Div, OpType::Div)
DEFINE_ELEMENT_WISE_OBJ(Pow, OpType::Pow)
DEFINE_ELEMENT_WISE_OBJ(Sqrt, OpType::Sqrt)
}; // namespace infini

View File

@ -0,0 +1,23 @@
#include "core/operator.h"
namespace infini {
class UnsqueezeObj : public OperatorObj {
set<int> axis;
public:
UnsqueezeObj(GraphObj *graph, Tensor input, const vector<int> &axis,
Tensor output);
OP_CLONE(UnsqueezeObj);
optional<vector<Shape>> inferShape(const TensorVec &inputs) const override;
std::string toString() const override;
int numInputs() const override { return 1; }
int numOutputs() const override { return 1; }
private:
bool parseAxis(const std::vector<int> &index, std::set<int> &axis) const;
vector<int> getWorkloadVector() const override;
vector<int> getOpAttrVector() const override;
};
} // namespace infini
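
The shape rule that parseAxis and inferShape implement, restated as a small Python sketch (illustrative only): negative axes are normalized against the output rank, duplicates are rejected, and a 1 is inserted at each normalized position.

def unsqueeze_shape(dims, axes):
    out_rank = len(dims) + len(axes)
    norm = sorted(a + out_rank if a < 0 else a for a in axes)
    assert len(set(norm)) == len(axes) and all(0 <= a < out_rank for a in norm)
    out = list(dims)
    for a in norm:
        out.insert(a, 1)
    return out

# Matches the shape-inference tests added below:
assert unsqueeze_shape([1, 3, 2, 15], [1, 6, 0]) == [1, 1, 1, 3, 2, 15, 1]
assert unsqueeze_shape([1, 3, 2, 15], [-6, -1, -7]) == [1, 1, 1, 3, 2, 15, 1]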

View File

@ -16,6 +16,8 @@ class DataGenerator {
fill(reinterpret_cast<uint32_t *>(data), size);
else if (dataType == DataType::Float32)
fill(reinterpret_cast<float *>(data), size);
else if (dataType == DataType::Int64)
fill(reinterpret_cast<long long *>(data), size);
else
IT_TODO_HALT();
}

View File

@ -1,4 +1,5 @@
import backend
import numpy as np
from onnx import (
ModelProto,
TensorProto,
@ -24,13 +25,16 @@ from onnx.checker import (
from onnx.shape_inference import infer_shapes
from typing import Dict, List, Any, Tuple, Sequence, Union, Optional
from functools import reduce
import struct
# TODO: do we need runtime here?
runtime = backend.runtime()
class OnnxStub:
inputs: Dict[str, backend.Tensor] = {}
outputs: Dict[str, backend.Tensor] = {}
initializer: Dict[int, TensorProto] = {}
inputs: Dict[str, backend.Tensor] = {}  # only the graph's original inputs, not every layer's inputs
outputs: Dict[str, backend.Tensor] = {}  # only the graph's final outputs
initializer: Dict[int, TensorProto] = {}  # the weights and parameters of every layer
tensors: Dict[int, TensorProto] = {}  # all tensors
handler: backend.GraphHandler
def __init__(self, model: ModelProto, runtime):
@ -39,13 +43,13 @@ class OnnxStub:
tensors: Dict[str, backend.Tensor] = dict()
data: Dict[str, TensorProto] = dict()
for input in model.graph.input:
dims = _take_shape_dim(input.type.tensor_type.shape)
tensors[input.name] = self.handler.tensor(
dims, input.type.tensor_type.elem_type
)
for output in model.graph.output:
dims = _take_shape_dim(output.type.tensor_type.shape)
tensors[output.name] = self.handler.tensor(
@ -54,6 +58,10 @@ class OnnxStub:
for initializer in model.graph.initializer:
data[initializer.name] = initializer
# weights and params are not in "model.graph.input" when .onnx is converted from .pth
tensors[initializer.name] = self.handler.tensor(
initializer.dims, initializer.data_type
)
for node in model.graph.node:
if node.op_type == "Conv":
@ -331,9 +339,30 @@ class OnnxStub:
_parse_data(data[node.input[1]]),
_parse_data(data[node.input[3]]) if len(node.input) > 3 else None,
)
elif node.op_type == "Constant":
attr = next((attr for attr in node.attribute if attr.name == "value"), None)
if attr == None:
raise Exception("no value in constant nodeproto")
if attr.type == 4: #TENSOR
value_tensor = self.handler.tensor(
[self.get_data_len(attr.t)], attr.t.data_type
)
value_tensor.data_malloc()
self.fill_value(value_tensor, attr.t)
tensors[node.output[0]] = self.handler.constant(
value_tensor
)
else :
raise Exception("TODO")
elif node.op_type == "Unsqueeze":
tensors[node.output[0]] = self.handler.unsqueeze(
tensors[node.input[0]],
_parse_data(data[node.input[1]]) if len(node.input) > 1 else next((attr.ints for attr in node.attribute if attr.name == "axes")),
tensors.get(node.output[0]))
else:
print(node)
raise Exception('Unsupported operator "{}"'.format(node.op_type))
self.handler.data_malloc()
for name, obj in tensors.items():
@ -341,19 +370,84 @@ class OnnxStub:
if tensor is None:
if any(input.name == name for input in model.graph.input):
self.inputs[name] = obj
else:
if tensor is not None:
self.initializer[obj.fuid()] = tensor
if tensor.data_type == TensorProto.INT32:
obj.copyin_int32([int(i) for i in tensor.int32_data])
elif tensor.data_type == TensorProto.INT64:
obj.copyin_int64([int(i) for i in tensor.int64_data])
elif tensor.data_type == TensorProto.FLOAT:
obj.copyin_float([int(i) for i in tensor.float_data])
else:
assert False, "Unsupported Tensor Type: {}".format(tensor.data_type)
self.fill_value(obj, tensor)
for output in model.graph.output:
self.outputs[output.name] = tensors[output.name]
self.outputs[output.name] = tensors[output.name]
self.tensors = tensors
def get_data_len(self, tensor) -> int:
length = 0
ele_size = 1
if tensor.data_type == TensorProto.INT32:
length = len(tensor.int32_data)
ele_size = 4
elif tensor.data_type == TensorProto.INT64:
length = len(tensor.int64_data)
ele_size = 8
elif tensor.data_type == TensorProto.FLOAT:
length = len(tensor.float_data)
ele_size = 4
else:
assert False, "Unsupported Tensor Type: {}".format(tensor.data_type)
if length == 0:
length = len(tensor.raw_data) // ele_size
return length
def fill_inputs(self, inputs: List[TensorProto]):
for tensor in inputs:
obj = self.inputs[tensor.name]
self.fill_value(obj, tensor)
def fill_value(self, obj, tensor):
# Data may be stored in raw_data when the .onnx is converted from .pth
# or when parsing a Constant operator
if tensor.data_type == TensorProto.INT32:
int32_data = tensor.int32_data
if len(int32_data) == 0:
int32_data = self.parse_data(tensor.raw_data, tensor.data_type)
obj.copyin_int32([int(i) for i in int32_data])
elif tensor.data_type == TensorProto.INT64:
int64_data = tensor.int64_data
if len(int64_data) == 0:
int64_data = self.parse_data(tensor.raw_data, tensor.data_type)
# TODO: convert int64 to int32, because the backend does not support int64
obj.copyin_int32([np.int32(i) for i in int64_data])
elif tensor.data_type == TensorProto.FLOAT:
float_data = tensor.float_data
if len(float_data) == 0:
float_data = self.parse_data(tensor.raw_data, tensor.data_type)
obj.copyin_float([i for i in float_data])
else:
assert False, "Unsupported Tensor Type: {}".format(tensor.data_type)
def parse_data(self, raw_data, dtype):
if dtype == TensorProto.FLOAT:
fmt = 'f'
elem_size = 4
elif dtype == TensorProto.FLOAT16:
fmt = 'e'
elem_size = 2
elif dtype == TensorProto.INT32:
fmt = 'i'
elem_size = 4
elif dtype == TensorProto.INT64:
fmt = 'q'
elem_size = 8
else:
raise ValueError('Unsupported data type')
num_elems = len(raw_data) // elem_size
data = []
for i in range(num_elems):
start_idx = i * elem_size
end_idx = start_idx + elem_size
elem_bytes = raw_data[start_idx:end_idx]
elem_value = struct.unpack("<" + fmt, elem_bytes)[0]  # ONNX raw_data is little-endian
data.append(elem_value)
return data
def to_onnx(self, name: str) -> ModelProto:
class Context:
@ -576,9 +670,11 @@ def from_onnx(model: ModelProto, runtime):
stub = OnnxStub(model, runtime)
return stub.inputs, stub.outputs, stub.handler
def run_onnx(model: ModelProto, runtime):
def run_onnx(model: ModelProto, inputs: List[TensorProto]):
stub = OnnxStub(model, runtime)
stub.fill_inputs(inputs)
stub.run()
return stub.outputs
def _parse_attribute(node: NodeProto, attrs: Dict[str, Any] = dict()) -> Dict[str, Any]:
for attr in node.attribute:

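A quick check of the raw_data fallback that fill_value and parse_data now handle (sketch only): it builds an INT64 TensorProto the way torch.onnx.export typically stores axes, then decodes it the same way parse_data does.

import struct
from onnx import TensorProto
from onnx.helper import make_tensor

# INT64 axes stored as little-endian raw bytes rather than in int64_data
t = make_tensor("axes", TensorProto.INT64, [2], struct.pack("<2q", 1, 0), raw=True)
assert len(t.int64_data) == 0 and len(t.raw_data) == 16

# parse_data-style decode: 8 bytes per element, format 'q'
vals = [struct.unpack("<q", t.raw_data[i * 8:(i + 1) * 8])[0]
        for i in range(len(t.raw_data) // 8)]
assert vals == [1, 0]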
View File

@ -0,0 +1,44 @@
import torch
import torchvision
import os
from models.drn import drn_a_50, drn_c_26
def export_model(modelfn, file_name):
files = os.listdir()
file = next(
(name for name in files if name.endswith(file_name)), None
)
if file is None:
# Load the pretrained model
model = modelfn(pretrained=True)
# Set the model to evaluation mode
model.eval()
# Create a sample input tensor
input_tensor = torch.randn(1, 3, 224, 224)
# Export the model to ONNX format
torch.onnx.export(model, input_tensor, file_name)
from transformers import BertTokenizer, BertModel
def export_bert():
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained("bert-base-uncased")
text = "Replace me by any text you'd like."
encoded_input = tokenizer(text, return_tensors='pt')
#output = model(**encoded_input)
#inputs = torch.randn(1, 128, dtype=torch.float32)
input_names = ["input_ids", "attention_mask", "token_type_ids"]
torch.onnx.export(model, tuple(encoded_input.values()),
f="bert.onnx",
input_names=input_names,
output_names=["output"])
# https://huggingface.co/blog/convert-transformers-to-onnx#4-how-can-i-convert-a-transformers-model-bert-to-onnx
#export_model(torchvision.models.resnet18, "resnet18.onnx")
#export_model(torchvision.models.resnet50, "resnet50.onnx")
#export_model(drn_c_26, "drn_c_26.onnx")
export_model(torchvision.models.inception_v3, "inception_v3.onnx")

View File

@ -0,0 +1,416 @@
# source from https://github.com/fyu/drn/blob/master/drn.py
import pdb
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo
BatchNorm = nn.BatchNorm2d
# __all__ = ['DRN', 'drn26', 'drn42', 'drn58']
webroot = 'http://dl.yf.io/drn/'
model_urls = {
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'drn-c-26': webroot + 'drn_c_26-ddedf421.pth',
'drn-c-42': webroot + 'drn_c_42-9d336e8c.pth',
'drn-c-58': webroot + 'drn_c_58-0a53a92c.pth',
'drn-d-22': webroot + 'drn_d_22-4bd2f8ea.pth',
'drn-d-38': webroot + 'drn_d_38-eebb45f0.pth',
'drn-d-54': webroot + 'drn_d_54-0e0534ff.pth',
'drn-d-105': webroot + 'drn_d_105-12b40979.pth'
}
def conv3x3(in_planes, out_planes, stride=1, padding=1, dilation=1):
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=padding, bias=False, dilation=dilation)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None,
dilation=(1, 1), residual=True):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride,
padding=dilation[0], dilation=dilation[0])
self.bn1 = BatchNorm(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes,
padding=dilation[1], dilation=dilation[1])
self.bn2 = BatchNorm(planes)
self.downsample = downsample
self.stride = stride
self.residual = residual
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
if self.residual:
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None,
dilation=(1, 1), residual=True):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = BatchNorm(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=dilation[1], bias=False,
dilation=dilation[1])
self.bn2 = BatchNorm(planes)
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
self.bn3 = BatchNorm(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class DRN(nn.Module):
def __init__(self, block, layers, num_classes=1000,
channels=(16, 32, 64, 128, 256, 512, 512, 512),
out_map=False, out_middle=False, pool_size=28, arch='D'):
super(DRN, self).__init__()
self.inplanes = channels[0]
self.out_map = out_map
self.out_dim = channels[-1]
self.out_middle = out_middle
self.arch = arch
if arch == 'C':
self.conv1 = nn.Conv2d(3, channels[0], kernel_size=7, stride=1,
padding=3, bias=False)
self.bn1 = BatchNorm(channels[0])
self.relu = nn.ReLU(inplace=True)
self.layer1 = self._make_layer(
BasicBlock, channels[0], layers[0], stride=1)
self.layer2 = self._make_layer(
BasicBlock, channels[1], layers[1], stride=2)
elif arch == 'D':
self.layer0 = nn.Sequential(
nn.Conv2d(3, channels[0], kernel_size=7, stride=1, padding=3,
bias=False),
BatchNorm(channels[0]),
nn.ReLU(inplace=True)
)
self.layer1 = self._make_conv_layers(
channels[0], layers[0], stride=1)
self.layer2 = self._make_conv_layers(
channels[1], layers[1], stride=2)
self.layer3 = self._make_layer(block, channels[2], layers[2], stride=2)
self.layer4 = self._make_layer(block, channels[3], layers[3], stride=2)
self.layer5 = self._make_layer(block, channels[4], layers[4],
dilation=2, new_level=False)
self.layer6 = None if layers[5] == 0 else \
self._make_layer(block, channels[5], layers[5], dilation=4,
new_level=False)
if arch == 'C':
self.layer7 = None if layers[6] == 0 else \
self._make_layer(BasicBlock, channels[6], layers[6], dilation=2,
new_level=False, residual=False)
self.layer8 = None if layers[7] == 0 else \
self._make_layer(BasicBlock, channels[7], layers[7], dilation=1,
new_level=False, residual=False)
elif arch == 'D':
self.layer7 = None if layers[6] == 0 else \
self._make_conv_layers(channels[6], layers[6], dilation=2)
self.layer8 = None if layers[7] == 0 else \
self._make_conv_layers(channels[7], layers[7], dilation=1)
if num_classes > 0:
self.avgpool = nn.AvgPool2d(pool_size)
self.fc = nn.Conv2d(self.out_dim, num_classes, kernel_size=1,
stride=1, padding=0, bias=True)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, BatchNorm):
m.weight.data.fill_(1)
m.bias.data.zero_()
def _make_layer(self, block, planes, blocks, stride=1, dilation=1,
new_level=True, residual=True):
assert dilation == 1 or dilation % 2 == 0
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
BatchNorm(planes * block.expansion),
)
layers = list()
layers.append(block(
self.inplanes, planes, stride, downsample,
dilation=(1, 1) if dilation == 1 else (
dilation // 2 if new_level else dilation, dilation),
residual=residual))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes, residual=residual,
dilation=(dilation, dilation)))
return nn.Sequential(*layers)
def _make_conv_layers(self, channels, convs, stride=1, dilation=1):
modules = []
for i in range(convs):
modules.extend([
nn.Conv2d(self.inplanes, channels, kernel_size=3,
stride=stride if i == 0 else 1,
padding=dilation, bias=False, dilation=dilation),
BatchNorm(channels),
nn.ReLU(inplace=True)])
self.inplanes = channels
return nn.Sequential(*modules)
def forward(self, x):
y = list()
if self.arch == 'C':
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
elif self.arch == 'D':
x = self.layer0(x)
x = self.layer1(x)
y.append(x)
x = self.layer2(x)
y.append(x)
x = self.layer3(x)
y.append(x)
x = self.layer4(x)
y.append(x)
x = self.layer5(x)
y.append(x)
if self.layer6 is not None:
x = self.layer6(x)
y.append(x)
if self.layer7 is not None:
x = self.layer7(x)
y.append(x)
if self.layer8 is not None:
x = self.layer8(x)
y.append(x)
if self.out_map:
x = self.fc(x)
else:
x = self.avgpool(x)
x = self.fc(x)
x = x.view(x.size(0), -1)
if self.out_middle:
return x, y
else:
return x
class DRN_A(nn.Module):
def __init__(self, block, layers, num_classes=1000):
self.inplanes = 64
super(DRN_A, self).__init__()
self.out_dim = 512 * block.expansion
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=1,
dilation=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=1,
dilation=4)
self.avgpool = nn.AvgPool2d(28, stride=1)
self.fc = nn.Linear(512 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, BatchNorm):
m.weight.data.fill_(1)
m.bias.data.zero_()
# for m in self.modules():
# if isinstance(m, nn.Conv2d):
# nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
# elif isinstance(m, nn.BatchNorm2d):
# nn.init.constant_(m.weight, 1)
# nn.init.constant_(m.bias, 0)
def _make_layer(self, block, planes, blocks, stride=1, dilation=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes,
dilation=(dilation, dilation)))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
return x
def drn_a_50(pretrained=False, **kwargs):
model = DRN_A(Bottleneck, [3, 4, 6, 3], **kwargs)
if pretrained:
model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
return model
def drn_c_26(pretrained=False, **kwargs):
model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 1, 1], arch='C', **kwargs)
if pretrained:
model.load_state_dict(model_zoo.load_url(model_urls['drn-c-26']))
return model
def drn_c_42(pretrained=False, **kwargs):
model = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 1, 1], arch='C', **kwargs)
if pretrained:
model.load_state_dict(model_zoo.load_url(model_urls['drn-c-42']))
return model
def drn_c_58(pretrained=False, **kwargs):
model = DRN(Bottleneck, [1, 1, 3, 4, 6, 3, 1, 1], arch='C', **kwargs)
if pretrained:
model.load_state_dict(model_zoo.load_url(model_urls['drn-c-58']))
return model
def drn_d_22(pretrained=False, **kwargs):
model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 1, 1], arch='D', **kwargs)
if pretrained:
model.load_state_dict(model_zoo.load_url(model_urls['drn-d-22']))
return model
def drn_d_24(pretrained=False, **kwargs):
model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 2, 2], arch='D', **kwargs)
if pretrained:
model.load_state_dict(model_zoo.load_url(model_urls['drn-d-24']))
return model
def drn_d_38(pretrained=False, **kwargs):
model = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 1, 1], arch='D', **kwargs)
if pretrained:
model.load_state_dict(model_zoo.load_url(model_urls['drn-d-38']))
return model
def drn_d_40(pretrained=False, **kwargs):
model = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 2, 2], arch='D', **kwargs)
if pretrained:
model.load_state_dict(model_zoo.load_url(model_urls['drn-d-40']))
return model
def drn_d_54(pretrained=False, **kwargs):
model = DRN(Bottleneck, [1, 1, 3, 4, 6, 3, 1, 1], arch='D', **kwargs)
if pretrained:
model.load_state_dict(model_zoo.load_url(model_urls['drn-d-54']))
return model
def drn_d_56(pretrained=False, **kwargs):
model = DRN(Bottleneck, [1, 1, 3, 4, 6, 3, 2, 2], arch='D', **kwargs)
if pretrained:
model.load_state_dict(model_zoo.load_url(model_urls['drn-d-56']))
return model
def drn_d_105(pretrained=False, **kwargs):
model = DRN(Bottleneck, [1, 1, 3, 4, 23, 3, 1, 1], arch='D', **kwargs)
if pretrained:
model.load_state_dict(model_zoo.load_url(model_urls['drn-d-105']))
return model
def drn_d_107(pretrained=False, **kwargs):
model = DRN(Bottleneck, [1, 1, 3, 4, 23, 3, 2, 2], arch='D', **kwargs)
if pretrained:
model.load_state_dict(model_zoo.load_url(model_urls['drn-d-107']))
return model

View File

@ -0,0 +1,82 @@
import os, onnx, unittest
from typing import Dict
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
from PIL import Image
from onnx import TensorProto
from onnx.helper import (
make_model,
make_node,
make_tensor,
make_graph,
make_tensor_value_info,
)
from onnx.checker import check_model
from pyinfinitensor.onnx import from_onnx, backend, run_onnx
import onnxruntime
def pre_process(img):
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
transfn = transforms.Compose([transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
normalize,
])
#img = np.transpose(img,(1,2,0))
return transfn(img)
def model_run(onnx_file_name, input_data):
dir_path = "./"
files = os.listdir(dir_path)
model_file = next(
(name for name in files if name.endswith(onnx_file_name)), None
)
if model_file is not None:
model_path = os.path.join(dir_path, model_file)
print(
"model: {file}({size:.2f} MiB)".format(
file=model_path, size=os.path.getsize(model_path) / 1024 / 1024
)
)
model = onnx.load(model_path)
check_model(model)
session = onnxruntime.InferenceSession(model.SerializeToString(), None)
input_name = session.get_inputs()[0].name
input_tensor = make_tensor(input_name, TensorProto.FLOAT, input_data.shape, input_data.flatten())
run_onnx(model, [input_tensor])
def get_img_data():
dir_path = "./"
files = os.listdir(dir_path)
img_file = next(
(name for name in files if name.endswith(".jpg")), None
)
if img_file is not None:
img_path = os.path.join(dir_path, img_file)
try:
img = Image.open(img_path)
input_data = pre_process(img)
img.close()
except FileNotFoundError:
print(f"Image not found: {img_file}")
else:
return input_data.numpy().astype(np.float32)
class TestStringMethods(unittest.TestCase):
def test_model_run(self):
#input_data = get_img_data()
input_data = np.random.rand(1,3,224,224).astype(np.float32)
model_run("resnet18.onnx", input_data)
model_run("resnet50.onnx", input_data)
model_run("drn_c_26.onnx", input_data)
if __name__ == "__main__":
unittest.main()

View File

@ -1,4 +1,7 @@
import os, onnx, unittest
from typing import Dict
import numpy as np
import onnxruntime
from onnx import TensorProto
from onnx.helper import (
make_model,
@ -16,20 +19,7 @@ def make_and_import_model(graph: onnx.GraphProto):
check_model(model)
from_onnx(model, runtime)
class TestStringMethods(unittest.TestCase):
#def test_run(self):
# model_file = next(
# (name for name in os.listdir() if name.endswith(".onnx")), None
# )
# if model_file != None:
# print(
# "model: {file}({size:.2f} MiB)".format(
# file=model_file, size=os.path.getsize(model_file) / 1024 / 1024
# )
# )
# run_onnx(onnx.load(model_file), runtime)
def test_load(self):
model_file = next(
(name for name in os.listdir() if name.endswith(".onnx")), None
@ -317,5 +307,6 @@ class TestStringMethods(unittest.TestCase):
handler.reshape(x, y, [3, 2, 1])
if __name__ == "__main__":
unittest.main()

View File

@ -1,6 +1,7 @@
#include "core/graph_handler.h"
#include "operators/batch_norm.h"
#include "operators/concat.h"
#include "operators/constant.h"
#include "operators/conv.h"
#include "operators/element_wise.h"
#include "operators/gather.h"
@ -12,6 +13,7 @@
#include "operators/slice.h"
#include "operators/softmax.h"
#include "operators/unary.h"
#include "operators/unsqueeze.h"
namespace infini {
@ -227,6 +229,24 @@ Tensor GraphHandlerObj::pad(Tensor input, Tensor output,
}
}
Tensor GraphHandlerObj::constant(Tensor output) {
if (output) {
g->addOpWithOutputs<ConstantObj>(output);
return output;
} else
return g->addOp<ConstantObj>(output)->getOutput();
}
Tensor GraphHandlerObj::unsqueeze(Tensor input, const vector<int> &axes,
Tensor output) {
if (output) {
g->addOpWithOutputs<UnsqueezeObj>(std::move(input), axes, output);
return output;
} else
return g->addOp<UnsqueezeObj>(std::move(input), axes, output)
->getOutput();
}
static DataType dtype_repr_convert(int dtype) {
switch ((OnnxDType)dtype) {
case OnnxDType::FLOAT:
@ -243,8 +263,11 @@ static DataType dtype_repr_convert(int dtype) {
return DataType::Int16;
case OnnxDType::INT32:
return DataType::Int32;
// TODO: convert Int64 to Int32, because some backends (e.g. dnnl) do not
// support int64
case OnnxDType::INT64:
return DataType::Int64;
// return DataType::Int64;
return DataType::Int32;
default:
IT_ASSERT(false, "Unsupported data type");
}
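
Because OnnxDType::INT64 now deliberately maps to Int32, 64-bit initializers are narrowed when copied in. A sketch of the assumption this relies on (it uses the same np.int32 narrowing fill_value performs; values must fit in 32 bits):

import numpy as np

int64_vals = [1, 0, 2**31 - 1]
int32_vals = [int(np.int32(v)) for v in int64_vals]  # same narrowing fill_value performs
assert int32_vals == int64_vals  # holds only while every value fits in int32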

View File

@ -62,7 +62,11 @@ void TensorObj::printData() const {
if (dtype == DataType::Float32)
printDataFloat();
else if (dtype == DataType::UInt32)
printDataUint32_t();
printDataInteger<unsigned int>();
else if (dtype == DataType::Int32)
printDataInteger<int>();
else if (dtype == DataType::Int64)
printDataInteger<long long>();
else
IT_TODO_HALT();
}
@ -94,12 +98,12 @@ void TensorObj::printDataFloat() const {
}
}
void TensorObj::printDataUint32_t() const {
template <typename T> void TensorObj::printDataInteger() const {
IT_ASSERT(data != nullptr);
std::cout << "Tensor: " << guid << std::endl;
auto numDims = shape.size();
auto dimSzVec = std::vector<int>(numDims, 1);
auto ptr = data->getPtr<VType *>();
auto ptr = data->getPtr<T *>();
dimSzVec[numDims - 1] = shape[numDims - 1];
for (int i = numDims - 1; i != 0; --i)
dimSzVec[i - 1] = dimSzVec[i] * shape[i - 1];
@ -133,9 +137,15 @@ bool TensorObj::equalData(const Tensor &rhs) const {
if (getDType() == DataType::UInt32)
return equalDataImpl(getRawDataPtr<uint32_t *>(),
rhs->getRawDataPtr<uint32_t *>(), size());
if (getDType() == DataType::Int32)
return equalDataImpl(getRawDataPtr<int32_t *>(),
rhs->getRawDataPtr<int32_t *>(), size());
else if (getDType() == DataType::Float32)
return equalDataImpl(getRawDataPtr<float *>(),
rhs->getRawDataPtr<float *>(), size());
else if (getDType() == DataType::Int64)
return equalDataImpl(getRawDataPtr<long long *>(),
rhs->getRawDataPtr<long long *>(), size());
else
IT_TODO_HALT();
}

View File

@ -195,6 +195,7 @@ void init_graph_builder(py::module &m) {
py::class_<TensorObj, std::shared_ptr<TensorObj>>(m, "Tensor")
.def("fuid", &TensorObj::getFuid, policy::automatic)
.def("shape", &TensorObj::getDims, policy::move)
.def("data_malloc", &TensorObj::dataMalloc, policy::move)
.def("copyin_float", &TensorObj::copyin<float>, policy::move)
.def("copyin_int32", &TensorObj::copyin<int32_t>, policy::move)
.def("copyin_int64", &TensorObj::copyin<int64_t>, policy::move)
@ -236,6 +237,8 @@ void init_graph_builder(py::module &m) {
.def("gather", &Handler::gather, policy::move)
.def("reduce_mean", &Handler::reduceMean, policy::move)
.def("slice", &Handler::slice, policy::move)
.def("constant", &Handler::constant, policy::move)
.def("unsqueeze", &Handler::unsqueeze, policy::move)
.def("pad", &Handler::pad, policy::move)
.def("topo_sort", &Handler::topo_sort, policy::automatic)
.def("operators", &Handler::operators, policy::move)

View File

@ -0,0 +1,15 @@
#include "operators/constant.h"
#include "intelcpu/mkl_kernel_without_config.h"
namespace infini {
class ConstantKernel : public MklKernelWithoutConfig {
void compute(const Operator &_op,
const RuntimeObj *context) const override {
// no-op: a Constant's data is filled in when the graph is parsed
}
};
REGISTER_KERNEL(Device::INTELCPU, OpType::Constant, DataType::Int32,
ConstantKernel, "Constant_Mkl_Int32");
REGISTER_KERNEL(Device::INTELCPU, OpType::Constant, DataType::Float32,
ConstantKernel, "Constant_Mkl_Float32");
} // namespace infini

View File

@ -38,12 +38,12 @@ class MklGather : public MklKernelWithoutConfig {
sycl::queue q(sycl::cpu_selector{});
auto inDevice = sycl::malloc_device<float>(iSize, q);
auto indexDevice = sycl::malloc_device<uint32_t>(idxSize, q);
auto indexDevice = sycl::malloc_device<int32_t>(idxSize, q);
auto outDevice = sycl::malloc_device<float>(oSize, q);
q.memcpy(inDevice, in->getRawDataPtr<float *>(), iSize * sizeof(float));
q.memcpy(indexDevice, index->getRawDataPtr<uint32_t *>(),
idxSize * sizeof(uint32_t));
q.memcpy(indexDevice, index->getRawDataPtr<int32_t *>(),
idxSize * sizeof(int32_t));
q.wait();
q.parallel_for(sycl::range<1>(oSize), [=](sycl::id<1> index) {

View File

@ -53,9 +53,9 @@ template <typename T> class MklDpcppMatmul : public CpuKernelWithoutConfig {
// create execution queue and buffers of matrix data
cl::sycl::queue main_queue(sycl::cpu_selector{}, exception_handler);
cl::sycl::buffer<float, 1> A_buffer(A, op->getInputs(0)->size());
cl::sycl::buffer<float, 1> B_buffer(B, op->getInputs(1)->size());
cl::sycl::buffer<float, 1> C_buffer(C, op->getOutput(0)->size());
cl::sycl::buffer<T, 1> A_buffer(A, op->getInputs(0)->size());
cl::sycl::buffer<T, 1> B_buffer(B, op->getInputs(1)->size());
cl::sycl::buffer<T, 1> C_buffer(C, op->getOutput(0)->size());
// add oneapi::mkl::blas::gemm to execution queue
try {

View File

@ -1,7 +1,7 @@
#include "operators/reshape.h"
#include "intelcpu/mkl_kernel_without_config.h"
#include "intelcpu/mkl_runtime.h"
#include "operators/unsqueeze.h"
namespace infini {
class MklReshape : public MklKernelWithoutConfig {
void compute(const Operator &op,
@ -47,4 +47,6 @@ REGISTER_KERNEL(Device::INTELCPU, OpType::Identity, DataType::Float32,
MklReshape, "Identify_Mkl_Float32");
REGISTER_KERNEL(Device::INTELCPU, OpType::Flatten, DataType::Float32,
MklReshape, "Flatten_Mkl_Float32");
REGISTER_KERNEL(Device::INTELCPU, OpType::Unsqueeze, DataType::Float32,
MklReshape, "Unsqueeze_Mkl_Float32");
}; // namespace infini

src/operators/constant.cc Normal file
View File

@ -0,0 +1,21 @@
#include "operators/constant.h"
namespace infini {
std::string ConstantObj::toString() const {
std::ostringstream os;
os << "Constant[" << getGuid() << "]";
os << "output = " << outputs[0]->getGuid() << ",";
return os.str();
}
vector<int> ConstantObj::getWorkloadVector() const {
vector<int> ret = outputs[0]->getDims();
ret.emplace(ret.begin(), enum_to_underlying(type));
return ret;
}
vector<int> ConstantObj::getOpAttrVector() const {
return {enum_to_underlying(type)};
}
} // namespace infini

View File

@ -28,7 +28,7 @@ optional<vector<Shape>> GatherObj::inferShape(const TensorVec &inputs) const {
vector<DataType> GatherObj::inferDataType(const TensorVec &inputs) const {
IT_ASSERT(inputs.size() == 2);
auto index = inputs[1];
IT_ASSERT(index->getDType() == DataType::UInt32);
IT_ASSERT(index->getDType() == DataType::Int32);
return {inputs[0]->getDType()};
}

View File

@ -0,0 +1,65 @@
#include "operators/unsqueeze.h"
namespace infini {
UnsqueezeObj::UnsqueezeObj(GraphObj *graph, Tensor in,
const std::vector<int> &index, Tensor out)
: OperatorObj(OpType::Unsqueeze, {in}, {out}) {
IT_ASSERT(parseAxis(index, axis));
IT_ASSERT(checkValid(graph));
}
bool UnsqueezeObj::parseAxis(const std::vector<int> &index,
std::set<int> &axis) const {
bool ret = true;
int nDim = inputs[0]->getDims().size() + index.size();
for (size_t i = 0; i < index.size(); ++i) {
int data = index[i];
if (data < 0)
data += nDim;
if (data >= nDim) {
ret = false;
break;
}
if (axis.find(data) != axis.end()) {
ret = false;
break;
}
axis.insert(data);
}
return ret;
}
optional<vector<Shape>>
UnsqueezeObj::inferShape(const TensorVec &inputs) const {
Shape dims = inputs[0]->getDims();
for (int i : axis) {
auto it = dims.begin();
dims.insert(std::next(it, i), 1);
}
return {{dims}};
}
std::string UnsqueezeObj::toString() const {
std::ostringstream os;
os << "Unsqueeze[" << getGuid() << "]";
os << "(";
os << "inputs=";
for (auto i = 0; i < numInputs(); i++)
os << inputs[i]->getGuid() << ",";
os << "output=" << outputs[0]->getGuid() << ",";
os << ")";
return os.str();
}
vector<int> UnsqueezeObj::getWorkloadVector() const {
vector<int> ret = inputs[0]->getDims();
ret.emplace(ret.begin(), enum_to_underlying(type));
for (auto i : axis)
ret.emplace_back(i);
return ret;
}
vector<int> UnsqueezeObj::getOpAttrVector() const {
return {enum_to_underlying(type)};
}
} // namespace infini

View File

@ -0,0 +1,26 @@
#include "core/graph.h"
#include "core/runtime.h"
#include "intelcpu/mkl_runtime.h"
#include "operators/unsqueeze.h"
#include "test.h"
namespace infini {
TEST(Unsqueeze, Mkl) {
Runtime runtime = MklRuntimeObj::getInstance();
Graph g = make_ref<GraphObj>(runtime);
auto input = g->addTensor({2, 3, 3}, DataType::Float32);
vector<int> index{1, 0};
auto op = g->addOp<UnsqueezeObj>(input, index, nullptr);
g->dataMalloc();
input->setData(IncrementalGenerator());
runtime->run(g);
auto o = g->cloneTensor(op->getOutput(0));
// check results
EXPECT_TRUE(o->equalData(input));
}
} // namespace infini

View File

@ -0,0 +1,29 @@
#include "core/graph.h"
#include "core/runtime.h"
#include "operators/unsqueeze.h"
#include "test.h"
namespace infini {
TEST(Unsqueeze, ShapeInfer) {
{
Runtime runtime = NativeCpuRuntimeObj::getInstance();
Graph g = make_ref<GraphObj>(runtime);
auto input = g->addTensor({1, 3, 2, 15}, DataType::Float32);
vector<int> index{1, 6, 0};
auto op = g->addOp<UnsqueezeObj>(input, index, nullptr);
EXPECT_EQ(op->getOutput(0)->getDims(), (Shape{1, 1, 1, 3, 2, 15, 1}));
}
{
Runtime runtime = NativeCpuRuntimeObj::getInstance();
Graph g = make_ref<GraphObj>(runtime);
auto input = g->addTensor({1, 3, 2, 15}, DataType::Float32);
vector<int> index{-6, -1, -7};
auto op = g->addOp<UnsqueezeObj>(input, index, nullptr);
EXPECT_EQ(op->getOutput(0)->getDims(), (Shape{1, 1, 1, 3, 2, 15, 1}));
}
}
} // namespace infini