forked from jiuyuan/InfiniTensor
ADD unsqueeze
Fix some bugs in onnx_stub, add the Constant operator, fix ONNX data parsing and the run_onnx API, and add some tests for models.
This commit is contained in:
parent
f2be122efc
commit
f021afe199
|
@ -80,6 +80,8 @@ class GraphHandlerObj {
|
|||
const optional<vector<int>> &steps);
|
||||
Tensor pad(Tensor input, Tensor output, const vector<int> &pads,
|
||||
const optional<vector<int>> &axes);
|
||||
Tensor constant(Tensor output);
|
||||
Tensor unsqueeze(Tensor input, const vector<int> &axes, Tensor output);
|
||||
|
||||
//------ modifiers
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@ enum class OpType {
|
|||
Mul,
|
||||
Div,
|
||||
Pow,
|
||||
Sqrt,
|
||||
Gather,
|
||||
ReduceMean,
|
||||
Reshape,
|
||||
|
@ -39,6 +40,8 @@ enum class OpType {
|
|||
Tanh,
|
||||
Abs,
|
||||
Resize,
|
||||
Constant,
|
||||
Unsqueeze,
|
||||
//
|
||||
MemBound = 300,
|
||||
};
|
||||
|
|
|
@ -104,7 +104,7 @@ class TensorObj : public TensorBaseObj {
|
|||
|
||||
private:
|
||||
void printDataFloat() const;
|
||||
void printDataUint32_t() const;
|
||||
template <typename T> void printDataInteger() const;
|
||||
|
||||
template <typename T>
|
||||
bool equalDataImpl(const T *a, const T *b, size_t size) const {
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
#pragma once
|
||||
|
||||
#include "core/operator.h"
|
||||
|
||||
namespace infini {
|
||||
class ConstantObj : public OperatorObj {
|
||||
|
||||
public:
|
||||
ConstantObj(GraphObj *graph, Tensor output)
|
||||
: OperatorObj(OpType::Constant, {}, {output}) {
|
||||
IT_ASSERT(output);
|
||||
IT_ASSERT(checkValid(graph));
|
||||
}
|
||||
OP_CLONE(ConstantObj);
|
||||
|
||||
optional<vector<Shape>> inferShape(const TensorVec &inputs) const {
|
||||
return {{outputs[0]->getDims()}};
|
||||
};
|
||||
|
||||
std::string toString() const override;
|
||||
int numInputs() const override { return 0; }
|
||||
int numOutputs() const override { return 1; }
|
||||
void makeConstant() {}
|
||||
|
||||
private:
|
||||
vector<int> getWorkloadVector() const override;
|
||||
vector<int> getOpAttrVector() const override;
|
||||
};
|
||||
} // namespace infini
|
|
@ -46,4 +46,5 @@ DEFINE_ELEMENT_WISE_OBJ(Sub, OpType::Sub)
|
|||
DEFINE_ELEMENT_WISE_OBJ(Mul, OpType::Mul)
|
||||
DEFINE_ELEMENT_WISE_OBJ(Div, OpType::Div)
|
||||
DEFINE_ELEMENT_WISE_OBJ(Pow, OpType::Pow)
|
||||
DEFINE_ELEMENT_WISE_OBJ(Sqrt, OpType::Sqrt)
|
||||
}; // namespace infini
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
#pragma once

#include "core/operator.h"

#include <set>
#include <string>
#include <vector>

namespace infini {

/**
 * @brief Unsqueeze operator: one input tensor, one output tensor, and a set
 *        of axes.
 *
 * Only the declaration lives here; shape inference, axis parsing and the
 * string/attribute helpers are defined in the implementation file.
 * NOTE(review): presumably follows ONNX Unsqueeze semantics (insert size-1
 * dimensions at `axis`) - confirm against the .cc file.
 */
class UnsqueezeObj : public OperatorObj {
    /// Axes stored as a set, so duplicates collapse and order is sorted.
    set<int> axis;

  public:
    /**
     * @param graph  Graph the operator is created for.
     * @param input  Tensor to be unsqueezed.
     * @param axis   Axes requested by the caller.
     * @param output Output tensor.
     */
    UnsqueezeObj(GraphObj *graph, Tensor input, const vector<int> &axis,
                 Tensor output);
    OP_CLONE(UnsqueezeObj);

    optional<vector<Shape>> inferShape(const TensorVec &inputs) const override;
    std::string toString() const override;
    int numInputs() const override { return 1; }
    int numOutputs() const override { return 1; }

  private:
    /// Converts the axis list `index` into the set `axis`; returns a success flag.
    bool parseAxis(const std::vector<int> &index, std::set<int> &axis) const;
    vector<int> getWorkloadVector() const override;
    vector<int> getOpAttrVector() const override;
};

} // namespace infini
|
|
@ -16,6 +16,8 @@ class DataGenerator {
|
|||
fill(reinterpret_cast<uint32_t *>(data), size);
|
||||
else if (dataType == DataType::Float32)
|
||||
fill(reinterpret_cast<float *>(data), size);
|
||||
else if (dataType == DataType::Int64)
|
||||
fill(reinterpret_cast<long long *>(data), size);
|
||||
else
|
||||
IT_TODO_HALT();
|
||||
}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import backend
|
||||
import numpy as np
|
||||
from onnx import (
|
||||
ModelProto,
|
||||
TensorProto,
|
||||
|
@ -24,13 +25,16 @@ from onnx.checker import (
|
|||
from onnx.shape_inference import infer_shapes
|
||||
from typing import Dict, List, Any, Tuple, Sequence, Union, Optional
|
||||
from functools import reduce
|
||||
import struct
|
||||
|
||||
# TODO: do we need a module-level runtime here?
|
||||
runtime = backend.runtime()
|
||||
|
||||
class OnnxStub:
|
||||
inputs: Dict[str, backend.Tensor] = {}
|
||||
outputs: Dict[str, backend.Tensor] = {}
|
||||
initializer: Dict[int, TensorProto] = {}
|
||||
inputs: Dict[str, backend.Tensor] = {} #只包含图最初的输入,不是包含每一层的输入
|
||||
outputs: Dict[str, backend.Tensor] = {} #只包含图最后的输出
|
||||
initializer: Dict[int, TensorProto] = {} #包含图每一层的权重和参数
|
||||
tensors: Dict[int, TensorProto] = {} #包含所有tensor
|
||||
handler: backend.GraphHandler
|
||||
|
||||
def __init__(self, model: ModelProto, runtime):
|
||||
|
@ -39,13 +43,13 @@ class OnnxStub:
|
|||
|
||||
tensors: Dict[str, backend.Tensor] = dict()
|
||||
data: Dict[str, TensorProto] = dict()
|
||||
|
||||
|
||||
for input in model.graph.input:
|
||||
dims = _take_shape_dim(input.type.tensor_type.shape)
|
||||
tensors[input.name] = self.handler.tensor(
|
||||
dims, input.type.tensor_type.elem_type
|
||||
)
|
||||
|
||||
|
||||
for output in model.graph.output:
|
||||
dims = _take_shape_dim(output.type.tensor_type.shape)
|
||||
tensors[output.name] = self.handler.tensor(
|
||||
|
@ -54,6 +58,10 @@ class OnnxStub:
|
|||
|
||||
for initializer in model.graph.initializer:
|
||||
data[initializer.name] = initializer
|
||||
# weights and params are not in "model.graph.input" when .onnx is converted from .pth
|
||||
tensors[initializer.name] = self.handler.tensor(
|
||||
initializer.dims, initializer.data_type
|
||||
)
|
||||
|
||||
for node in model.graph.node:
|
||||
if node.op_type == "Conv":
|
||||
|
@ -331,9 +339,30 @@ class OnnxStub:
|
|||
_parse_data(data[node.input[1]]),
|
||||
_parse_data(data[node.input[3]]) if len(node.input) > 3 else None,
|
||||
)
|
||||
elif node.op_type == "Constant":
|
||||
attr = next((attr for attr in node.attribute if attr.name == "value"), None)
|
||||
if attr == None:
|
||||
raise Exception("no value in constant nodeproto")
|
||||
if attr.type == 4: #TENSOR
|
||||
value_tensor = self.handler.tensor(
|
||||
[self.get_data_len(attr.t)], attr.t.data_type
|
||||
)
|
||||
value_tensor.data_malloc()
|
||||
self.fill_value(value_tensor, attr.t)
|
||||
tensors[node.output[0]] = self.handler.constant(
|
||||
value_tensor
|
||||
)
|
||||
else :
|
||||
raise Exception("TODO")
|
||||
elif node.op_type == "Unsqueeze":
|
||||
tensors[node.output[0]] = self.handler.unsqueeze(
|
||||
tensors[node.input[0]],
|
||||
_parse_data(data[node.input[1]]) if len(node.input) > 1 else next((attr.ints for attr in node.attribute if attr.name == "axes")),
|
||||
tensors.get(node.output[0]))
|
||||
else:
|
||||
print(node)
|
||||
raise Exception('Unsupported operator "{}"'.format(node.op_type))
|
||||
|
||||
|
||||
self.handler.data_malloc()
|
||||
|
||||
for name, obj in tensors.items():
|
||||
|
@ -341,19 +370,84 @@ class OnnxStub:
|
|||
if tensor == None:
|
||||
if any(input.name == name for input in model.graph.input):
|
||||
self.inputs[name] = obj
|
||||
else:
|
||||
if tensor != None:
|
||||
self.initializer[obj.fuid()] = tensor
|
||||
if tensor.data_type == TensorProto.INT32:
|
||||
obj.copyin_int32([int(i) for i in tensor.int32_data])
|
||||
elif tensor.data_type == TensorProto.INT64:
|
||||
obj.copyin_int64([int(i) for i in tensor.int64_data])
|
||||
elif tensor.data_type == TensorProto.FLOAT:
|
||||
obj.copyin_float([int(i) for i in tensor.float_data])
|
||||
else:
|
||||
assert False, "Unsupported Tensor Type: {}".format(tensor.data_type)
|
||||
self.fill_value(obj, tensor)
|
||||
|
||||
for output in model.graph.output:
|
||||
self.outputs[output.name] = tensors[output.name]
|
||||
self.outputs[output.name] = tensors[output.name]
|
||||
|
||||
self.tensors = tensors
|
||||
|
||||
def get_data_len(self, tensor) -> int:
    """Return how many elements the ONNX tensor ``tensor`` carries.

    The typed repeated field matching ``tensor.data_type`` is counted first;
    if it is empty, the count is derived from the byte length of
    ``tensor.raw_data``. Only INT32, INT64 and FLOAT are handled; any other
    type trips an assertion.
    """
    # data type -> (typed repeated field, bytes per element)
    layouts = {
        TensorProto.INT32: ("int32_data", 4),
        TensorProto.INT64: ("int64_data", 8),
        TensorProto.FLOAT: ("float_data", 4),
    }
    count, width = 0, 1
    if tensor.data_type in layouts:
        field, width = layouts[tensor.data_type]
        count = len(getattr(tensor, field))
    else:
        assert False, "Unsupported Tensor Type: {}".format(tensor.data_type)
    # An empty typed field means the payload lives in raw_data instead.
    return count or len(tensor.raw_data) // width
|
||||
|
||||
def fill_inputs(self, inputs: List[TensorProto]):
    """Copy each given ONNX tensor into the graph input of the same name.

    Every entry of ``inputs`` is looked up in ``self.inputs`` by
    ``tensor.name`` (a missing name raises ``KeyError``) and written with
    ``fill_value``.
    """
    for proto in inputs:
        self.fill_value(self.inputs[proto.name], proto)
|
||||
|
||||
def fill_value(self, obj, tensor):
    """Copy the payload of ONNX tensor ``tensor`` into backend tensor ``obj``.

    The values are read from the typed repeated field for the tensor's data
    type, or decoded from ``raw_data`` when that field is empty (the data may
    be stored in ``raw_data`` when the .onnx file was converted from .pth, or
    when parsing a Constant operator).

    INT32 and INT64 go through ``obj.copyin_int32`` and FLOAT through
    ``obj.copyin_float``; any other data type trips an assertion.
    """
    dtype = tensor.data_type
    if dtype == TensorProto.INT32:
        values = tensor.int32_data
        if len(values) == 0:
            values = self.parse_data(tensor.raw_data, dtype)
        # Copy from the decoded values, not tensor.int32_data, which is
        # empty whenever the payload came from raw_data.
        obj.copyin_int32([int(v) for v in values])
    elif dtype == TensorProto.INT64:
        values = tensor.int64_data
        if len(values) == 0:
            values = self.parse_data(tensor.raw_data, dtype)
        # TODO: int64 is narrowed to int32 because the backend does not
        # support int64.
        obj.copyin_int32([np.int32(v) for v in values])
    elif dtype == TensorProto.FLOAT:
        values = tensor.float_data
        if len(values) == 0:
            values = self.parse_data(tensor.raw_data, dtype)
        obj.copyin_float(list(values))
    else:
        assert False, "Unsupported Tensor Type: {}".format(tensor.data_type)
|
||||
|
||||
def parse_data(self, raw_data, dtype):
    """Decode the bytes of an ONNX ``raw_data`` field into a list of numbers.

    Args:
        raw_data: Byte string holding tightly packed elements.
        dtype: ONNX element type; FLOAT, FLOAT16, INT32 and INT64 are
            supported.

    Returns:
        A list with ``len(raw_data) // element_size`` decoded values.
        Trailing bytes that do not form a whole element are ignored.

    Raises:
        ValueError: If ``dtype`` is not one of the supported types.
    """
    # ONNX type -> (struct format character, element size in bytes)
    formats = {
        TensorProto.FLOAT: ('f', 4),
        TensorProto.FLOAT16: ('e', 2),
        TensorProto.INT32: ('i', 4),
        TensorProto.INT64: ('q', 8),
    }
    if dtype not in formats:
        raise ValueError('Unsupported data type')
    code, elem_size = formats[dtype]

    num_elems = len(raw_data) // elem_size
    # "<" selects little-endian with standard sizes, which is how ONNX lays
    # out raw_data; the native default would depend on the host machine.
    fmt = '<{}{}'.format(num_elems, code)
    return list(struct.unpack(fmt, raw_data[:num_elems * elem_size]))
|
||||
|
||||
def to_onnx(self, name: str) -> ModelProto:
|
||||
class Context:
|
||||
|
@ -576,9 +670,11 @@ def from_onnx(model: ModelProto, runtime):
|
|||
stub = OnnxStub(model, runtime)
|
||||
return stub.inputs, stub.outputs, stub.handler
|
||||
|
||||
def run_onnx(model: ModelProto, inputs: List[TensorProto]):
    """Build an ``OnnxStub`` for ``model``, feed it ``inputs`` and run it.

    The stub is created with the module-level ``runtime``. Each tensor in
    ``inputs`` is matched to a graph input by name via ``fill_inputs``.

    Returns:
        The stub's ``outputs`` mapping after ``stub.run()``.
    """
    stub = OnnxStub(model, runtime)
    stub.fill_inputs(inputs)
    stub.run()
    return stub.outputs
|
||||
|
||||
def _parse_attribute(node: NodeProto, attrs: Dict[str, Any] = dict()) -> Dict[str, Any]:
|
||||
for attr in node.attribute:
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
import torch
|
||||
import torchvision
|
||||
import os
|
||||
from models.drn import drn_a_50, drn_c_26
|
||||
|
||||
|
||||
def export_model(modelfn, file_name):
    """Export a pretrained model to ONNX unless it has been exported already.

    The export is skipped when any entry of the current working directory has
    a name ending in ``file_name``. Otherwise ``modelfn(pretrained=True)`` is
    built, switched to evaluation mode and traced with a random
    ``1x3x224x224`` input.
    """
    already_exported = any(name.endswith(file_name) for name in os.listdir())
    if already_exported:
        return

    # Pretrained weights, evaluation mode.
    net = modelfn(pretrained = True)
    net.eval()

    # Random sample input used for the export.
    dummy_input = torch.randn(1, 3, 224, 224)
    torch.onnx.export(net, dummy_input, file_name)
|
||||
|
||||
|
||||
|
||||
from transformers import BertTokenizer, BertModel
|
||||
def export_bert():
    """Export the pretrained ``bert-base-uncased`` model to ``bert.onnx``.

    A short sentence is tokenized to obtain example inputs for the export.
    """
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertModel.from_pretrained("bert-base-uncased")
    text = "Replace me by any text you'd like."
    encoded_input = tokenizer(text, return_tensors='pt')

    input_names = ["input_ids", "attention_mask", "token_type_ids"]
    # Pick the tensors by name: the tokenizer's key order is not the order of
    # input_names, so relying on .values() could feed token_type_ids where
    # attention_mask is expected and mislabel the exported graph inputs.
    example_args = tuple(encoded_input[name] for name in input_names)
    torch.onnx.export(model, example_args,
                      f="bert.onnx",
                      input_names=input_names,
                      output_names=["output"])
    # https://huggingface.co/blog/convert-transformers-to-onnx#4-how-can-i-convert-a-transformers-model-bert-to-onnx
|
||||
# https://huggingface.co/blog/convert-transformers-to-onnx#4-how-can-i-convert-a-transformers-model-bert-to-onnx
|
||||
|
||||
#export_model(torchvision.models.resnet18, "resnet18.onnx")
|
||||
#export_model(torchvision.models.resnet50, "resnet50.onnx")
|
||||
#export_model(drn_c_26, "drn_c_26.onnx")
|
||||
# Runs on import/execution of this script: exports torchvision's Inception v3
# to "inception_v3.onnx" unless a file with that suffix is already present.
export_model(torchvision.models.inception_v3, "inception_v3.onnx")
|
|
@ -0,0 +1,416 @@
|
|||
# source from https://github.com/fyu/drn/blob/master/drn.py
|
||||
|
||||
import pdb
|
||||
|
||||
import torch.nn as nn
|
||||
import math
|
||||
import torch.utils.model_zoo as model_zoo
|
||||
|
||||
BatchNorm = nn.BatchNorm2d
|
||||
|
||||
|
||||
# __all__ = ['DRN', 'drn26', 'drn42', 'drn58']
|
||||
|
||||
|
||||
webroot = 'http://dl.yf.io/drn/'
|
||||
|
||||
model_urls = {
|
||||
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
|
||||
'drn-c-26': webroot + 'drn_c_26-ddedf421.pth',
|
||||
'drn-c-42': webroot + 'drn_c_42-9d336e8c.pth',
|
||||
'drn-c-58': webroot + 'drn_c_58-0a53a92c.pth',
|
||||
'drn-d-22': webroot + 'drn_d_22-4bd2f8ea.pth',
|
||||
'drn-d-38': webroot + 'drn_d_38-eebb45f0.pth',
|
||||
'drn-d-54': webroot + 'drn_d_54-0e0534ff.pth',
|
||||
'drn-d-105': webroot + 'drn_d_105-12b40979.pth'
|
||||
}
|
||||
|
||||
|
||||
def conv3x3(in_planes, out_planes, stride=1, padding=1, dilation=1):
    """Create a bias-free 3x3 ``nn.Conv2d`` with the given stride, padding and dilation."""
    conv_options = dict(kernel_size=3, stride=stride, padding=padding,
                        bias=False, dilation=dilation)
    return nn.Conv2d(in_planes, out_planes, **conv_options)
|
||||
|
||||
|
||||
class BasicBlock(nn.Module):
    """Two 3x3 conv + batch-norm stages with an optional shortcut connection.

    ``dilation`` is a pair: element 0 is used (as padding and dilation) by the
    first convolution, element 1 by the second. When ``residual`` is false the
    shortcut is not added to the output.
    """

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None,
                 dilation=(1, 1), residual=True):
        super().__init__()
        first, second = dilation[0], dilation[1]
        self.conv1 = conv3x3(inplanes, planes, stride,
                             padding=first, dilation=first)
        self.bn1 = BatchNorm(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes,
                             padding=second, dilation=second)
        self.bn2 = BatchNorm(planes)
        self.downsample = downsample
        self.stride = stride
        self.residual = residual

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # The shortcut is the raw input unless a downsample module was given.
        shortcut = x if self.downsample is None else self.downsample(x)
        if self.residual:
            out += shortcut

        return self.relu(out)
|
||||
|
||||
|
||||
class Bottleneck(nn.Module):
    """1x1 -> 3x3 -> 1x1 convolution block whose output has ``planes * 4`` channels.

    Only ``dilation[1]`` is used (padding and dilation of the 3x3
    convolution). The ``residual`` argument is accepted but not stored; the
    shortcut is always added.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None,
                 dilation=(1, 1), residual=True):
        super().__init__()
        mid_dilation = dilation[1]
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = BatchNorm(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=mid_dilation, bias=False,
                               dilation=mid_dilation)
        self.bn2 = BatchNorm(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = BatchNorm(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # The shortcut is the raw input unless a downsample module was given.
        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)
|
||||
|
||||
|
||||
class DRN(nn.Module):
    """Dilated residual network assembled from eight stages.

    ``arch`` selects how the stem and stages 1, 2, 7 and 8 are built: ``'C'``
    uses ``BasicBlock`` stacks, ``'D'`` uses plain conv/BN/ReLU stacks.
    Stages 3-6 are built from ``block``. A stage whose entry in ``layers`` is
    0 is skipped for stages 6-8.

    ``forward`` returns the classifier output, plus the list of per-stage
    activations when ``out_middle`` is true. With ``out_map`` the 1x1
    classifier is applied without pooling or flattening.
    """

    def __init__(self, block, layers, num_classes=1000,
                 channels=(16, 32, 64, 128, 256, 512, 512, 512),
                 out_map=False, out_middle=False, pool_size=28, arch='D'):
        super().__init__()
        self.inplanes = channels[0]
        self.out_map = out_map
        self.out_dim = channels[-1]
        self.out_middle = out_middle
        self.arch = arch

        # Stem and the first two stages.
        if arch == 'C':
            self.conv1 = nn.Conv2d(3, channels[0], kernel_size=7, stride=1,
                                   padding=3, bias=False)
            self.bn1 = BatchNorm(channels[0])
            self.relu = nn.ReLU(inplace=True)

            self.layer1 = self._make_layer(
                BasicBlock, channels[0], layers[0], stride=1)
            self.layer2 = self._make_layer(
                BasicBlock, channels[1], layers[1], stride=2)
        elif arch == 'D':
            self.layer0 = nn.Sequential(
                nn.Conv2d(3, channels[0], kernel_size=7, stride=1, padding=3,
                          bias=False),
                BatchNorm(channels[0]),
                nn.ReLU(inplace=True)
            )

            self.layer1 = self._make_conv_layers(
                channels[0], layers[0], stride=1)
            self.layer2 = self._make_conv_layers(
                channels[1], layers[1], stride=2)

        # Stages 3-6 always use the requested block type.
        self.layer3 = self._make_layer(block, channels[2], layers[2], stride=2)
        self.layer4 = self._make_layer(block, channels[3], layers[3], stride=2)
        self.layer5 = self._make_layer(block, channels[4], layers[4],
                                       dilation=2, new_level=False)
        if layers[5] == 0:
            self.layer6 = None
        else:
            self.layer6 = self._make_layer(block, channels[5], layers[5],
                                           dilation=4, new_level=False)

        # Final two stages; their shortcuts are disabled for arch 'C'.
        if arch == 'C':
            if layers[6] == 0:
                self.layer7 = None
            else:
                self.layer7 = self._make_layer(
                    BasicBlock, channels[6], layers[6], dilation=2,
                    new_level=False, residual=False)
            if layers[7] == 0:
                self.layer8 = None
            else:
                self.layer8 = self._make_layer(
                    BasicBlock, channels[7], layers[7], dilation=1,
                    new_level=False, residual=False)
        elif arch == 'D':
            if layers[6] == 0:
                self.layer7 = None
            else:
                self.layer7 = self._make_conv_layers(
                    channels[6], layers[6], dilation=2)
            if layers[7] == 0:
                self.layer8 = None
            else:
                self.layer8 = self._make_conv_layers(
                    channels[7], layers[7], dilation=1)

        if num_classes > 0:
            self.avgpool = nn.AvgPool2d(pool_size)
            self.fc = nn.Conv2d(self.out_dim, num_classes, kernel_size=1,
                                stride=1, padding=0, bias=True)

        # Re-initialise conv weights from N(0, sqrt(2 / n)) and reset batch norms.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                n = (module.kernel_size[0] * module.kernel_size[1]
                     * module.out_channels)
                module.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(module, BatchNorm):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1, dilation=1,
                    new_level=True, residual=True):
        """Stack ``blocks`` instances of ``block`` and update ``self.inplanes``."""
        assert dilation == 1 or dilation % 2 == 0
        out_planes = planes * block.expansion

        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                BatchNorm(out_planes),
            )

        # The first block of a new level starts at half the target dilation.
        if dilation == 1:
            first_dilation = (1, 1)
        elif new_level:
            first_dilation = (dilation // 2, dilation)
        else:
            first_dilation = (dilation, dilation)

        stack = [block(self.inplanes, planes, stride, downsample,
                       dilation=first_dilation, residual=residual)]
        self.inplanes = out_planes
        for _ in range(1, blocks):
            stack.append(block(self.inplanes, planes, residual=residual,
                               dilation=(dilation, dilation)))

        return nn.Sequential(*stack)

    def _make_conv_layers(self, channels, convs, stride=1, dilation=1):
        """Stack ``convs`` conv/BN/ReLU triples; only the first conv uses ``stride``."""
        stack = []
        for index in range(convs):
            conv_stride = stride if index == 0 else 1
            stack.append(nn.Conv2d(self.inplanes, channels, kernel_size=3,
                                   stride=conv_stride,
                                   padding=dilation, bias=False,
                                   dilation=dilation))
            stack.append(BatchNorm(channels))
            stack.append(nn.ReLU(inplace=True))
            self.inplanes = channels
        return nn.Sequential(*stack)

    def forward(self, x):
        y = list()

        if self.arch == 'C':
            x = self.relu(self.bn1(self.conv1(x)))
        elif self.arch == 'D':
            x = self.layer0(x)

        # Stages 1-5 always exist.
        for stage in (self.layer1, self.layer2, self.layer3,
                      self.layer4, self.layer5):
            x = stage(x)
            y.append(x)

        # Stages 6-8 may have been disabled at construction time.
        for stage in (self.layer6, self.layer7, self.layer8):
            if stage is not None:
                x = stage(x)
                y.append(x)

        if self.out_map:
            x = self.fc(x)
        else:
            x = self.avgpool(x)
            x = self.fc(x)
            x = x.view(x.size(0), -1)

        if self.out_middle:
            return x, y
        return x
|
||||
|
||||
|
||||
class DRN_A(nn.Module):
    """ResNet-style network whose last two stages use dilation (2, then 4) with stride 1.

    The head is a 28x28 average pool followed by a linear classifier.
    """

    def __init__(self, block, layers, num_classes=1000):
        super().__init__()
        self.inplanes = 64
        self.out_dim = 512 * block.expansion
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=1,
                                       dilation=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=1,
                                       dilation=4)
        self.avgpool = nn.AvgPool2d(28, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Re-initialise conv weights from N(0, sqrt(2 / n)) and reset batch norms.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                n = (module.kernel_size[0] * module.kernel_size[1]
                     * module.out_channels)
                module.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(module, BatchNorm):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1, dilation=1):
        """Stack ``blocks`` instances of ``block``; only blocks after the first get ``dilation``."""
        out_planes = planes * block.expansion

        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        stack = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = out_planes
        for _ in range(1, blocks):
            stack.append(block(self.inplanes, planes,
                               dilation=(dilation, dilation)))

        return nn.Sequential(*stack)

    def forward(self, x):
        # Stem.
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        # Residual stages.
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        # Classifier head.
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)
|
||||
|
||||
|
||||
def drn_a_50(pretrained=False, **kwargs):
    """Build a ``DRN_A`` from ``Bottleneck`` blocks with stage sizes [3, 4, 6, 3].

    With ``pretrained`` the weights are loaded from the ``'resnet50'`` entry
    of ``model_urls``.
    """
    net = DRN_A(Bottleneck, [3, 4, 6, 3], **kwargs)
    if not pretrained:
        return net
    state = model_zoo.load_url(model_urls['resnet50'])
    net.load_state_dict(state)
    return net
|
||||
|
||||
|
||||
def drn_c_26(pretrained=False, **kwargs):
    """Build an arch-'C' ``DRN`` from ``BasicBlock`` with stage sizes [1, 1, 2, 2, 2, 2, 1, 1].

    With ``pretrained`` the weights are loaded from ``model_urls['drn-c-26']``.
    """
    net = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 1, 1], arch='C', **kwargs)
    if not pretrained:
        return net
    state = model_zoo.load_url(model_urls['drn-c-26'])
    net.load_state_dict(state)
    return net
|
||||
|
||||
|
||||
def drn_c_42(pretrained=False, **kwargs):
    """Build an arch-'C' ``DRN`` from ``BasicBlock`` with stage sizes [1, 1, 3, 4, 6, 3, 1, 1].

    With ``pretrained`` the weights are loaded from ``model_urls['drn-c-42']``.
    """
    net = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 1, 1], arch='C', **kwargs)
    if not pretrained:
        return net
    state = model_zoo.load_url(model_urls['drn-c-42'])
    net.load_state_dict(state)
    return net
|
||||
|
||||
|
||||
def drn_c_58(pretrained=False, **kwargs):
    """Build an arch-'C' ``DRN`` from ``Bottleneck`` with stage sizes [1, 1, 3, 4, 6, 3, 1, 1].

    With ``pretrained`` the weights are loaded from ``model_urls['drn-c-58']``.
    """
    net = DRN(Bottleneck, [1, 1, 3, 4, 6, 3, 1, 1], arch='C', **kwargs)
    if not pretrained:
        return net
    state = model_zoo.load_url(model_urls['drn-c-58'])
    net.load_state_dict(state)
    return net
|
||||
|
||||
|
||||
def drn_d_22(pretrained=False, **kwargs):
    """Build an arch-'D' ``DRN`` from ``BasicBlock`` with stage sizes [1, 1, 2, 2, 2, 2, 1, 1].

    With ``pretrained`` the weights are loaded from ``model_urls['drn-d-22']``.
    """
    net = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 1, 1], arch='D', **kwargs)
    if not pretrained:
        return net
    state = model_zoo.load_url(model_urls['drn-d-22'])
    net.load_state_dict(state)
    return net
|
||||
|
||||
|
||||
def drn_d_24(pretrained=False, **kwargs):
    """Build an arch-'D' ``DRN`` from ``BasicBlock`` with stage sizes [1, 1, 2, 2, 2, 2, 2, 2].

    Raises:
        KeyError: If ``pretrained`` is true and ``model_urls`` has no
            ``'drn-d-24'`` entry (none is listed in this file).
    """
    model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 2, 2], arch='D', **kwargs)
    if pretrained:
        url = model_urls.get('drn-d-24')
        if url is None:
            # Same exception type as the plain lookup, but with an explanation.
            raise KeyError("no pretrained weights registered for 'drn-d-24' in model_urls")
        model.load_state_dict(model_zoo.load_url(url))
    return model
|
||||
|
||||
|
||||
def drn_d_38(pretrained=False, **kwargs):
    """Build an arch-'D' ``DRN`` from ``BasicBlock`` with stage sizes [1, 1, 3, 4, 6, 3, 1, 1].

    With ``pretrained`` the weights are loaded from ``model_urls['drn-d-38']``.
    """
    net = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 1, 1], arch='D', **kwargs)
    if not pretrained:
        return net
    state = model_zoo.load_url(model_urls['drn-d-38'])
    net.load_state_dict(state)
    return net
|
||||
|
||||
|
||||
def drn_d_40(pretrained=False, **kwargs):
    """Build an arch-'D' ``DRN`` from ``BasicBlock`` with stage sizes [1, 1, 3, 4, 6, 3, 2, 2].

    Raises:
        KeyError: If ``pretrained`` is true and ``model_urls`` has no
            ``'drn-d-40'`` entry (none is listed in this file).
    """
    model = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 2, 2], arch='D', **kwargs)
    if pretrained:
        url = model_urls.get('drn-d-40')
        if url is None:
            # Same exception type as the plain lookup, but with an explanation.
            raise KeyError("no pretrained weights registered for 'drn-d-40' in model_urls")
        model.load_state_dict(model_zoo.load_url(url))
    return model
|
||||
|
||||
|
||||
def drn_d_54(pretrained=False, **kwargs):
    """Build an arch-'D' ``DRN`` from ``Bottleneck`` with stage sizes [1, 1, 3, 4, 6, 3, 1, 1].

    With ``pretrained`` the weights are loaded from ``model_urls['drn-d-54']``.
    """
    net = DRN(Bottleneck, [1, 1, 3, 4, 6, 3, 1, 1], arch='D', **kwargs)
    if not pretrained:
        return net
    state = model_zoo.load_url(model_urls['drn-d-54'])
    net.load_state_dict(state)
    return net
|
||||
|
||||
|
||||
def drn_d_56(pretrained=False, **kwargs):
    """Build an arch-'D' ``DRN`` from ``Bottleneck`` with stage sizes [1, 1, 3, 4, 6, 3, 2, 2].

    Raises:
        KeyError: If ``pretrained`` is true and ``model_urls`` has no
            ``'drn-d-56'`` entry (none is listed in this file).
    """
    model = DRN(Bottleneck, [1, 1, 3, 4, 6, 3, 2, 2], arch='D', **kwargs)
    if pretrained:
        url = model_urls.get('drn-d-56')
        if url is None:
            # Same exception type as the plain lookup, but with an explanation.
            raise KeyError("no pretrained weights registered for 'drn-d-56' in model_urls")
        model.load_state_dict(model_zoo.load_url(url))
    return model
|
||||
|
||||
|
||||
def drn_d_105(pretrained=False, **kwargs):
    """Build an arch-'D' ``DRN`` from ``Bottleneck`` with stage sizes [1, 1, 3, 4, 23, 3, 1, 1].

    With ``pretrained`` the weights are loaded from ``model_urls['drn-d-105']``.
    """
    net = DRN(Bottleneck, [1, 1, 3, 4, 23, 3, 1, 1], arch='D', **kwargs)
    if not pretrained:
        return net
    state = model_zoo.load_url(model_urls['drn-d-105'])
    net.load_state_dict(state)
    return net
|
||||
|
||||
|
||||
def drn_d_107(pretrained=False, **kwargs):
    """Build an arch-'D' ``DRN`` from ``Bottleneck`` with stage sizes [1, 1, 3, 4, 23, 3, 2, 2].

    Raises:
        KeyError: If ``pretrained`` is true and ``model_urls`` has no
            ``'drn-d-107'`` entry (none is listed in this file).
    """
    model = DRN(Bottleneck, [1, 1, 3, 4, 23, 3, 2, 2], arch='D', **kwargs)
    if pretrained:
        url = model_urls.get('drn-d-107')
        if url is None:
            # Same exception type as the plain lookup, but with an explanation.
            raise KeyError("no pretrained weights registered for 'drn-d-107' in model_urls")
        model.load_state_dict(model_zoo.load_url(url))
    return model
|
|
@ -0,0 +1,82 @@
|
|||
import os, onnx, unittest
|
||||
from typing import Dict
|
||||
import numpy as np
|
||||
import torch
|
||||
import torchvision
|
||||
import torchvision.transforms as transforms
|
||||
from PIL import Image
|
||||
from onnx import TensorProto
|
||||
from onnx.helper import (
|
||||
make_model,
|
||||
make_node,
|
||||
make_tensor,
|
||||
make_graph,
|
||||
make_tensor_value_info,
|
||||
)
|
||||
from onnx.checker import check_model
|
||||
from pyinfinitensor.onnx import from_onnx, backend, run_onnx
|
||||
import onnxruntime
|
||||
|
||||
def pre_process(img) :
    """Resize to 256, center-crop to 224, convert to a tensor and normalize ``img``.

    Normalization uses mean [0.485, 0.456, 0.406] and std [0.229, 0.224, 0.225].
    """
    pipeline = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    return pipeline(img)
|
||||
|
||||
def model_run(onnx_file_name, input_data=None):
    """Load an ONNX model from the current directory and run it through ``run_onnx``.

    Args:
        onnx_file_name: Suffix to look for; the first directory entry whose
            name ends with it is used. If none matches, nothing is run.
        input_data: Float32 numpy array fed to the model's first input.

    Raises:
        ValueError: If a model file was found but ``input_data`` was not given.
    """
    dir_path = "./"
    model_file = next(
        (name for name in os.listdir(dir_path) if name.endswith(onnx_file_name)),
        None,
    )
    if model_file is None:
        return
    if input_data is None:
        raise ValueError(
            "model_run() needs input_data to feed {}".format(model_file)
        )

    model_path = os.path.join(dir_path, model_file)
    print(
        "model: {file}({size:.2f} MiB)".format(
            file=model_path, size=os.path.getsize(model_path) / 1024 / 1024
        )
    )
    model = onnx.load(model_path)
    check_model(model)

    # onnxruntime is used here only to discover the name of the first input.
    session = onnxruntime.InferenceSession(model.SerializeToString(), None)
    input_name = session.get_inputs()[0].name
    # make_tensor takes the values as a flat sequence alongside the dims.
    input_tensor = make_tensor(
        input_name, TensorProto.FLOAT, input_data.shape,
        input_data.flatten().tolist()
    )
    run_onnx(model, [input_tensor])
|
||||
|
||||
def get_img_data():
    """Load and pre-process the first ``.jpg`` found in the current directory.

    Returns:
        The pre-processed image as a float32 numpy array, or ``None`` when no
        ``.jpg`` file is listed or the file cannot be found when opened.
    """
    dir_path = "./"
    img_file = next(
        (name for name in os.listdir(dir_path) if name.endswith(".jpg")), None
    )
    if img_file is None:
        return None

    img_path = os.path.join(dir_path, img_file)
    try:
        # The context manager closes the image even if pre-processing raises.
        with Image.open(img_path) as img:
            input_data = pre_process(img)
    except FileNotFoundError:
        print(f"Image not found: {img_file}")
        return None
    return input_data.numpy().astype(np.float32)
|
||||
class TestStringMethods(unittest.TestCase):
    """Runs each exported model once on a random image-sized input."""

    def test_model_run(self):
        # Random 1x3x224x224 float32 batch; get_img_data() is the image-based alternative.
        input_data = np.random.rand(1,3,224,224).astype(np.float32)
        for model_name in ("resnet18.onnx", "resnet50.onnx", "drn_c_26.onnx"):
            model_run(model_name, input_data)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
|
@ -1,4 +1,7 @@
|
|||
import os, onnx, unittest
|
||||
from typing import Dict
|
||||
import numpy as np
|
||||
import onnxruntime
|
||||
from onnx import TensorProto
|
||||
from onnx.helper import (
|
||||
make_model,
|
||||
|
@ -16,20 +19,7 @@ def make_and_import_model(graph: onnx.GraphProto):
|
|||
check_model(model)
|
||||
from_onnx(model, runtime)
|
||||
|
||||
|
||||
class TestStringMethods(unittest.TestCase):
|
||||
#def test_run(self):
|
||||
# model_file = next(
|
||||
# (name for name in os.listdir() if name.endswith(".onnx")), None
|
||||
# )
|
||||
# if model_file != None:
|
||||
# print(
|
||||
# "model: {file}({size:.2f} MiB)".format(
|
||||
# file=model_file, size=os.path.getsize(model_file) / 1024 / 1024
|
||||
# )
|
||||
# )
|
||||
# run_onnx(onnx.load(model_file), runtime)
|
||||
|
||||
def test_load(self):
|
||||
model_file = next(
|
||||
(name for name in os.listdir() if name.endswith(".onnx")), None
|
||||
|
@ -317,5 +307,6 @@ class TestStringMethods(unittest.TestCase):
|
|||
handler.reshape(x, y, [3, 2, 1])
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#include "core/graph_handler.h"
|
||||
#include "operators/batch_norm.h"
|
||||
#include "operators/concat.h"
|
||||
#include "operators/constant.h"
|
||||
#include "operators/conv.h"
|
||||
#include "operators/element_wise.h"
|
||||
#include "operators/gather.h"
|
||||
|
@ -12,6 +13,7 @@
|
|||
#include "operators/slice.h"
|
||||
#include "operators/softmax.h"
|
||||
#include "operators/unary.h"
|
||||
#include "operators/unsqueeze.h"
|
||||
|
||||
namespace infini {
|
||||
|
||||
|
@ -227,6 +229,24 @@ Tensor GraphHandlerObj::pad(Tensor input, Tensor output,
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * @brief Add a Constant operator that produces `output`.
 *
 * @param output Tensor the operator yields; must not be null.
 * @return The same `output` tensor.
 *
 * ConstantObj has no inputs and its constructor asserts a non-null output, so
 * a null argument can never yield a valid operator. It is rejected here with
 * an explicit message.
 */
Tensor GraphHandlerObj::constant(Tensor output) {
    IT_ASSERT(output != nullptr, "Constant requires an output tensor");
    g->addOpWithOutputs<ConstantObj>(output);
    return output;
}
|
||||
|
||||
/**
 * @brief Add an Unsqueeze operator to the graph.
 *
 * @param input  Tensor to unsqueeze.
 * @param axes   Axes passed to the operator.
 * @param output Optional pre-existing output tensor.
 * @return `output` when one was supplied, otherwise the output of the newly
 *         added operator.
 */
Tensor GraphHandlerObj::unsqueeze(Tensor input, const vector<int> &axes,
                                  Tensor output) {
    // No output supplied: the new operator's own output is returned.
    if (!output)
        return g->addOp<UnsqueezeObj>(std::move(input), axes, output)
            ->getOutput();

    g->addOpWithOutputs<UnsqueezeObj>(std::move(input), axes, output);
    return output;
}
|
||||
|
||||
static DataType dtype_repr_convert(int dtype) {
|
||||
switch ((OnnxDType)dtype) {
|
||||
case OnnxDType::FLOAT:
|
||||
|
@ -243,8 +263,11 @@ static DataType dtype_repr_convert(int dtype) {
|
|||
return DataType::Int16;
|
||||
case OnnxDType::INT32:
|
||||
return DataType::Int32;
|
||||
// TODO: conver Int64 to int32, because some backend like dnnl donot support
|
||||
// int64
|
||||
case OnnxDType::INT64:
|
||||
return DataType::Int64;
|
||||
// return DataType::Int64;
|
||||
return DataType::Int32;
|
||||
default:
|
||||
IT_ASSERT(false, "Unsupported data type");
|
||||
}
|
||||
|
|
|
@ -62,7 +62,11 @@ void TensorObj::printData() const {
|
|||
if (dtype == DataType::Float32)
|
||||
printDataFloat();
|
||||
else if (dtype == DataType::UInt32)
|
||||
printDataUint32_t();
|
||||
printDataInteger<unsigned int>();
|
||||
else if (dtype == DataType::Int32)
|
||||
printDataInteger<int>();
|
||||
else if (dtype == DataType::Int64)
|
||||
printDataInteger<long long>();
|
||||
else
|
||||
IT_TODO_HALT();
|
||||
}
|
||||
|
@ -94,12 +98,12 @@ void TensorObj::printDataFloat() const {
|
|||
}
|
||||
}
|
||||
|
||||
void TensorObj::printDataUint32_t() const {
|
||||
template <typename T> void TensorObj::printDataInteger() const {
|
||||
IT_ASSERT(data != nullptr);
|
||||
std::cout << "Tensor: " << guid << std::endl;
|
||||
auto numDims = shape.size();
|
||||
auto dimSzVec = std::vector<int>(numDims, 1);
|
||||
auto ptr = data->getPtr<VType *>();
|
||||
auto ptr = data->getPtr<T *>();
|
||||
dimSzVec[numDims - 1] = shape[numDims - 1];
|
||||
for (int i = numDims - 1; i != 0; --i)
|
||||
dimSzVec[i - 1] = dimSzVec[i] * shape[i - 1];
|
||||
|
@ -133,9 +137,15 @@ bool TensorObj::equalData(const Tensor &rhs) const {
|
|||
if (getDType() == DataType::UInt32)
|
||||
return equalDataImpl(getRawDataPtr<uint32_t *>(),
|
||||
rhs->getRawDataPtr<uint32_t *>(), size());
|
||||
if (getDType() == DataType::Int32)
|
||||
return equalDataImpl(getRawDataPtr<int32_t *>(),
|
||||
rhs->getRawDataPtr<int32_t *>(), size());
|
||||
else if (getDType() == DataType::Float32)
|
||||
return equalDataImpl(getRawDataPtr<float *>(),
|
||||
rhs->getRawDataPtr<float *>(), size());
|
||||
else if (getDType() == DataType::Int64)
|
||||
return equalDataImpl(getRawDataPtr<long long *>(),
|
||||
rhs->getRawDataPtr<long long *>(), size());
|
||||
else
|
||||
IT_TODO_HALT();
|
||||
}
|
||||
|
|
|
@ -195,6 +195,7 @@ void init_graph_builder(py::module &m) {
|
|||
py::class_<TensorObj, std::shared_ptr<TensorObj>>(m, "Tensor")
|
||||
.def("fuid", &TensorObj::getFuid, policy::automatic)
|
||||
.def("shape", &TensorObj::getDims, policy::move)
|
||||
.def("data_malloc", &TensorObj::dataMalloc, policy::move)
|
||||
.def("copyin_float", &TensorObj::copyin<float>, policy::move)
|
||||
.def("copyin_int32", &TensorObj::copyin<int32_t>, policy::move)
|
||||
.def("copyin_int64", &TensorObj::copyin<int64_t>, policy::move)
|
||||
|
@ -236,6 +237,8 @@ void init_graph_builder(py::module &m) {
|
|||
.def("gather", &Handler::gather, policy::move)
|
||||
.def("reduce_mean", &Handler::reduceMean, policy::move)
|
||||
.def("slice", &Handler::slice, policy::move)
|
||||
.def("constant", &Handler::constant, policy::move)
|
||||
.def("unsqueeze", &Handler::unsqueeze, policy::move)
|
||||
.def("pad", &Handler::pad, policy::move)
|
||||
.def("topo_sort", &Handler::topo_sort, policy::automatic)
|
||||
.def("operators", &Handler::operators, policy::move)
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
#include "operators/constant.h"
|
||||
#include "intelcpu/mkl_kernel_without_config.h"
|
||||
|
||||
namespace infini {
|
||||
class ConstantKernel : public MklKernelWithoutConfig {
|
||||
void compute(const Operator &_op,
|
||||
const RuntimeObj *context) const override {
|
||||
;
|
||||
}
|
||||
};
|
||||
REGISTER_KERNEL(Device::INTELCPU, OpType::Constant, DataType::Int32,
|
||||
ConstantKernel, "Constant_Mkl_Int32");
|
||||
REGISTER_KERNEL(Device::INTELCPU, OpType::Constant, DataType::Float32,
|
||||
ConstantKernel, "Constant_Mkl_Float32");
|
||||
} // namespace infini
|
|
@ -38,12 +38,12 @@ class MklGather : public MklKernelWithoutConfig {
|
|||
|
||||
sycl::queue q(sycl::cpu_selector{});
|
||||
auto inDevice = sycl::malloc_device<float>(iSize, q);
|
||||
auto indexDevice = sycl::malloc_device<uint32_t>(idxSize, q);
|
||||
auto indexDevice = sycl::malloc_device<int32_t>(idxSize, q);
|
||||
auto outDevice = sycl::malloc_device<float>(oSize, q);
|
||||
|
||||
q.memcpy(inDevice, in->getRawDataPtr<float *>(), iSize * sizeof(float));
|
||||
q.memcpy(indexDevice, index->getRawDataPtr<uint32_t *>(),
|
||||
idxSize * sizeof(uint32_t));
|
||||
q.memcpy(indexDevice, index->getRawDataPtr<int32_t *>(),
|
||||
idxSize * sizeof(int32_t));
|
||||
q.wait();
|
||||
|
||||
q.parallel_for(sycl::range<1>(oSize), [=](sycl::id<1> index) {
|
||||
|
|
|
@ -53,9 +53,9 @@ template <typename T> class MklDpcppMatmul : public CpuKernelWithoutConfig {
|
|||
// create execution queue and buffers of matrix data
|
||||
cl::sycl::queue main_queue(sycl::cpu_selector{}, exception_handler);
|
||||
|
||||
cl::sycl::buffer<float, 1> A_buffer(A, op->getInputs(0)->size());
|
||||
cl::sycl::buffer<float, 1> B_buffer(B, op->getInputs(1)->size());
|
||||
cl::sycl::buffer<float, 1> C_buffer(C, op->getOutput(0)->size());
|
||||
cl::sycl::buffer<T, 1> A_buffer(A, op->getInputs(0)->size());
|
||||
cl::sycl::buffer<T, 1> B_buffer(B, op->getInputs(1)->size());
|
||||
cl::sycl::buffer<T, 1> C_buffer(C, op->getOutput(0)->size());
|
||||
|
||||
// add oneapi::mkl::blas::gemm to execution queue
|
||||
try {
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#include "operators/reshape.h"
|
||||
#include "intelcpu/mkl_kernel_without_config.h"
|
||||
#include "intelcpu/mkl_runtime.h"
|
||||
|
||||
#include "operators/unsqueeze.h"
|
||||
namespace infini {
|
||||
class MklReshape : public MklKernelWithoutConfig {
|
||||
void compute(const Operator &op,
|
||||
|
@ -47,4 +47,6 @@ REGISTER_KERNEL(Device::INTELCPU, OpType::Identity, DataType::Float32,
|
|||
MklReshape, "Identify_Mkl_Float32");
|
||||
REGISTER_KERNEL(Device::INTELCPU, OpType::Flatten, DataType::Float32,
|
||||
MklReshape, "Flatten_Mkl_Float32");
|
||||
REGISTER_KERNEL(Device::INTELCPU, OpType::Unsqueeze, DataType::Float32,
|
||||
MklReshape, "Unsqueeze_Mkl_Float32");
|
||||
}; // namespace infini
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
#include "operators/constant.h"
|
||||
|
||||
namespace infini {
|
||||
/// Builds a one-line description of this operator: its own guid followed by
/// the guid of its single output tensor.
std::string ConstantObj::toString() const {
    std::ostringstream description;
    description << "Constant[" << getGuid() << "]"
                << "output = " << outputs[0]->getGuid() << ",";
    return description.str();
}
|
||||
|
||||
/// Returns the operator type id followed by the dimensions of the output
/// tensor.
vector<int> ConstantObj::getWorkloadVector() const {
    vector<int> workload;
    workload.push_back(enum_to_underlying(type));
    const auto &outDims = outputs[0]->getDims();
    workload.insert(workload.end(), outDims.begin(), outDims.end());
    return workload;
}
|
||||
|
||||
// need eps and momentum?
|
||||
vector<int> ConstantObj::getOpAttrVector() const {
|
||||
return {enum_to_underlying(type)};
|
||||
}
|
||||
} // namespace infini
|
|
@ -28,7 +28,7 @@ optional<vector<Shape>> GatherObj::inferShape(const TensorVec &inputs) const {
|
|||
/// Infers the output data type of Gather.
///
/// @param inputs Exactly two tensors: the data tensor and the index tensor.
/// @return A single-element vector holding the data tensor's dtype.
vector<DataType> GatherObj::inferDataType(const TensorVec &inputs) const {
    IT_ASSERT(inputs.size() == 2);
    auto index = inputs[1];
    // Indices must be Int32. Only one dtype may be asserted here: a tensor
    // has a single dtype, so also requiring UInt32 would reject every input.
    IT_ASSERT(index->getDType() == DataType::Int32);
    return {inputs[0]->getDType()};
}
|
||||
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
#include "operators/unsqueeze.h"
|
||||
|
||||
namespace infini {
|
||||
UnsqueezeObj::UnsqueezeObj(GraphObj *graph, Tensor in,
|
||||
const std::vector<int> &index, Tensor out)
|
||||
: OperatorObj(OpType::Unsqueeze, {in}, {out}) {
|
||||
IT_ASSERT(parseAxis(index, axis));
|
||||
IT_ASSERT(checkValid(graph));
|
||||
}
|
||||
|
||||
bool UnsqueezeObj::parseAxis(const std::vector<int> &index,
|
||||
std::set<int> &axis) const {
|
||||
bool ret = true;
|
||||
int nDim = inputs[0]->getDims().size() + index.size();
|
||||
for (size_t i = 0; i < index.size(); ++i) {
|
||||
int data = index[i];
|
||||
if (data < 0)
|
||||
data += nDim;
|
||||
if (data >= nDim) {
|
||||
ret = false;
|
||||
break;
|
||||
}
|
||||
if (axis.find(data) != axis.end()) {
|
||||
ret = false;
|
||||
break;
|
||||
}
|
||||
axis.insert(data);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/// Computes the output shape by inserting a dimension of size 1 at every
/// stored axis.
///
/// The axes are visited in the iteration order of the `axis` set, so each
/// position refers to the shape as extended by the previous insertions.
optional<vector<Shape>>
UnsqueezeObj::inferShape(const TensorVec &inputs) const {
    Shape dims = inputs[0]->getDims();
    for (auto pos = axis.begin(); pos != axis.end(); ++pos)
        dims.insert(std::next(dims.begin(), *pos), 1);
    return {{dims}};
}
|
||||
|
||||
std::string UnsqueezeObj::toString() const {
|
||||
std::ostringstream os;
|
||||
os << "Unsqueeze[" << getGuid() << "]";
|
||||
os << "(";
|
||||
os << "inputs=";
|
||||
for (auto i = 0; i < numInputs(); i++)
|
||||
os << inputs[i]->getGuid() << ",";
|
||||
os << "output=" << outputs[0]->getGuid() << ",";
|
||||
os << ")";
|
||||
|
||||
return os.str();
|
||||
}
|
||||
|
||||
/// Returns the operator type id, followed by the input dimensions, followed
/// by the stored axes.
vector<int> UnsqueezeObj::getWorkloadVector() const {
    vector<int> workload;
    workload.push_back(enum_to_underlying(type));
    const auto &inDims = inputs[0]->getDims();
    workload.insert(workload.end(), inDims.begin(), inDims.end());
    workload.insert(workload.end(), axis.begin(), axis.end());
    return workload;
}
|
||||
/// Returns the attribute vector of this operator, which consists solely of
/// the operator type id.
vector<int> UnsqueezeObj::getOpAttrVector() const {
    vector<int> attrs;
    attrs.push_back(enum_to_underlying(type));
    return attrs;
}
|
||||
} // namespace infini
|
|
@ -0,0 +1,26 @@
|
|||
#include "core/graph.h"
|
||||
#include "core/runtime.h"
|
||||
#include "intelcpu/mkl_runtime.h"
|
||||
#include "operators/unsqueeze.h"
|
||||
|
||||
#include "test.h"
|
||||
|
||||
namespace infini {
|
||||
|
||||
// Runs Unsqueeze on the MKL runtime and checks that the output holds the
// same data as the input.
TEST(Unsqueeze, Mkl) {
    Runtime runtime = MklRuntimeObj::getInstance();
    Graph g = make_ref<GraphObj>(runtime);

    // Build the graph: one 2x3x3 float input, unsqueezed at axes 1 and 0.
    const vector<int> axes{1, 0};
    auto input = g->addTensor({2, 3, 3}, DataType::Float32);
    auto op = g->addOp<UnsqueezeObj>(input, axes, nullptr);

    // Allocate storage and fill the input before running.
    g->dataMalloc();
    input->setData(IncrementalGenerator());
    runtime->run(g);

    // check results
    auto result = op->getOutput(0);
    auto o = g->cloneTensor(result);
    EXPECT_TRUE(o->equalData(input));
}
|
||||
} // namespace infini
|
|
@ -0,0 +1,29 @@
|
|||
#include "core/graph.h"
|
||||
#include "core/runtime.h"
|
||||
#include "operators/unsqueeze.h"
|
||||
|
||||
#include "test.h"
|
||||
|
||||
namespace infini {
|
||||
// Checks shape inference of Unsqueeze for positive and negative axes; both
// axis lists are expected to yield the same 7-D output shape.
TEST(Unsqueeze, ShapeInfer) {
    // Builds a fresh graph with a {1, 3, 2, 15} input, applies Unsqueeze with
    // the given axes and verifies the inferred output shape.
    auto expectUnsqueezedShape = [](const vector<int> &index) {
        Runtime runtime = NativeCpuRuntimeObj::getInstance();
        Graph g = make_ref<GraphObj>(runtime);
        auto input = g->addTensor({1, 3, 2, 15}, DataType::Float32);
        auto op = g->addOp<UnsqueezeObj>(input, index, nullptr);

        EXPECT_EQ(op->getOutput(0)->getDims(), (Shape{1, 1, 1, 3, 2, 15, 1}));
    };

    expectUnsqueezedShape(vector<int>{1, 6, 0});
    expectUnsqueezedShape(vector<int>{-6, -1, -7});
}
|
||||
} // namespace infini
|
Loading…
Reference in New Issue