forked from jiuyuan/InfiniTensor
modified format, replace layernorm as instancenorm
This commit is contained in:
parent
0fcaf001c4
commit
6a89946736
|
@ -38,7 +38,7 @@ class GraphHandlerObj {
|
||||||
Tensor layerNormalization(Tensor input, Tensor scale, Tensor output,
|
Tensor layerNormalization(Tensor input, Tensor scale, Tensor output,
|
||||||
Tensor bias, float eps, int axis, int stash_type);
|
Tensor bias, float eps, int axis, int stash_type);
|
||||||
Tensor instanceNormalization(Tensor input, Tensor output, Tensor scale,
|
Tensor instanceNormalization(Tensor input, Tensor output, Tensor scale,
|
||||||
Tensor bias, float eps);
|
Tensor bias, float eps);
|
||||||
Tensor rmsNorm(Tensor input, Tensor weight, Tensor output);
|
Tensor rmsNorm(Tensor input, Tensor weight, Tensor output);
|
||||||
|
|
||||||
Tensor maxPool(Tensor input, Tensor output, int kh, int kw, int dh, int dw,
|
Tensor maxPool(Tensor input, Tensor output, int kh, int kw, int dh, int dw,
|
||||||
|
|
|
@ -7,12 +7,11 @@ class InstanceNormObj : public OperatorObj {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
InstanceNormObj(GraphObj *graph, Tensor input, Tensor output, Tensor scale,
|
InstanceNormObj(GraphObj *graph, Tensor input, Tensor output, Tensor scale,
|
||||||
Tensor bias , float eps = 1e-5);
|
Tensor bias, float eps = 1e-5);
|
||||||
OP_CLONE(InstanceNormObj);
|
OP_CLONE(InstanceNormObj);
|
||||||
optional<vector<Shape>> inferShape(const TensorVec &inputs) override;
|
optional<vector<Shape>> inferShape(const TensorVec &inputs) override;
|
||||||
std::string toString() const override;
|
std::string toString() const override;
|
||||||
|
|
||||||
|
|
||||||
int numInputs() const override { return inputs.size(); }
|
int numInputs() const override { return inputs.size(); }
|
||||||
int numOutputs() const override { return outputs.size(); }
|
int numOutputs() const override { return outputs.size(); }
|
||||||
float getEps() const { return eps; }
|
float getEps() const { return eps; }
|
||||||
|
|
|
@ -325,9 +325,9 @@ class OnnxStub:
|
||||||
)
|
)
|
||||||
elif node.op_type == "InstanceNormalization":
|
elif node.op_type == "InstanceNormalization":
|
||||||
(input, scale, bias) = (tensors[node.input[i]] for i in [0, 1, 2])
|
(input, scale, bias) = (tensors[node.input[i]] for i in [0, 1, 2])
|
||||||
|
|
||||||
output = tensors.get(node.output[0])
|
output = tensors.get(node.output[0])
|
||||||
|
|
||||||
tensors[node.output[0]] = self.handler.instanceNormalization(
|
tensors[node.output[0]] = self.handler.instanceNormalization(
|
||||||
input,
|
input,
|
||||||
output,
|
output,
|
||||||
|
@ -337,7 +337,6 @@ class OnnxStub:
|
||||||
(attr.f for attr in node.attribute if attr.name == "epsilon"),
|
(attr.f for attr in node.attribute if attr.name == "epsilon"),
|
||||||
1e-5,
|
1e-5,
|
||||||
),
|
),
|
||||||
|
|
||||||
)
|
)
|
||||||
elif node.op_type == "RMSNorm":
|
elif node.op_type == "RMSNorm":
|
||||||
tensors[node.output[0]] = self.handler.RMSNorm(
|
tensors[node.output[0]] = self.handler.RMSNorm(
|
||||||
|
|
|
@ -9,8 +9,8 @@
|
||||||
#include "operators/element_wise.h"
|
#include "operators/element_wise.h"
|
||||||
#include "operators/expand.h"
|
#include "operators/expand.h"
|
||||||
#include "operators/gather.h"
|
#include "operators/gather.h"
|
||||||
#include "operators/layer_norm.h"
|
|
||||||
#include "operators/instance_norm.h"
|
#include "operators/instance_norm.h"
|
||||||
|
#include "operators/layer_norm.h"
|
||||||
#include "operators/lrn.h"
|
#include "operators/lrn.h"
|
||||||
#include "operators/matmul.h"
|
#include "operators/matmul.h"
|
||||||
#include "operators/pad.h"
|
#include "operators/pad.h"
|
||||||
|
@ -125,17 +125,17 @@ Tensor GraphHandlerObj::layerNormalization(Tensor input, Tensor scale,
|
||||||
->getOutput();
|
->getOutput();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Tensor GraphHandlerObj::instanceNormalization(Tensor input,
|
Tensor GraphHandlerObj::instanceNormalization(Tensor input, Tensor output,
|
||||||
Tensor output, Tensor scale, Tensor bias,
|
Tensor scale, Tensor bias,
|
||||||
float eps) {
|
float eps) {
|
||||||
if (output) {
|
if (output) {
|
||||||
g->addOpWithOutputs<InstanceNormObj>(std::move(input), output, std::move(scale),
|
g->addOpWithOutputs<InstanceNormObj>(
|
||||||
std::move(bias), eps);
|
std::move(input), output, std::move(scale), std::move(bias), eps);
|
||||||
return output;
|
return output;
|
||||||
} else {
|
} else {
|
||||||
return g
|
return g
|
||||||
->addOp<InstanceNormObj>(std::move(input), output, std::move(scale),
|
->addOp<InstanceNormObj>(std::move(input), output, std::move(scale),
|
||||||
std::move(bias), eps)
|
std::move(bias), eps)
|
||||||
->getOutput();
|
->getOutput();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -529,7 +529,8 @@ void init_graph_builder(py::module &m) {
|
||||||
.def("matmul", &Handler::matmul, policy::move)
|
.def("matmul", &Handler::matmul, policy::move)
|
||||||
.def("batchNormalization", &Handler::batchNormalization, policy::move)
|
.def("batchNormalization", &Handler::batchNormalization, policy::move)
|
||||||
.def("layerNormalization", &Handler::layerNormalization, policy::move)
|
.def("layerNormalization", &Handler::layerNormalization, policy::move)
|
||||||
.def("instanceNormalization", &Handler::instanceNormalization, policy::move)
|
.def("instanceNormalization", &Handler::instanceNormalization,
|
||||||
|
policy::move)
|
||||||
.def("RMSNorm", &Handler::rmsNorm, policy::move)
|
.def("RMSNorm", &Handler::rmsNorm, policy::move)
|
||||||
.def("maxPool", &Handler::maxPool, policy::move)
|
.def("maxPool", &Handler::maxPool, policy::move)
|
||||||
.def("avgPool", &Handler::avgPool, policy::move)
|
.def("avgPool", &Handler::avgPool, policy::move)
|
||||||
|
|
|
@ -27,13 +27,13 @@ class InstanceNormAclnn : public ASCENDKernelWithoutConfig {
|
||||||
|
|
||||||
std::vector<int64_t> inputDim = castTo64(inputD);
|
std::vector<int64_t> inputDim = castTo64(inputD);
|
||||||
std::vector<int64_t> inputStride = castTo64(inputS);
|
std::vector<int64_t> inputStride = castTo64(inputS);
|
||||||
std::vector<int64_t> weightDim = castTo64(weightD);
|
std::vector<int64_t> weightDim = castTo64(weightD);
|
||||||
std::vector<int64_t> weightStride = castTo64(weightS);
|
std::vector<int64_t> weightStride = castTo64(weightS);
|
||||||
std::vector<int64_t> outputDim = castTo64(outD);
|
std::vector<int64_t> outputDim = castTo64(outD);
|
||||||
std::vector<int64_t> outputStride = castTo64(outS);
|
std::vector<int64_t> outputStride = castTo64(outS);
|
||||||
|
|
||||||
auto axis = 3;
|
auto axis = 3;
|
||||||
|
|
||||||
auto rank = static_cast<int>(inputDim.size());
|
auto rank = static_cast<int>(inputDim.size());
|
||||||
std::vector<int64_t> normalizedShape(rank - axis, 0);
|
std::vector<int64_t> normalizedShape(rank - axis, 0);
|
||||||
for (auto i = rank; i > axis; --i) {
|
for (auto i = rank; i > axis; --i) {
|
||||||
|
@ -86,7 +86,7 @@ class InstanceNormAclnn : public ASCENDKernelWithoutConfig {
|
||||||
if (workspaceSize > 0) {
|
if (workspaceSize > 0) {
|
||||||
workspaceAddr = context->getWorkspace(workspaceSize);
|
workspaceAddr = context->getWorkspace(workspaceSize);
|
||||||
}
|
}
|
||||||
auto tmp_err_msg = aclGetRecentErrMsg();
|
auto tmp_err_msg = aclGetRecentErrMsg();
|
||||||
if (tmp_err_msg != NULL) {
|
if (tmp_err_msg != NULL) {
|
||||||
printf(" ERROR Message : %s \n ", tmp_err_msg);
|
printf(" ERROR Message : %s \n ", tmp_err_msg);
|
||||||
}
|
}
|
||||||
|
@ -103,7 +103,7 @@ class InstanceNormAclnn : public ASCENDKernelWithoutConfig {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
REGISTER_KERNEL(Device::ASCEND, OpType::InstanceNormalization, InstanceNormAclnn,
|
REGISTER_KERNEL(Device::ASCEND, OpType::InstanceNormalization,
|
||||||
"InstanceNorm_ASCEND");
|
InstanceNormAclnn, "InstanceNorm_ASCEND");
|
||||||
|
|
||||||
}; // namespace infini
|
}; // namespace infini
|
||||||
|
|
|
@ -2,14 +2,12 @@
|
||||||
#include "utils/operator_utils.h"
|
#include "utils/operator_utils.h"
|
||||||
|
|
||||||
namespace infini {
|
namespace infini {
|
||||||
InstanceNormObj::InstanceNormObj(GraphObj *graph, Tensor input, Tensor output, Tensor scale,
|
InstanceNormObj::InstanceNormObj(GraphObj *graph, Tensor input, Tensor output,
|
||||||
Tensor bias,
|
Tensor scale, Tensor bias, float eps)
|
||||||
float eps)
|
: OperatorObj(OpType::InstanceNormalization, TensorVec{input, scale, bias},
|
||||||
: OperatorObj(OpType::InstanceNormalization,
|
|
||||||
TensorVec{input, scale, bias},
|
|
||||||
{output}),
|
{output}),
|
||||||
eps(eps) {
|
eps(eps) {
|
||||||
|
|
||||||
IT_ASSERT(checkValid(graph));
|
IT_ASSERT(checkValid(graph));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -8,18 +8,17 @@
|
||||||
|
|
||||||
namespace infini {
|
namespace infini {
|
||||||
|
|
||||||
void test_instancenormFp32(
|
void test_instancenormFp32(const Shape &inputShape,
|
||||||
const Shape &inputShape, const vector<float> &inputData,
|
const vector<float> &inputData,
|
||||||
const Shape &scaleShape, const vector<float> &scaleData, float eps,
|
const Shape &scaleShape,
|
||||||
const vector<float> &ExpectData,
|
const vector<float> &scaleData, float eps,
|
||||||
const Shape &biasShape,
|
const vector<float> &ExpectData,
|
||||||
const vector<float> &biasData) {
|
const Shape &biasShape,
|
||||||
|
const vector<float> &biasData) {
|
||||||
|
|
||||||
Runtime runtime = NativeCpuRuntimeObj::getInstance();
|
Runtime runtime = NativeCpuRuntimeObj::getInstance();
|
||||||
Graph gCpu = make_ref<GraphObj>(runtime);
|
Graph gCpu = make_ref<GraphObj>(runtime);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
auto bias = gCpu->addTensor(biasShape, DataType::Float32);
|
auto bias = gCpu->addTensor(biasShape, DataType::Float32);
|
||||||
auto input = gCpu->addTensor(inputShape, DataType::Float32);
|
auto input = gCpu->addTensor(inputShape, DataType::Float32);
|
||||||
auto scale = gCpu->addTensor(scaleShape, DataType::Float32);
|
auto scale = gCpu->addTensor(scaleShape, DataType::Float32);
|
||||||
|
@ -44,9 +43,8 @@ void test_instancenormFp32(
|
||||||
scaleNpu->copyin(scaleData);
|
scaleNpu->copyin(scaleData);
|
||||||
ascendRuntime->run(gAscend);
|
ascendRuntime->run(gAscend);
|
||||||
|
|
||||||
auto oCpu =
|
auto oCpu = gCpu->cloneTensor(op->getOutput()); // move Data from npu to cpu
|
||||||
gCpu->cloneTensor(op->getOutput()); // move Data from npu to cpu
|
oCpu->printData(); //->printData
|
||||||
oCpu->printData(); //->printData
|
|
||||||
EXPECT_TRUE(oCpu->equalData(ExpectData));
|
EXPECT_TRUE(oCpu->equalData(ExpectData));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -67,7 +65,6 @@ TEST(CUDA_InstancenormFp32, run) {
|
||||||
-0.3674207, 0.0000000, 0.6123678, -0.3674207, 0.0000000, 0.6123678,
|
-0.3674207, 0.0000000, 0.6123678, -0.3674207, 0.0000000, 0.6123678,
|
||||||
-0.3674207, 0.0000000, 0.6123678, -0.3674207, 0.0000000, 0.6123678},
|
-0.3674207, 0.0000000, 0.6123678, -0.3674207, 0.0000000, 0.6123678},
|
||||||
Shape{3}, vector<float>{0, 0, 0});
|
Shape{3}, vector<float>{0, 0, 0});
|
||||||
|
|
||||||
|
|
||||||
aclFinalize();
|
aclFinalize();
|
||||||
} // python output
|
} // python output
|
||||||
|
|
Loading…
Reference in New Issue