fix xpu code so that it can run.

wanghailu 2023-08-22 15:42:53 +08:00
parent a69390e310
commit d18d40a2e9
11 changed files with 48 additions and 82 deletions

View File

@@ -12,14 +12,14 @@ class CastXdnn : public XPUKernelWithoutConfig {
void *const aData = (op->getInputs(0)->getRawDataPtr<void *>());
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
auto len = op->getInputs(0)->size();
CastObj::CastType type = op->getType();
CastType type = op->getType();
int ret = 0;
switch (type) {
case CastObj::Float2Int32:
case CastType::Float2Int32:
ret = baidu::xpu::api::cast<float,int>(context->XPUHandle(), (float*)aData, (int*)cData, len);
break;
case CastObj::Int322Int8:
case CastType::Int322Int8:
ret = baidu::xpu::api::cast<int,float>(context->XPUHandle(), (int*)aData, (float*)cData, len);
break;
default:
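The change in this hunk is purely a rename: CastType is now a free-standing enum rather than a member of CastObj, so the local variable type and every switch label move with it. Below is a minimal standalone sketch of the same dispatch pattern, with the xdnn cast stubbed out by std::transform; everything except the CastType names is illustrative.

```cpp
// Sketch of the CastType dispatch above. The real kernel calls
// baidu::xpu::api::cast<TIn, TOut>; here a host-side stub stands in for it.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

enum class CastType { Float2Int32, Int322Int8 };

// Stand-in for the xdnn cast: element-wise static_cast, returns 0 on success.
template <typename TIn, typename TOut>
int cast(const TIn *in, TOut *out, std::size_t len) {
    std::transform(in, in + len, out,
                   [](TIn v) { return static_cast<TOut>(v); });
    return 0;
}

int main() {
    std::vector<float> a{1.5f, 2.5f, 3.5f};
    std::vector<int> c(a.size());
    CastType type = CastType::Float2Int32;
    int ret = -1;
    switch (type) {
    case CastType::Float2Int32:
        // Template arguments select the source/destination element types.
        ret = cast<float, int>(a.data(), c.data(), a.size());
        break;
    default:
        break;
    }
    assert(ret == 0);
    return 0;
}
```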

View File

@@ -174,30 +174,6 @@ class EqualXdnn : public XPUKernelWithoutConfig {
}
};
class NotEqualXdnn : public XPUKernelWithoutConfig {
void compute(const Operator &_op,
const RuntimeObj *_context) const override {
auto op = as<ElementWiseObj>(_op);
auto context = dynamic_cast<const XPURuntimeObj *>(_context);
void *const aData = (op->getInputs(0)->getRawDataPtr<void *>());
void *const bData = (op->getInputs(1)->getRawDataPtr<void *>());
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
size_t len = op->getOutput()->size();
XPUPtr wsData = context->getWorkspace(len);
auto aDim = op->getInputs(0)->getDims();
auto bDim = op->getInputs(1)->getDims();
if (aDim.size() != 4 || bDim.size() != 4)
IT_TODO_HALT();
auto ret = baidu::xpu::api::broadcast_not_equal<float>(context->XPUHandle(), (float*)aData, (float*)bData, (bool*)wsData, aDim, bDim);
ret = baidu::xpu::api::cast<bool, float>(context->XPUHandle(), (bool*)wsData, (float*)cData, len);
assert(ret == 0);
return;
}
};
class GreaterEqualXdnn : public XPUKernelWithoutConfig {
void compute(const Operator &_op,
const RuntimeObj *_context) const override {
@@ -304,20 +280,18 @@ REGISTER_KERNEL(Device::XPU, OpType::Div, DataType::Float32, DivXdnn,
"Div_xdnn_XPU_Float32");
REGISTER_KERNEL(Device::XPU, OpType::Pow, DataType::Float32, PowXdnn,
"Pow_xdnn_XPU_Float32");
REGISTER_KERNEL(Device::XPU, OpType::Maximum, DataType::Float32, MaxXdnn,
REGISTER_KERNEL(Device::XPU, OpType::Max, DataType::Float32, MaxXdnn,
"Max_xdnn_XPU_Float32");
REGISTER_KERNEL(Device::XPU, OpType::Minimum, DataType::Float32, MinXdnn,
REGISTER_KERNEL(Device::XPU, OpType::Min, DataType::Float32, MinXdnn,
"Min_xdnn_XPU_Float32");
REGISTER_KERNEL(Device::XPU, OpType::Equal, DataType::Float32, EqualXdnn,
"Equal_xdnn_XPU_Float32");
REGISTER_KERNEL(Device::XPU, OpType::NotEqual, DataType::Float32, NotEqualXdnn,
"NotEqual_xdnn_XPU_Float32");
REGISTER_KERNEL(Device::XPU, OpType::GreaterEqual, DataType::Float32, GreaterEqualXdnn,
REGISTER_KERNEL(Device::XPU, OpType::GreaterOrEqual, DataType::Float32, GreaterEqualXdnn,
"GreaterEqual_xdnn_XPU_Float32");
REGISTER_KERNEL(Device::XPU, OpType::GreaterThan, DataType::Float32, GreaterThanXdnn,
REGISTER_KERNEL(Device::XPU, OpType::Greater, DataType::Float32, GreaterThanXdnn,
"GreaterThan_xdnn_XPU_Float32");
REGISTER_KERNEL(Device::XPU, OpType::LessEqual, DataType::Float32, LessEqualXdnn,
REGISTER_KERNEL(Device::XPU, OpType::LessOrEqual, DataType::Float32, LessEqualXdnn,
"LessEqual_xdnn_XPU_Float32");
REGISTER_KERNEL(Device::XPU, OpType::LessThan, DataType::Float32, LessThanXdnn,
REGISTER_KERNEL(Device::XPU, OpType::Less, DataType::Float32, LessThanXdnn,
"LessThan_xdnn_XPU_Float32");
}; // namespace infini
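These registration fixes track renames in the OpType enum (Maximum to Max, GreaterEqual to GreaterOrEqual, and so on): a kernel registered under a stale name still compiles, but the dispatcher never finds it at run time. A rough sketch of the idiom, assuming REGISTER_KERNEL expands to a static registrar object; the names and signatures below are illustrative, not the actual InfiniTensor macro.

```cpp
// Toy kernel registry keyed by (device, op type, data type). Registration
// happens via static objects, so a mismatch between the OpType used here
// and the one the runtime looks up silently yields "no kernel".
#include <iostream>
#include <map>
#include <string>
#include <tuple>
#include <utility>

enum class Device { XPU };
enum class OpType { Max, Min, Greater };
using Key = std::tuple<Device, OpType, std::string>;

std::map<Key, std::string> &registry() {
    static std::map<Key, std::string> r; // construct-on-first-use
    return r;
}

struct Registrar {
    Registrar(Device d, OpType t, std::string dtype, std::string name) {
        registry()[{d, t, std::move(dtype)}] = std::move(name);
    }
};

// Illustrative stand-in for REGISTER_KERNEL.
#define REGISTER_KERNEL_SKETCH(op, tag) \
    static Registrar registrar_##tag{Device::XPU, OpType::op, "Float32", #tag};

REGISTER_KERNEL_SKETCH(Max, Max_xdnn_XPU_Float32)
REGISTER_KERNEL_SKETCH(Min, Min_xdnn_XPU_Float32)

int main() {
    // Lookup succeeds only when registration and dispatch agree on the
    // OpType spelling, which is exactly what this commit repairs.
    auto it = registry().find({Device::XPU, OpType::Max, "Float32"});
    std::cout << (it != registry().end() ? it->second : "no kernel") << "\n";
    return 0;
}
```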

View File

@@ -30,6 +30,6 @@ class MatmulXdnn : public XPUKernelWithoutConfig {
}
};
REGISTER_KERNEL(Device::XPU, OpType::Matmul, DataType::Float32, MatmulXdnn,
REGISTER_KERNEL(Device::XPU, OpType::MatMul, DataType::Float32, MatmulXdnn,
"Matmul_xdnn_XPU_Float32");
}; // namespace infini

View File

@@ -57,6 +57,6 @@ class MaxPooling : public XPUKernelWithoutConfig {
REGISTER_KERNEL(Device::XPU, OpType::MaxPool, DataType::Float32, MaxPooling,
"MaxPool_xdnn_Float32");
REGISTER_KERNEL(Device::XPU, OpType::AvgPool, DataType::Float32, AvgPooling,
REGISTER_KERNEL(Device::XPU, OpType::AveragePool, DataType::Float32, AvgPooling,
"AvgPool_xdnn_Float32");
}; // namespace infini

View File

@@ -201,9 +201,9 @@ class CopyXdnn : public XPUKernelWithoutConfig {
void *const cData = (op->getOutput()->getRawDataPtr<void *>());
auto len = op->getInputs(0)->size();
auto ret = baidu::xpu::api::copy<float>(context->XPUHandle(), (float*)aData, (float*)cData, len);
assert(ret == 0);
return;
}
};
@@ -248,8 +248,6 @@ REGISTER_KERNEL(Device::XPU, OpType::Floor, DataType::Float32, FloorXdnn,
"Floor_xdnn_XPU_Float32");
REGISTER_KERNEL(Device::XPU, OpType::Neg, DataType::Float32, NegXdnn,
"Neg_xdnn_XPU_Float32");
REGISTER_KERNEL(Device::XPU, OpType::Copy, DataType::Float32, CopyXdnn,
"Copy_xdnn_XPU_Float32");
REGISTER_KERNEL(Device::XPU, OpType::Reciprocal, DataType::Float32, ReciprocalXdnn,
"Reciprocal_xdnn_XPU_Float32");

View File

@@ -14,7 +14,7 @@ void XPURuntimeObj::runWithoutSync(const Graph &graph, bool tune = false,
for (auto &op : graph->getOperators()) {
// HACK: set correct data type
auto kernelAttrs =
KernelAttrs{device, op->getOpType(), DataType::Float32};
KernelAttrs{device, op->getOpType().underlying(), op->getDType()};
Kernel *kernel = kernelRegistry.getKernel(kernelAttrs);
auto perfKey = PerfEngine::Key{kernelAttrs, op->getOpPerfKey()};
auto perfData = perfEngine.getPerfData(perfKey);
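This is the companion fix on the lookup side: the kernel key is now built from the operator's real element type via op->getDType() rather than a hard-coded DataType::Float32, so non-float operators can resolve their own kernels. A toy model of the lookup follows; KernelAttrs is reduced to a plain tuple, and underlying() is assumed to unwrap OpType to an integer.

```cpp
// Why hard-coding the dtype in the key breaks dispatch: the registry may
// hold kernels for several element types, and the key must carry the one
// the operator actually uses. All names and values here are illustrative.
#include <iostream>
#include <map>
#include <tuple>

using KernelAttrs = std::tuple<int /*device*/, int /*opType*/, int /*dtype*/>;

struct Op {
    int opType;
    int dtype;
};

int main() {
    std::map<KernelAttrs, const char *> kernels{
        {{0, 7, 1}, "Cast_xdnn_XPU_Float32"}, // dtype 1 = Float32 (made up)
        {{0, 7, 2}, "Cast_xdnn_XPU_Int32"},   // dtype 2 = Int32   (made up)
    };
    Op op{7, 2}; // an int32 operator
    // Before the fix the dtype slot was always 1, so this op either missed
    // its kernel or got the float one; after the fix the key uses op.dtype.
    auto it = kernels.find({0, op.opType, op.dtype});
    std::cout << (it != kernels.end() ? it->second : "kernel not found") << "\n";
    return 0;
}
```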

View File

@@ -19,12 +19,8 @@ void testAdd(
// Build input data on CPU
Tensor inputCpu1 =
make_ref<TensorObj>(shape, DataType::Float32, cpuRuntime);
inputCpu1->dataMalloc();
inputCpu1->setData(generator);
Tensor inputCpu2 =
make_ref<TensorObj>(shape, DataType::Float32, cpuRuntime);
inputCpu2->dataMalloc();
inputCpu2->setData(generator);
// GPU
Graph xpuGraph = make_ref<GraphObj>(xpuRuntime);
@@ -36,12 +32,16 @@
auto outputGpu = gpuOp->getOutput();
auto outputGpu2Cpu = outputGpu->clone(cpuRuntime);
// CPU
// Graph cpuGraph = make_ref<GraphObj>(cpuRuntime);
// auto cpuOp = cpuGraph->addOp<T>(inputCpu1, inputCpu2, nullptr);
// cpuGraph->dataMalloc();
// cpuRuntime->run(cpuGraph);
// auto outputCpu = cpuOp->getOutput();
// Check
// Graph cpuGraph = make_ref<GraphObj>(cpuRuntime);
// auto cpuOp = cpuGraph->addOp<T>(inputCpu1, inputCpu2, nullptr);
// cpuGraph->addTensor(inputCpu1);
// cpuGraph->addTensor(inputCpu2);
// cpuGraph->dataMalloc();
// inputCpu1->setData(generator);
// inputCpu2->setData(generator);
// cpuRuntime->run(cpuGraph);
// auto outputCpu = cpuOp->getOutput();
// // Check
// outputCpu->printData();
outputGpu2Cpu->printData();
// EXPECT_TRUE(outputCpu->equalData(outputGpu2Cpu));
@@ -54,7 +54,6 @@ TEST(xpu_add, run) {
testAdd<MulObj>(IncrementalGenerator(), Shape{1, 1, 1, 30});
testAdd<DivObj>(IncrementalGenerator(), Shape{1, 1, 1, 30});
testAdd<EqualObj>(IncrementalGenerator(), Shape{1, 1, 1, 30});
testAdd<NotEqualObj>(IncrementalGenerator(), Shape{1, 1, 1, 30});
testAdd<GreaterEqualObj>(IncrementalGenerator(), Shape{1, 1, 1, 30});
testAdd<GreaterThanObj>(IncrementalGenerator(), Shape{1, 1, 1, 30});
testAdd<LessEqualObj>(IncrementalGenerator(), Shape{1, 1, 1, 30});
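The CPU reference path in this test is still commented out; once restored, the final check is the element-wise EXPECT_TRUE(outputCpu->equalData(outputGpu2Cpu)). A rough standalone model of such a comparison is below; the tolerances are guesses, and the real equalData may use different rules.

```cpp
// Approximate element-wise comparison between a CPU reference and a device
// result copied back to host. Tolerances are illustrative only.
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

bool equalData(const std::vector<float> &a, const std::vector<float> &b,
               float rtol = 1e-5f, float atol = 1e-6f) {
    if (a.size() != b.size())
        return false;
    for (std::size_t i = 0; i < a.size(); ++i)
        if (std::fabs(a[i] - b[i]) > atol + rtol * std::fabs(b[i]))
            return false;
    return true;
}

int main() {
    std::vector<float> cpu{1.0f, 2.0f, 3.0f};
    std::vector<float> xpu{1.0f, 2.0f, 3.0000001f};
    std::printf("%s\n", equalData(cpu, xpu) ? "match" : "mismatch");
    return 0;
}
```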

View File

@@ -32,6 +32,8 @@ void testConcat(const std::function<void(void *, size_t, DataType)> &generator,
auto gpuOp =
xpuGraph->addOp<T>(TensorVec{inputGpu1, inputGpu2}, nullptr, 2);
xpuGraph->dataMalloc();
inputGpu1->setData(generator);
inputGpu2->setData(generator);
xpuRuntime->run(xpuGraph);
auto outputGpu = gpuOp->getOutput();
auto outputGpu2Cpu = outputGpu->clone(cpuRuntime);
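This hunk shows the ordering rule the test fixes all follow: a tensor owned by a graph has no backing storage until the graph's dataMalloc() runs, so setData() must come after it (and externally built tensors must first be attached with addTensor(), as the conv and matmul tests below do). A minimal model of the hazard, not the InfiniTensor API:

```cpp
// Allocate-then-write ordering: the buffer exists only after the graph's
// dataMalloc(), so writing data first would dereference a null pointer.
#include <cassert>
#include <cstddef>
#include <cstring>
#include <memory>
#include <vector>

struct Tensor {
    std::size_t size = 0;
    std::unique_ptr<float[]> data; // null until the graph allocates it

    void setData(const std::vector<float> &src) {
        assert(data && "setData before dataMalloc: no buffer yet");
        std::memcpy(data.get(), src.data(), size * sizeof(float));
    }
};

struct Graph {
    std::vector<Tensor *> tensors;
    void addTensor(Tensor *t) { tensors.push_back(t); }
    void dataMalloc() { // the graph, not the tensor, decides when to allocate
        for (auto *t : tensors)
            t->data = std::make_unique<float[]>(t->size);
    }
};

int main() {
    Tensor a{4, nullptr};
    Graph g;
    g.addTensor(&a);          // attach the tensor to the graph first
    g.dataMalloc();           // then allocate ...
    a.setData({0, 1, 2, 3});  // ... and only then fill, as the tests now do
    return 0;
}
```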

View File

@@ -19,13 +19,8 @@ void testConv(const std::function<void(void *, size_t, DataType)> &generatorA,
// Build input data on CPU
Tensor inputCpu1 =
make_ref<TensorObj>(shapeA, DataType::Float32, cpuRuntime);
inputCpu1->dataMalloc();
inputCpu1->setData(generatorA);
Tensor inputCpu2 =
make_ref<TensorObj>(shapeB, DataType::Float32, cpuRuntime);
inputCpu2->dataMalloc();
inputCpu2->setData(generatorB);
// MLU
Graph xpuGraph = make_ref<GraphObj>(xpuRuntime);
auto inputMlu1 = xpuGraph->cloneTensor(inputCpu1);
@@ -38,9 +33,13 @@ void testConv(const std::function<void(void *, size_t, DataType)> &generatorA,
auto outputXpu2Cpu = outputXpu->clone(cpuRuntime);
// CPU
Graph cpuGraph = make_ref<GraphObj>(cpuRuntime);
cpuGraph->addTensor(inputCpu1);
cpuGraph->addTensor(inputCpu2);
auto cpuOp =
cpuGraph->addOp<T>(inputCpu1, inputCpu2, nullptr, 1, 1, 1, 1, 1, 1);
cpuGraph->dataMalloc();
inputCpu1->setData(generatorA);
inputCpu2->setData(generatorB);
cpuRuntime->run(cpuGraph);
auto outputCpu = cpuOp->getOutput();
outputCpu->print();

View File

@@ -20,12 +20,8 @@ void testMatmul(const std::function<void(void *, size_t, DataType)> &generatorA,
// Build input data on CPU
Tensor inputCpu1 =
make_ref<TensorObj>(shapeA, DataType::Float32, cpuRuntime);
inputCpu1->dataMalloc();
inputCpu1->setData(generatorA);
Tensor inputCpu2 =
make_ref<TensorObj>(shapeB, DataType::Float32, cpuRuntime);
inputCpu2->dataMalloc();
inputCpu2->setData(generatorB);
// MLU
Graph xpuGraph = make_ref<GraphObj>(xpuRuntime);
@@ -33,13 +29,19 @@ void testMatmul(const std::function<void(void *, size_t, DataType)> &generatorA,
auto inputMlu2 = xpuGraph->cloneTensor(inputCpu2);
auto mluOp = xpuGraph->addOp<T>(inputMlu1, inputMlu2, nullptr);
xpuGraph->dataMalloc();
inputMlu1->setData(generatorA);
inputMlu2->setData(generatorB);
xpuRuntime->run(xpuGraph);
auto outputMlu = mluOp->getOutput();
auto outputMlu2Cpu = outputMlu->clone(cpuRuntime);
// CPU
Graph cpuGraph = make_ref<GraphObj>(cpuRuntime);
auto cpuOp = cpuGraph->addOp<T>(inputCpu1, inputCpu2, nullptr);
cpuGraph->addTensor(inputCpu1);
cpuGraph->addTensor(inputCpu2);
cpuGraph->dataMalloc();
inputCpu1->setData(generatorA);
inputCpu2->setData(generatorB);
cpuRuntime->run(cpuGraph);
auto outputCpu = cpuOp->getOutput();
outputCpu->print();

View File

@@ -17,21 +17,22 @@ void testUnary(const std::function<void(void *, size_t, DataType)> &generator,
// Build input data on CPU
Tensor inputCpu = make_ref<TensorObj>(shape, DataType::Float32, cpuRuntime);
inputCpu->dataMalloc();
inputCpu->setData(generator);
// GPU
Graph xpuGraph = make_ref<GraphObj>(xpuRuntime);
auto inputGpu = xpuGraph->cloneTensor(inputCpu);
auto gpuOp = xpuGraph->addOp<T>(inputGpu, nullptr);
xpuGraph->dataMalloc();
inputGpu->setData(generator);
xpuRuntime->run(xpuGraph);
auto outputGpu = gpuOp->getOutput();
auto outputGpu2Cpu = outputGpu->clone(cpuRuntime);
// CPU
Graph cpuGraph = make_ref<GraphObj>(cpuRuntime);
auto cpuOp = cpuGraph->addOp<T>(inputCpu, nullptr);
cpuGraph->addTensor(inputCpu);
cpuGraph->dataMalloc();
inputCpu->setData(generator);
cpuRuntime->run(cpuGraph);
auto outputCpu = cpuOp->getOutput();
// Check
@@ -46,8 +47,6 @@ void testClip(const std::function<void(void *, size_t, DataType)> &generator,
// Build input data on CPU
Tensor inputCpu = make_ref<TensorObj>(shape, DataType::Float32, cpuRuntime);
inputCpu->dataMalloc();
inputCpu->setData(generator);
float min = 1.0;
float max = 5.0;
@@ -56,13 +55,16 @@ void testClip(const std::function<void(void *, size_t, DataType)> &generator,
auto inputGpu = xpuGraph->cloneTensor(inputCpu);
auto gpuOp = xpuGraph->addOp<ClipObj>(inputGpu, nullptr, min, max);
xpuGraph->dataMalloc();
inputGpu->setData(generator);
xpuRuntime->run(xpuGraph);
auto outputGpu = gpuOp->getOutput();
auto outputGpu2Cpu = outputGpu->clone(cpuRuntime);
// CPU
Graph cpuGraph = make_ref<GraphObj>(cpuRuntime);
auto cpuOp = cpuGraph->addOp<ClipObj>(inputCpu, nullptr, min, max);
cpuGraph->addTensor(inputCpu);
cpuGraph->dataMalloc();
inputCpu->setData(generator);
cpuRuntime->run(cpuGraph);
auto outputCpu = cpuOp->getOutput();
// Check
@@ -77,21 +79,22 @@ void testCast(const std::function<void(void *, size_t, DataType)> &generator,
// Build input data on CPU
Tensor inputCpu = make_ref<TensorObj>(shape, DataType::Float32, cpuRuntime);
inputCpu->dataMalloc();
inputCpu->setData(generator);
// GPU
Graph xpuGraph = make_ref<GraphObj>(xpuRuntime);
auto inputGpu = xpuGraph->cloneTensor(inputCpu);
auto gpuOp = xpuGraph->addOp<CastObj>(inputGpu, nullptr, CastObj::Float2Int32);
auto gpuOp = xpuGraph->addOp<CastObj>(inputGpu, nullptr, CastType::Float2Int32);
xpuGraph->dataMalloc();
inputGpu->setData(generator);
xpuRuntime->run(xpuGraph);
auto outputGpu = gpuOp->getOutput();
auto outputGpu2Cpu = outputGpu->clone(cpuRuntime);
// CPU
Graph cpuGraph = make_ref<GraphObj>(cpuRuntime);
auto cpuOp = cpuGraph->addOp<CastObj>(inputCpu, nullptr, CastObj::Float2Int32);
auto cpuOp = cpuGraph->addOp<CastObj>(inputCpu, nullptr, CastType::Float2Int32);
cpuGraph->addTensor(inputCpu);
cpuGraph->dataMalloc();
inputCpu->setData(generator);
cpuRuntime->run(cpuGraph);
auto outputCpu = cpuOp->getOutput();
// Check
@@ -102,17 +105,6 @@ TEST(xdnn_Unary, run) {
testUnary<ReluObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
testUnary<SigmoidObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
testUnary<TanhObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
//testUnary<SquareObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
//testUnary<SqrtObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
//testUnary<RsqrtObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
//testUnary<ExpObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
//testUnary<CeilObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
//testUnary<FloorObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
//testUnary<NegObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
//testClip(IncrementalGenerator(), Shape{1, 2, 2, 3});
//testUnary<CopyObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
//testUnary<ReciprocalObj>(IncrementalGenerator(), Shape{1, 2, 2, 3});
//testCast(IncrementalGenerator(), Shape{1, 2, 2, 3});
}
} // namespace infini