diff --git a/src/kernels/bang/concat.cc b/src/kernels/bang/concat.cc index 4b16b69a..1264d646 100644 --- a/src/kernels/bang/concat.cc +++ b/src/kernels/bang/concat.cc @@ -1,6 +1,6 @@ +#include "operators/concat.h" #include "bang/bang_kernel_without_config.h" #include "bang/bang_runtime.h" -#include "operators/concat.h" namespace infini { class ConcatCnnl : public BangKernelWithoutConfig { @@ -23,10 +23,10 @@ class ConcatCnnl : public BangKernelWithoutConfig { int dim_array[4] = {dim[0], dim[1], dim[2], dim[3]}; int dimout_array[4] = {dim[0], dim[1], dim[2], dim[3]}; - dimout_array[axis] *= num; + dimout_array[axis] *= num; checkCnnlError(cnnlCreateTensorDescriptor(&desc)); - checkCnnlError(cnnlSetTensorDescriptor(desc, CNNL_LAYOUT_NCHW, - CNNL_DTYPE_FLOAT, 4, dimout_array)); + checkCnnlError(cnnlSetTensorDescriptor( + desc, CNNL_LAYOUT_NCHW, CNNL_DTYPE_FLOAT, 4, dimout_array)); cnnlTensorDescriptor_t descArray[num]; for (int i = 0; i < num; ++i) { checkCnnlError(cnnlCreateTensorDescriptor(&descArray[i])); @@ -40,7 +40,8 @@ class ConcatCnnl : public BangKernelWithoutConfig { BangPtr wsData = context->getWorkspace(wsSize); cnnlStatus_t stat = - cnnlConcat(context->cnnlHandle(), num, axis, descArray, argv, wsData, wsSize, desc, cData); + cnnlConcat(context->cnnlHandle(), num, axis, descArray, argv, + wsData, wsSize, desc, cData); if (stat != CNNL_STATUS_SUCCESS) return; diff --git a/test/kernels/bang/test_bang_concat.cc b/test/kernels/bang/test_bang_concat.cc index 6829b9a7..ff3d25f2 100644 --- a/test/kernels/bang/test_bang_concat.cc +++ b/test/kernels/bang/test_bang_concat.cc @@ -10,7 +10,7 @@ namespace infini { template void testConcat(const std::function &generator, - const Shape &shape) { + const Shape &shape) { // Runtime Runtime cpuRuntime = CpuRuntimeObj::getInstance(); auto bangRuntime = make_ref(); @@ -29,7 +29,8 @@ void testConcat(const std::function &generator, Graph bangGraph = make_ref(bangRuntime); auto inputGpu1 = bangGraph->cloneTensor(inputCpu1); auto inputGpu2 = bangGraph->cloneTensor(inputCpu2); - auto gpuOp = bangGraph->addOp(TensorVec{inputGpu1, inputGpu2}, nullptr, 2); + auto gpuOp = + bangGraph->addOp(TensorVec{inputGpu1, inputGpu2}, nullptr, 2); bangGraph->dataMalloc(); bangRuntime->run(bangGraph); auto outputGpu = gpuOp->getOutput();