forked from jiuyuan/InfiniTensor
fix Issue-189 question 1-15 (#195)
* fix: fix nativecpu elementwise only support 4d tensor * fix format --------- Co-authored-by: Haojie Wang <haojie0429@gmail.com>
This commit is contained in:
parent
42032356fb
commit
b15c4979fa
|
@ -13,6 +13,11 @@ Shape infer_broadcast(const Shape &A, const Shape &B);
|
||||||
int get_real_axis(const int &axis, const int &rank);
|
int get_real_axis(const int &axis, const int &rank);
|
||||||
// Check if tensor B is unidirectional broadcastable to tensor A
|
// Check if tensor B is unidirectional broadcastable to tensor A
|
||||||
bool is_unidirectional_broadcasting(const Shape &A, const Shape &B);
|
bool is_unidirectional_broadcasting(const Shape &A, const Shape &B);
|
||||||
|
// Convert a flat element offset into a per-dimension index for the given Shape
|
||||||
|
Shape locate_index(size_t inputN, const Shape &shape);
|
||||||
|
// Convert a per-dimension index back into a flat offset, wrapping each coordinate by the (broadcast) Shape
|
||||||
|
size_t delocate_index(const Shape &shapeIndex, const Shape &shape,
|
||||||
|
const Shape &stride);
|
||||||
// Convert KernelAttrs to a string representation
|
// Convert KernelAttrs to a string representation
|
||||||
std::string get_kernel_attrs_str(const KernelAttrs &kernelAttrs);
|
std::string get_kernel_attrs_str(const KernelAttrs &kernelAttrs);
|
||||||
} // namespace infini
|
} // namespace infini
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
#include "operators/element_wise.h"
|
#include "operators/element_wise.h"
|
||||||
#include "core/kernel.h"
|
#include "core/kernel.h"
|
||||||
|
#include "utils/operator_utils.h"
|
||||||
|
|
||||||
namespace infini {
|
namespace infini {
|
||||||
template <typename T> class NativeElementWise : public CpuKernelWithoutConfig {
|
template <typename T> class NativeElementWise : public CpuKernelWithoutConfig {
|
||||||
|
@ -11,37 +12,34 @@ template <typename T> class NativeElementWise : public CpuKernelWithoutConfig {
|
||||||
T *inptr1 = op->getInputs(1)->getRawDataPtr<T *>();
|
T *inptr1 = op->getInputs(1)->getRawDataPtr<T *>();
|
||||||
T *outptr = op->getOutput()->getRawDataPtr<T *>();
|
T *outptr = op->getOutput()->getRawDataPtr<T *>();
|
||||||
|
|
||||||
int a[4] = {1, 1, 1, 1};
|
auto shapeA = op->getInputs(0)->getDims();
|
||||||
int b[4] = {1, 1, 1, 1};
|
auto shapeB = op->getInputs(1)->getDims();
|
||||||
int c[4] = {1, 1, 1, 1};
|
auto shapeC = op->getOutput()->getDims();
|
||||||
auto a_input = op->getInputs(0)->getDims();
|
auto rank = op->getOutput()->getRank();
|
||||||
auto b_input = op->getInputs(1)->getDims();
|
Shape a(rank, 1);
|
||||||
auto c_output = op->getOutput()->getDims();
|
Shape b(rank, 1);
|
||||||
std::copy(a_input.begin(), a_input.end(), a + (4 - a_input.size()));
|
std::copy(shapeA.begin(), shapeA.end(),
|
||||||
std::copy(b_input.begin(), b_input.end(), b + (4 - b_input.size()));
|
a.begin() + (rank - shapeA.size()));
|
||||||
std::copy(c_output.begin(), c_output.end(), c + (4 - c_output.size()));
|
std::copy(shapeB.begin(), shapeB.end(),
|
||||||
|
b.begin() + (rank - shapeB.size()));
|
||||||
|
auto getStride = [&](const Shape &shape) {
|
||||||
|
int p = 1;
|
||||||
|
Shape stride(rank);
|
||||||
|
for (auto i = rank; i > 0; --i) {
|
||||||
|
stride[i - 1] = p;
|
||||||
|
p = p * shape[i - 1];
|
||||||
|
}
|
||||||
|
return stride;
|
||||||
|
};
|
||||||
|
Shape strideA = getStride(a);
|
||||||
|
Shape strideB = getStride(b);
|
||||||
|
|
||||||
auto n = op->getOutput()->size();
|
auto n = op->getOutput()->size();
|
||||||
for (size_t i = 0; i < n; ++i) {
|
for (size_t i = 0; i < n; ++i) {
|
||||||
int c0_index = i / (c[1] * c[2] * c[3]);
|
auto shapeIndexC = locate_index(i, shapeC);
|
||||||
int c1_index = (i % (c[1] * c[2] * c[3])) / (c[2] * c[3]);
|
auto indexA = delocate_index(shapeIndexC, a, strideA);
|
||||||
int c2_index = ((i % (c[1] * c[2] * c[3])) % (c[2] * c[3])) / c[3];
|
auto indexB = delocate_index(shapeIndexC, b, strideB);
|
||||||
int c3_index = ((i % (c[1] * c[2] * c[3])) % (c[2] * c[3])) % c[3];
|
outptr[i] = doCompute(inptr0[indexA], inptr1[indexB]);
|
||||||
|
|
||||||
int a0_index = c0_index % a[0];
|
|
||||||
int a1_index = c1_index % a[1];
|
|
||||||
int a2_index = c2_index % a[2];
|
|
||||||
int a3_index = c3_index % a[3];
|
|
||||||
|
|
||||||
int b0_index = c0_index % b[0];
|
|
||||||
int b1_index = c1_index % b[1];
|
|
||||||
int b2_index = c2_index % b[2];
|
|
||||||
int b3_index = c3_index % b[3];
|
|
||||||
outptr[i] = doCompute(
|
|
||||||
inptr0[a0_index * a[1] * a[2] * a[3] + a1_index * a[2] * a[3] +
|
|
||||||
a2_index * a[3] + a3_index],
|
|
||||||
inptr1[b0_index * b[1] * b[2] * b[3] + b1_index * b[2] * b[3] +
|
|
||||||
b2_index * b[3] + b3_index]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -66,6 +66,31 @@ bool is_unidirectional_broadcasting(const Shape &A, const Shape &B) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Convert a flat element offset into a per-dimension index for `shape`.
 *
 * The offset is decomposed starting from the last dimension, so the last
 * dimension is the fastest-varying one.
 *
 * @param inputN Flat offset of the element.
 * @param shape  Extents of each dimension.
 * @return A Shape of the same length as `shape` holding one coordinate per
 *         dimension.
 */
Shape locate_index(size_t inputN, const Shape &shape) {
    Shape ans(shape.size());
    auto i = ans.rbegin();
    for (auto j = shape.rbegin(), ej = shape.rend(); j != ej; ++j, ++i) {
        // Do the arithmetic in size_t: std::div only has signed integer
        // overloads, so passing a size_t offset to it converts the offset
        // to a signed type and can truncate it for very large tensors.
        const auto dim = static_cast<size_t>(*j);
        *i = static_cast<Shape::value_type>(inputN % dim);
        inputN /= dim;
    }
    return ans;
}
|
||||||
|
|
||||||
|
size_t delocate_index(const Shape &shapeIndex, const Shape &shape,
|
||||||
|
const Shape &stride) {
|
||||||
|
size_t ans = 0;
|
||||||
|
Shape index(shapeIndex.size());
|
||||||
|
IT_ASSERT(shapeIndex.size() == shape.size());
|
||||||
|
IT_ASSERT(shape.size() == stride.size());
|
||||||
|
for (size_t i = 0; i < shape.size(); ++i) {
|
||||||
|
index[i] = shapeIndex[i] % shape[i];
|
||||||
|
ans += index[i] * stride[i];
|
||||||
|
}
|
||||||
|
return ans;
|
||||||
|
}
|
||||||
|
|
||||||
std::string device_to_str(Device device) {
|
std::string device_to_str(Device device) {
|
||||||
std::string deviceStr;
|
std::string deviceStr;
|
||||||
switch (device) {
|
switch (device) {
|
||||||
|
|
|
@ -0,0 +1,44 @@
|
||||||
|
#include "core/graph.h"
|
||||||
|
#include "core/runtime.h"
|
||||||
|
#include "operators/element_wise.h"
|
||||||
|
|
||||||
|
#include "test.h"
|
||||||
|
|
||||||
|
namespace infini {
|
||||||
|
|
||||||
|
using ExpectOutput = vector<float>;
|
||||||
|
template <class T>
|
||||||
|
void testElementWiseNativeCpu(
|
||||||
|
const std::function<void(void *, size_t, DataType)> &generator1,
|
||||||
|
const std::function<void(void *, size_t, DataType)> &generator2,
|
||||||
|
const Shape &shape1, const Shape &shape2, const ExpectOutput &ansVec) {
|
||||||
|
Runtime runtime = NativeCpuRuntimeObj::getInstance();
|
||||||
|
Graph g = make_ref<GraphObj>(runtime);
|
||||||
|
auto t1 = g->addTensor(shape1, DataType::Float32);
|
||||||
|
auto t2 = g->addTensor(shape2, DataType::Float32);
|
||||||
|
|
||||||
|
auto op = g->addOp<T>(t1, t2, nullptr);
|
||||||
|
g->dataMalloc();
|
||||||
|
t1->setData(generator1);
|
||||||
|
t2->setData(generator2);
|
||||||
|
|
||||||
|
runtime->run(g);
|
||||||
|
EXPECT_TRUE(op->getOutput()->equalData(ansVec));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Exercises Add/Mul/Sub/Div on the native CPU runtime with a rank-5 first
// operand and a rank-3 second operand, i.e. inputs that are not 4-D.
TEST(ElementWise, NativeCpu) {
    // Every case below uses the same pair of input shapes.
    const Shape lhsShape{1, 2, 2, 3, 1};
    const Shape rhsShape{2, 1, 1};

    testElementWiseNativeCpu<AddObj>(
        IncrementalGenerator(), IncrementalGenerator(), lhsShape, rhsShape,
        ExpectOutput{0, 1, 2, 4, 5, 6, 6, 7, 8, 10, 11, 12});

    testElementWiseNativeCpu<MulObj>(
        IncrementalGenerator(), IncrementalGenerator(), lhsShape, rhsShape,
        ExpectOutput{0, 0, 0, 3, 4, 5, 0, 0, 0, 9, 10, 11});

    testElementWiseNativeCpu<SubObj>(
        IncrementalGenerator(), IncrementalGenerator(), lhsShape, rhsShape,
        ExpectOutput{0, 1, 2, 2, 3, 4, 6, 7, 8, 8, 9, 10});

    // The second operand comes from OneGenerator, and the expected output
    // equals the first operand's incremental values.
    testElementWiseNativeCpu<DivObj>(
        IncrementalGenerator(), OneGenerator(), lhsShape, rhsShape,
        ExpectOutput{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
}
|
||||||
|
|
||||||
|
} // namespace infini
|
Loading…
Reference in New Issue