fix Issue-189 question 1-15 (#195)

* fix: nativecpu elementwise kernels only supported 4D tensors

* fix format

---------

Co-authored-by: Haojie Wang <haojie0429@gmail.com>
zhangyunze 2024-01-05 08:40:18 +08:00 committed by GitHub
parent 42032356fb
commit b15c4979fa
4 changed files with 100 additions and 28 deletions


@@ -13,6 +13,11 @@ Shape infer_broadcast(const Shape &A, const Shape &B);
 int get_real_axis(const int &axis, const int &rank);
 // Check if tensor B is unidirectional broadcastable to tensor A
 bool is_unidirectional_broadcasting(const Shape &A, const Shape &B);
+// Locate the index with size from Shape
+Shape locate_index(size_t inputN, const Shape &shape);
+// Delocate the ShapeIndex from Shape with broadcast
+size_t delocate_index(const Shape &shapeIndex, const Shape &shape,
+                      const Shape &stride);
 // Convert KernelAttrs to a string representation
 std::string get_kernel_attrs_str(const KernelAttrs &kernelAttrs);
 } // namespace infini
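Taken together, the two new declarations form an unravel/ravel pair: locate_index turns a flat row-major offset into a per-axis coordinate, and delocate_index folds such a coordinate back into a flat offset against a possibly broadcast shape. A minimal standalone sketch of the idea, assuming Shape is std::vector<int> as elsewhere in the repository (the _sketch names are placeholders, not the committed code):

#include <cstddef>
#include <vector>

using Shape = std::vector<int>; // assumption: matches the repo's Shape alias

// Unravel a flat row-major offset into a per-axis coordinate.
Shape locate_index_sketch(size_t flat, const Shape &shape) {
    Shape idx(shape.size());
    for (int d = (int)shape.size() - 1; d >= 0; --d) {
        idx[d] = (int)(flat % shape[d]); // coordinate along axis d
        flat /= shape[d];                // carry into the next-outer axis
    }
    return idx;
}

// Fold a coordinate back into a flat offset against `shape`;
// a broadcast axis has size 1, so `% 1` pins its coordinate to 0.
size_t delocate_index_sketch(const Shape &idx, const Shape &shape,
                             const Shape &stride) {
    size_t flat = 0;
    for (size_t d = 0; d < shape.size(); ++d)
        flat += (size_t)(idx[d] % shape[d]) * stride[d];
    return flat;
}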


@@ -1,5 +1,6 @@
 #include "operators/element_wise.h"
 #include "core/kernel.h"
+#include "utils/operator_utils.h"
 
 namespace infini {
 template <typename T> class NativeElementWise : public CpuKernelWithoutConfig {
@@ -11,37 +12,34 @@ template <typename T> class NativeElementWise : public CpuKernelWithoutConfig {
         T *inptr1 = op->getInputs(1)->getRawDataPtr<T *>();
         T *outptr = op->getOutput()->getRawDataPtr<T *>();
 
-        int a[4] = {1, 1, 1, 1};
-        int b[4] = {1, 1, 1, 1};
-        int c[4] = {1, 1, 1, 1};
-        auto a_input = op->getInputs(0)->getDims();
-        auto b_input = op->getInputs(1)->getDims();
-        auto c_output = op->getOutput()->getDims();
-        std::copy(a_input.begin(), a_input.end(), a + (4 - a_input.size()));
-        std::copy(b_input.begin(), b_input.end(), b + (4 - b_input.size()));
-        std::copy(c_output.begin(), c_output.end(), c + (4 - c_output.size()));
+        auto shapeA = op->getInputs(0)->getDims();
+        auto shapeB = op->getInputs(1)->getDims();
+        auto shapeC = op->getOutput()->getDims();
+        auto rank = op->getOutput()->getRank();
+        Shape a(rank, 1);
+        Shape b(rank, 1);
+        std::copy(shapeA.begin(), shapeA.end(),
+                  a.begin() + (rank - shapeA.size()));
+        std::copy(shapeB.begin(), shapeB.end(),
+                  b.begin() + (rank - shapeB.size()));
+        auto getStride = [&](const Shape &shape) {
+            int p = 1;
+            Shape stride(rank);
+            for (auto i = rank; i > 0; --i) {
+                stride[i - 1] = p;
+                p = p * shape[i - 1];
+            }
+            return stride;
+        };
+        Shape strideA = getStride(a);
+        Shape strideB = getStride(b);
 
         auto n = op->getOutput()->size();
         for (size_t i = 0; i < n; ++i) {
-            int c0_index = i / (c[1] * c[2] * c[3]);
-            int c1_index = (i % (c[1] * c[2] * c[3])) / (c[2] * c[3]);
-            int c2_index = ((i % (c[1] * c[2] * c[3])) % (c[2] * c[3])) / c[3];
-            int c3_index = ((i % (c[1] * c[2] * c[3])) % (c[2] * c[3])) % c[3];
-            int a0_index = c0_index % a[0];
-            int a1_index = c1_index % a[1];
-            int a2_index = c2_index % a[2];
-            int a3_index = c3_index % a[3];
-            int b0_index = c0_index % b[0];
-            int b1_index = c1_index % b[1];
-            int b2_index = c2_index % b[2];
-            int b3_index = c3_index % b[3];
-            outptr[i] = doCompute(
-                inptr0[a0_index * a[1] * a[2] * a[3] + a1_index * a[2] * a[3] +
-                       a2_index * a[3] + a3_index],
-                inptr1[b0_index * b[1] * b[2] * b[3] + b1_index * b[2] * b[3] +
-                       b2_index * b[3] + b3_index]);
+            auto shapeIndexC = locate_index(i, shapeC);
+            auto indexA = delocate_index(shapeIndexC, a, strideA);
+            auto indexB = delocate_index(shapeIndexC, b, strideB);
+            outptr[i] = doCompute(inptr0[indexA], inptr1[indexB]);
         }
     }
 };
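The rewritten kernel drops the hard-coded four axes: each input shape is right-aligned against the output rank and padded with leading 1s, row-major strides are computed for the padded shapes, and every output offset is routed through locate_index/delocate_index. The same flow on plain vectors, reusing the _sketch helpers above (broadcastAdd and the float/addition specialization are illustrative, not the kernel API):

#include <algorithm>
#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

// Broadcast-aware elementwise add on raw vectors (illustrative only).
std::vector<float> broadcastAdd(const std::vector<float> &A, const Shape &shapeA,
                                const std::vector<float> &B, const Shape &shapeB,
                                const Shape &shapeC) {
    size_t rank = shapeC.size();
    Shape a(rank, 1), b(rank, 1); // pad with leading 1s to the output rank
    std::copy(shapeA.begin(), shapeA.end(), a.begin() + (rank - shapeA.size()));
    std::copy(shapeB.begin(), shapeB.end(), b.begin() + (rank - shapeB.size()));
    auto getStride = [&](const Shape &s) { // row-major strides of a padded shape
        Shape stride(rank);
        int p = 1;
        for (size_t i = rank; i > 0; --i) {
            stride[i - 1] = p;
            p *= s[i - 1];
        }
        return stride;
    };
    Shape strideA = getStride(a), strideB = getStride(b);
    size_t n = std::accumulate(shapeC.begin(), shapeC.end(), size_t{1},
                               std::multiplies<size_t>());
    std::vector<float> C(n);
    for (size_t i = 0; i < n; ++i) {
        Shape idx = locate_index_sketch(i, shapeC); // output coordinate
        C[i] = A[delocate_index_sketch(idx, a, strideA)] +
               B[delocate_index_sketch(idx, b, strideB)];
    }
    return C;
}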


@@ -66,6 +66,31 @@ bool is_unidirectional_broadcasting(const Shape &A, const Shape &B) {
     return true;
 }
 
+Shape locate_index(size_t inputN, const Shape &shape) {
+    Shape ans(shape.size());
+    auto i = ans.rbegin();
+    auto j = shape.rbegin(), ej = shape.rend();
+    while (j != ej) {
+        auto div = std::div(inputN, *j++);
+        *i++ = div.rem;
+        inputN = div.quot;
+    }
+    return ans;
+}
+
+size_t delocate_index(const Shape &shapeIndex, const Shape &shape,
+                      const Shape &stride) {
+    size_t ans = 0;
+    Shape index(shapeIndex.size());
+    IT_ASSERT(shapeIndex.size() == shape.size());
+    IT_ASSERT(shape.size() == stride.size());
+    for (size_t i = 0; i < shape.size(); ++i) {
+        index[i] = shapeIndex[i] % shape[i];
+        ans += index[i] * stride[i];
+    }
+    return ans;
+}
+
 std::string device_to_str(Device device) {
     std::string deviceStr;
     switch (device) {
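A hypothetical spot check of the index arithmetic, using the _sketch helpers above: for output shape {2, 2, 3}, flat offset 7 unravels to {1, 0, 1}; against an input padded to {1, 1, 3} (row-major strides {3, 3, 1}), the modulo folds both size-1 axes to 0, so only the last axis contributes.

#include <cassert>

int main() {
    Shape shapeC{2, 2, 3};  // output shape
    Shape b{1, 1, 3};       // input shape already padded to rank 3
    Shape strideB{3, 3, 1}; // row-major strides computed from b
    Shape idx = locate_index_sketch(7, shapeC);
    assert((idx == Shape{1, 0, 1}));                     // 7 = 1*6 + 0*3 + 1
    assert(delocate_index_sketch(idx, b, strideB) == 1); // size-1 axes drop out
    return 0;
}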


@@ -0,0 +1,44 @@
+#include "core/graph.h"
+#include "core/runtime.h"
+#include "operators/element_wise.h"
+#include "test.h"
+
+namespace infini {
+
+using ExpectOutput = vector<float>;
+template <class T>
+void testElementWiseNativeCpu(
+    const std::function<void(void *, size_t, DataType)> &generator1,
+    const std::function<void(void *, size_t, DataType)> &generator2,
+    const Shape &shape1, const Shape &shape2, const ExpectOutput &ansVec) {
+    Runtime runtime = NativeCpuRuntimeObj::getInstance();
+    Graph g = make_ref<GraphObj>(runtime);
+
+    auto t1 = g->addTensor(shape1, DataType::Float32);
+    auto t2 = g->addTensor(shape2, DataType::Float32);
+
+    auto op = g->addOp<T>(t1, t2, nullptr);
+    g->dataMalloc();
+    t1->setData(generator1);
+    t2->setData(generator2);
+
+    runtime->run(g);
+    EXPECT_TRUE(op->getOutput()->equalData(ansVec));
+}
+
+TEST(ElementWise, NativeCpu) {
+    testElementWiseNativeCpu<AddObj>(
+        IncrementalGenerator(), IncrementalGenerator(), Shape{1, 2, 2, 3, 1},
+        Shape{2, 1, 1}, ExpectOutput{0, 1, 2, 4, 5, 6, 6, 7, 8, 10, 11, 12});
+    testElementWiseNativeCpu<MulObj>(
+        IncrementalGenerator(), IncrementalGenerator(), Shape{1, 2, 2, 3, 1},
+        Shape{2, 1, 1}, ExpectOutput{0, 0, 0, 3, 4, 5, 0, 0, 0, 9, 10, 11});
+    testElementWiseNativeCpu<SubObj>(
+        IncrementalGenerator(), IncrementalGenerator(), Shape{1, 2, 2, 3, 1},
+        Shape{2, 1, 1}, ExpectOutput{0, 1, 2, 2, 3, 4, 6, 7, 8, 8, 9, 10});
+    testElementWiseNativeCpu<DivObj>(
+        IncrementalGenerator(), OneGenerator(), Shape{1, 2, 2, 3, 1},
+        Shape{2, 1, 1}, ExpectOutput{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
+}
+
+} // namespace infini
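Sanity-checking the first test case by hand: IncrementalGenerator() fills t1 with 0..11 over shape {1, 2, 2, 3, 1} and t2 with {0, 1} over {2, 1, 1}, which right-aligns to {1, 1, 2, 1, 1}; the t2 value therefore toggles every three output elements, matching {0,1,2, 4,5,6, 6,7,8, 10,11,12}. The broadcastAdd sketch above reproduces it (assuming IncrementalGenerator behaves like std::iota starting at 0, which is an assumption about the test utility, not documented here):

#include <numeric>

void checkFirstCase() {
    std::vector<float> A(12);
    std::iota(A.begin(), A.end(), 0.0f); // stand-in for IncrementalGenerator
    std::vector<float> B{0, 1};
    auto C = broadcastAdd(A, {1, 2, 2, 3, 1}, B, {2, 1, 1}, {1, 2, 2, 3, 1});
    // C == {0, 1, 2, 4, 5, 6, 6, 7, 8, 10, 11, 12}
}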