refactor: 整合操作张量数据的方法

Signed-off-by: YdrMaster <ydrml@hotmail.com>
This commit is contained in:
YdrMaster 2023-03-21 14:00:04 +08:00
parent e1c976568d
commit 9db97eb212
17 changed files with 133 additions and 143 deletions

View File

@ -90,15 +90,15 @@ class GraphHandlerObj {
inline void data_malloc() { g->dataMalloc(); }
inline void copy_int32(Tensor tensor, std::vector<int32_t> list) {
tensor->copyData(list);
tensor->copyin(list);
}
inline void copy_int64(Tensor tensor, std::vector<int64_t> list) {
tensor->copyData(list);
tensor->copyin(list);
}
inline void copy_float(Tensor tensor, std::vector<float> list) {
tensor->copyData(list);
tensor->copyin(list);
}
inline void run() { g->getRuntime()->run(g); }

View File

@ -15,39 +15,61 @@ using Shape = vector<ShapeElem>;
class TensorObj : public TensorBaseObj {
private:
Shape shape;
Fuid fuid; // Cloned tensors share the same id. Tensors constructed from
// scratch have a new id.
size_t _size; // Cache of Π(shape).
Fuid fuid; // Cloned tensors share the same id. Tensors constructed from
// scratch have a new id.
// Byte-level helper: forwards `size` bytes starting at `ptr` to
// runtime->copyBlobFromCPU, using this tensor's raw data pointer as the
// destination.
inline void copyin(const void *ptr, size_t size) {
    auto dst = getRawDataPtr<void *>();
    runtime->copyBlobFromCPU(dst, ptr, size);
}
// Byte-level helper: asks runtime->copyBlobToCPU to write `size` bytes into
// `ptr`, using this tensor's raw data pointer as the source.
inline void copyout(void *ptr, size_t size) const {
    auto src = getRawDataPtr<void *>();
    runtime->copyBlobToCPU(ptr, src, size);
}
public:
TensorObj(const Shape &shape, DataType dtype, Runtime runtime);
TensorObj(Shape shape, DataType dtype, Runtime runtime);
virtual ~TensorObj() {}
string toString() const override;
size_t size() const;
size_t getBytes() const;
inline size_t size() const { return _size; }
inline size_t getBytes() const { return _size * dtype.getSize(); }
Shape getDims() const { return shape; }
vector<size_t> getStride() const;
size_t getOffset(const Shape &ds) const;
using TensorBaseObj::getData;
VType getData(const Shape &pos) const;
size_t getOffset(const vector<int> &ds) const;
void dataMalloc();
inline UidBaseType getFuid() const { return fuid; }
void load(std::string file_path);
void save(std::string file_path);
template <typename T> void copyData(const T *dptr) {
// Copy elements from `data`.
template <typename T> inline void copyin(const vector<T> &data) {
IT_ASSERT(DataType::get<T>() == dtype);
IT_ASSERT(data != nullptr);
runtime->copyBlobFromCPU(getRawDataPtr<void *>(), dptr, getBytes());
IT_ASSERT(data.size() >= _size);
copyin(data.data(), getBytes());
}
// Returns a std::vector<T> holding `_size` elements filled from the tensor's
// data. Asserts that T matches the tensor's dtype.
template <typename T> inline auto copyout() const {
    IT_ASSERT(DataType::get<T>() == dtype);
    std::vector<T> host(_size);
    T *const dst = host.data();
    copyout(dst, getBytes());
    return host;
}
// Copy the element at `pos`.
template <typename T> inline auto copyout(const vector<int> &pos) const {
IT_ASSERT(DataType::get<T>() == dtype);
auto offset = getOffset(pos);
auto bytes = dtype.getSize();
T ans;
runtime->copyBlobToCPU(&ans, getRawDataPtr<void *>() + offset * bytes,
bytes);
return ans;
}
template <typename T> void copyData(vector<T> dataVector) {
IT_ASSERT(DataType::get<T>() == dtype);
IT_ASSERT(dataVector.size() >= size());
copyData(dataVector.data());
}
inline auto copyoutFloat() const { return copyout<float>(); }
inline auto copyoutInt32() const { return copyout<int32_t>(); }
inline auto copyoutInt64() const { return copyout<int64_t>(); }
void copyData(const TensorObj *src);
void copyData(const Tensor &src) { copyData(src.get()); }
@ -72,24 +94,6 @@ class TensorObj : public TensorBaseObj {
}
return obj;
}
inline std::vector<float> cloneFloats() const {
IT_ASSERT(data != nullptr);
IT_ASSERT(getDType() == DataType::Float32);
std::vector<float> ans(size());
auto src = getRawDataPtr<void *>();
auto dst = ans.data();
auto bytes = getBytes();
if (runtime->isCpu()) {
memcpy(dst, src, bytes);
} else {
#if USE_CUDA
cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToHost);
#else
IT_TODO_HALT();
#endif
}
return ans;
}
void printData() const;
bool equalData(const Tensor &rhs) const;

View File

@ -41,7 +41,6 @@ class TensorBaseObj : public Object {
IT_ASSERT(data != nullptr);
return data->getPtr<T>();
}
VType getData(size_t offset) const;
DataType getDType() const { return dtype; }
Runtime getRuntime() const { return runtime; }

View File

@ -3,15 +3,16 @@
#include "core/operator.h"
#include "core/runtime.h"
#include "utils/dataloader.h"
#include <numeric>
namespace infini {
TensorObj::TensorObj(const Shape &shape, DataType dtype, Runtime runtime)
: TensorBaseObj(shape.size(), dtype, runtime), shape(shape) {}
VType TensorObj::getData(const Shape &pos) const {
return getData(getOffset(pos));
}
TensorObj::TensorObj(Shape shape_, DataType dtype, Runtime runtime)
: TensorBaseObj(shape.size(), dtype, runtime), shape(std::move(shape_)),
_size(shape.empty()
? 0
: std::accumulate(shape.begin(), shape.end(), 1,
[](auto acc, auto x) { return acc * x; })) {}
string TensorObj::toString() const {
string ret = "Tensor " + std::to_string(guid) + ", Fuid " +
@ -28,7 +29,7 @@ string TensorObj::toString() const {
return ret;
}
size_t TensorObj::getOffset(const Shape &pos) const {
size_t TensorObj::getOffset(const vector<int> &pos) const {
auto nDim = pos.size();
IT_ASSERT(shape.size() == nDim);
if (pos.empty())
@ -53,15 +54,6 @@ vector<size_t> TensorObj::getStride() const {
return ret;
}
size_t TensorObj::size() const {
size_t ret = 1;
for (const auto &d : shape)
ret *= d;
return ret;
}
size_t TensorObj::getBytes() const { return size() * dtype.getSize(); }
void TensorObj::printData() const {
IT_ASSERT(data != nullptr);
if (!runtime->isCpu())

View File

@ -6,9 +6,4 @@ namespace infini {
TensorBaseObj::TensorBaseObj(int dim, DataType dtype, Runtime runtime)
: dim(dim), dtype(dtype), runtime(runtime) {}
VType TensorBaseObj::getData(size_t offset) const {
// TODO: check cuda array
return (data->getPtr<VType *>())[offset];
}
}; // namespace infini
}; // namespace infini

View File

@ -177,7 +177,7 @@ void init_graph_builder(py::module &m) {
py::class_<TensorObj, std::shared_ptr<TensorObj>>(m, "Tensor")
.def("fuid", &TensorObj::getFuid, policy::automatic)
.def("shape", &TensorObj::getDims, policy::move)
.def("cloneFloats", &TensorObj::cloneFloats, policy::move)
.def("copyoutFloat", &TensorObj::copyoutFloat, policy::move)
.def("has_target", &TensorObj::hasTarget, policy::automatic)
.def("src", &TensorObj::getOutputOf, policy::move);
py::class_<OperatorObj, std::shared_ptr<OperatorObj>>(m, "Operator")

View File

@ -64,7 +64,7 @@ class MemboundInterpreter : public Kernel {
vector<uint32_t> valsUint(vals.size());
for (size_t i = 0; i < vals.size(); ++i)
valsUint[i] = (uint32_t)vals[i];
output->copyData(valsUint);
output->copyin(valsUint);
}
void compute(const Operator &op, const RuntimeObj *context) const override {
@ -81,4 +81,4 @@ class MemboundInterpreter : public Kernel {
REGISTER_KERNEL(Device::CPU, OpType::MemBound, DataType::UInt32,
MemboundInterpreter, "MemboundInterpreter_CPU");
} // namespace infini
} // namespace infini

View File

@ -59,13 +59,13 @@ void loadTensorData(TensorObj *tensor, std::string file_path) {
for (int i = 0; i < temp.data_float_size(); ++i) {
data_temp.push_back(temp.data_float(i));
}
tensor->copyData(data_temp);
tensor->copyin(data_temp);
} else if (tensor->getDType() == DataType::UInt32) {
std::vector<uint32_t> data_temp;
for (int i = 0; i < temp.data_uint32_size(); ++i) {
data_temp.push_back(temp.data_uint32(i));
}
tensor->copyData(data_temp);
tensor->copyin(data_temp);
} else {
IT_TODO_HALT();
}

View File

@ -15,8 +15,8 @@ TEST(Graph, build_and_run) {
Tensor w0 = g->addTensor({1, 3, 4}, DataType::UInt32);
Tensor o0 = g->addTensor({1, 2, 4}, DataType::UInt32);
g->dataMalloc();
i0->copyData(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
w0->copyData(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
i0->copyin(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
w0->copyin(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
auto matmul = g->addOpWithOutputs<MatmulObj>(i0, w0, o0);
g->print();
// check inputOf and outputsOf for tensor
@ -33,7 +33,7 @@ TEST(Graph, build_and_run) {
// check execution results
auto ans = make_ref<TensorObj>(Shape{1, 2, 4}, DataType::UInt32, runtime);
ans->dataMalloc();
ans->copyData(vector<uint32_t>{38, 44, 50, 56, 83, 98, 113, 128});
ans->copyin(vector<uint32_t>{38, 44, 50, 56, 83, 98, 113, 128});
EXPECT_TRUE(o0->equalData(ans));
}
@ -84,8 +84,8 @@ TEST(Graph, perf_engine) {
auto matmul = g->addOp<MatmulObj>(i0, w0, nullptr);
g->dataMalloc();
i0->copyData(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
w0->copyData(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
i0->copyin(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
w0->copyin(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
runtime->run(g, true, true);
double perfTime = runtime->getPerfTime(g);
// The example matmul takes 0.0036ms with one core
@ -94,7 +94,7 @@ TEST(Graph, perf_engine) {
// check answer
auto ans = make_ref<TensorObj>(Shape{1, 2, 4}, DataType::UInt32, runtime);
ans->dataMalloc();
ans->copyData(vector<uint32_t>{38, 44, 50, 56, 83, 98, 113, 128});
ans->copyin(vector<uint32_t>{38, 44, 50, 56, 83, 98, 113, 128});
EXPECT_TRUE(matmul->getOutput()->equalData(ans));
}
@ -105,8 +105,8 @@ TEST(Graph, test_tensor_id) {
Tensor w0 = g->addTensor({1, 3, 4}, DataType::UInt32);
Tensor o0 = g->addTensor({1, 2, 4}, DataType::UInt32);
g->dataMalloc();
i0->copyData(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
w0->copyData(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
i0->copyin(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
w0->copyin(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
auto i1 = g->addTensor(i0->clone());
auto matmul = g->addOpWithOutputs<MatmulObj>(i0, w0, o0);
g->print();
@ -123,8 +123,8 @@ TEST(Graph, test_OpVec_ctor) {
Tensor w0 = g->addTensor({1, 3, 4}, DataType::UInt32);
Tensor o0 = g->addTensor({1, 2, 4}, DataType::UInt32);
g->dataMalloc();
i0->copyData(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
w0->copyData(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
i0->copyin(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
w0->copyin(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
auto o1 = g->addTensor(o0->clone());
auto matmul = g->addOpWithOutputs<MatmulObj>(i0, w0, o0);
g->addOp<ReluObj>(o1, nullptr);

View File

@ -19,8 +19,8 @@ namespace infini {
// Tensor w0 = g->addTensor({1, 3, 4}, DataType::UInt32);
// Tensor o0 = g->addTensor({1, 2, 4}, DataType::UInt32);
// g->dataMalloc();
// i0->copyData(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
// w0->copyData(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
// i0->copyin(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
// w0->copyin(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
// auto matmul = g->addOpWithOutputs<MatmulObj>(i0, w0, o0);
// g->print();
// // check inputOf and outputsOf for tensor

View File

@ -14,10 +14,10 @@ TEST(Prtotbuf, save_and_load) {
Tensor u0 = g->addTensor({1, 3, 4}, DataType::UInt32);
Tensor u1 = g->addTensor({1, 3, 4}, DataType::UInt32);
g->dataMalloc();
i0->copyData(vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
w0->copyData(vector<float>{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
u0->copyData(vector<uint32_t>{1, 3, 5, 7, 9, 2, 4, 6, 8, 10, 0, 0});
u1->copyData(vector<uint32_t>{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0});
i0->copyin(vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
w0->copyin(vector<float>{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
u0->copyin(vector<uint32_t>{1, 3, 5, 7, 9, 2, 4, 6, 8, 10, 0, 0});
u1->copyin(vector<uint32_t>{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0});
i0->save("i0.pb");
w0->printData();
w0->load("i0.pb");

View File

@ -22,8 +22,8 @@ TEST(CUDA_BatchNorm, run) {
// Build input data on CPU
gCpu->dataMalloc();
iCpu->setData(IncrementalGenerator());
meanCpu->copyData(vector<float>{1, 6, 9});
varCpu->copyData(vector<float>{4, 1, 9});
meanCpu->copyin(vector<float>{1, 6, 9});
varCpu->copyin(vector<float>{4, 1, 9});
scaleCpu->setData(OneGenerator());
biasCpu->setData(ZeroGenerator());

View File

@ -181,8 +181,8 @@ TEST(Gather, Cuda) {
auto input = gCpu->addTensor({3, 2}, DataType::Float32);
auto index = gCpu->addTensor({2, 2}, DataType::UInt32);
gCpu->dataMalloc();
input->copyData(vector<float>{1, 2, 3, 4, 5, 6});
index->copyData(vector<uint32_t>{0, 1, 1, 2});
input->copyin(vector<float>{1, 2, 3, 4, 5, 6});
index->copyin(vector<uint32_t>{0, 1, 1, 2});
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
@ -203,7 +203,7 @@ TEST(Gather, Cuda) {
auto index = gCpu->addTensor({1, 2}, DataType::UInt32);
gCpu->dataMalloc();
input->setData(IncrementalGenerator());
index->copyData(vector<uint32_t>{0, 2});
index->copyin(vector<uint32_t>{0, 2});
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
@ -224,7 +224,7 @@ TEST(Gather, Cuda) {
auto index = gCpu->addTensor({3, 1}, DataType::UInt32);
gCpu->dataMalloc();
input->setData(IncrementalGenerator());
index->copyData(vector<uint32_t>{0, 3, 1});
index->copyin(vector<uint32_t>{0, 3, 1});
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);

View File

@ -18,7 +18,7 @@ void test_reducemean(const Shape &shape, const vector<float> &data,
// Build input data on CPU
Tensor icpu = make_ref<TensorObj>(shape, DataType::Float32, cpuRuntime);
icpu->dataMalloc();
icpu->copyData(data);
icpu->copyin(data);
// Build CUDA graph
Graph g = make_ref<GraphObj>(cudaRuntime);

View File

@ -13,8 +13,8 @@ TEST(Resize, Cuda_downsample_sizes_nearest) {
auto input = gCpu->addTensor({1, 1, 2, 4}, DataType::Float32);
auto sizes = gCpu->addTensor({4}, DataType::UInt32);
gCpu->dataMalloc();
input->copyData(vector<float>{1, 2, 3, 4, 5, 6, 7, 8});
sizes->copyData(vector<uint32_t>{1, 1, 1, 3});
input->copyin(vector<float>{1, 2, 3, 4, 5, 6, 7, 8});
sizes->copyin(vector<uint32_t>{1, 1, 1, 3});
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
@ -38,8 +38,8 @@ TEST(Resize, Cuda_upsample_sizes_nearest_notlarger) {
auto input = gCpu->addTensor({1, 1, 2, 2}, DataType::Float32);
auto sizes = gCpu->addTensor({2}, DataType::UInt32);
gCpu->dataMalloc();
input->copyData(vector<float>{1, 2, 3, 4});
sizes->copyData(vector<uint32_t>{7, 8});
input->copyin(vector<float>{1, 2, 3, 4});
sizes->copyin(vector<uint32_t>{7, 8});
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
@ -68,8 +68,8 @@ TEST(Resize, Cuda_upsample_sizes_nearest_notsmaller) {
auto input = gCpu->addTensor({1, 1, 2, 2}, DataType::Float32);
auto sizes = gCpu->addTensor({2}, DataType::UInt32);
gCpu->dataMalloc();
input->copyData(vector<float>{1, 2, 3, 4});
sizes->copyData(vector<uint32_t>{7, 8});
input->copyin(vector<float>{1, 2, 3, 4});
sizes->copyin(vector<uint32_t>{7, 8});
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
@ -98,9 +98,9 @@ TEST(Resize, Cuda_upsample_sizes_nearest_ceil_half_pixel) {
auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32);
auto sizes = gCpu->addTensor({4}, DataType::UInt32);
gCpu->dataMalloc();
input->copyData(
input->copyin(
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
sizes->copyData(vector<uint32_t>{1, 1, 8, 8});
sizes->copyin(vector<uint32_t>{1, 1, 8, 8});
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
@ -131,9 +131,9 @@ TEST(Resize, Cuda_upsample_sizes_nearest_floor_align_corners) {
auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32);
auto sizes = gCpu->addTensor({2}, DataType::UInt32);
gCpu->dataMalloc();
input->copyData(
input->copyin(
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
sizes->copyData(vector<uint32_t>{8, 8});
sizes->copyin(vector<uint32_t>{8, 8});
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
@ -164,9 +164,9 @@ TEST(Resize, Cuda_upsample_sizes_nearest_round_prefer_ceil_asymmetri) {
auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32);
auto sizes = gCpu->addTensor({4}, DataType::UInt32);
gCpu->dataMalloc();
input->copyData(
input->copyin(
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
sizes->copyData(vector<uint32_t>{1, 1, 8, 8});
sizes->copyin(vector<uint32_t>{1, 1, 8, 8});
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
@ -197,8 +197,8 @@ TEST(Resize, Cuda_downsample_scales_nearest) {
auto input = gCpu->addTensor({1, 1, 2, 4}, DataType::Float32);
auto scales = gCpu->addTensor({4}, DataType::Float32);
gCpu->dataMalloc();
input->copyData(vector<float>{1, 2, 3, 4, 5, 6, 7, 8});
scales->copyData(vector<float>{1, 1, 0.6, 0.6});
input->copyin(vector<float>{1, 2, 3, 4, 5, 6, 7, 8});
scales->copyin(vector<float>{1, 1, 0.6, 0.6});
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
@ -221,8 +221,8 @@ TEST(Resize, Cuda_upsample_scales_nearest) {
auto input = gCpu->addTensor({1, 1, 2, 2}, DataType::Float32);
auto scales = gCpu->addTensor({4}, DataType::Float32);
gCpu->dataMalloc();
input->copyData(vector<float>{1, 2, 3, 4});
scales->copyData(vector<float>{1, 1, 2, 3});
input->copyin(vector<float>{1, 2, 3, 4});
scales->copyin(vector<float>{1, 1, 2, 3});
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
@ -247,8 +247,8 @@ TEST(Resize, Cuda_upsample_scales_nearest_axes_3_2) {
auto input = gCpu->addTensor({1, 1, 2, 2}, DataType::Float32);
auto scales = gCpu->addTensor({2}, DataType::Float32);
gCpu->dataMalloc();
input->copyData(vector<float>{1, 2, 3, 4});
scales->copyData(vector<float>{3, 2});
input->copyin(vector<float>{1, 2, 3, 4});
scales->copyin(vector<float>{3, 2});
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
@ -273,8 +273,8 @@ TEST(Resize, Cuda_downsample_scales_linear) {
auto input = gCpu->addTensor({1, 1, 2, 4}, DataType::Float32);
auto scales = gCpu->addTensor({4}, DataType::Float32);
gCpu->dataMalloc();
input->copyData(vector<float>{1, 2, 3, 4, 5, 6, 7, 8});
scales->copyData(vector<float>{1, 1, 0.6, 0.6});
input->copyin(vector<float>{1, 2, 3, 4, 5, 6, 7, 8});
scales->copyin(vector<float>{1, 1, 0.6, 0.6});
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
@ -297,8 +297,8 @@ TEST(Resize, Cuda_downsample_scales_linear_aligncorners) {
auto input = gCpu->addTensor({1, 1, 2, 4}, DataType::Float32);
auto scales = gCpu->addTensor({4}, DataType::Float32);
gCpu->dataMalloc();
input->copyData(vector<float>{1, 2, 3, 4, 5, 6, 7, 8});
scales->copyData(vector<float>{1, 1, 0.6, 0.6});
input->copyin(vector<float>{1, 2, 3, 4, 5, 6, 7, 8});
scales->copyin(vector<float>{1, 1, 0.6, 0.6});
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
@ -323,8 +323,8 @@ TEST(Resize, Cuda_upsample_scales_linear) {
auto input = gCpu->addTensor({1, 1, 2, 2}, DataType::Float32);
auto scales = gCpu->addTensor({4}, DataType::Float32);
gCpu->dataMalloc();
input->copyData(vector<float>{1, 2, 3, 4});
scales->copyData(vector<float>{1, 1, 2, 2});
input->copyin(vector<float>{1, 2, 3, 4});
scales->copyin(vector<float>{1, 1, 2, 2});
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
@ -349,8 +349,8 @@ TEST(Resize, Cuda_upsample_scales_linear_align_corners) {
auto input = gCpu->addTensor({1, 1, 2, 2}, DataType::Float32);
auto scales = gCpu->addTensor({4}, DataType::Float32);
gCpu->dataMalloc();
input->copyData(vector<float>{1, 2, 3, 4});
scales->copyData(vector<float>{1, 1, 2, 2});
input->copyin(vector<float>{1, 2, 3, 4});
scales->copyin(vector<float>{1, 1, 2, 2});
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
@ -377,9 +377,9 @@ TEST(Resize, Cuda_downsample_sizes_linear_pytorchhalfpixel) {
auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32);
auto sizes = gCpu->addTensor({4}, DataType::UInt32);
gCpu->dataMalloc();
input->copyData(
input->copyin(
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
sizes->copyData(vector<uint32_t>{1, 1, 3, 1});
sizes->copyin(vector<uint32_t>{1, 1, 3, 1});
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
@ -406,10 +406,10 @@ TEST(Resize, Cuda_tf_crop_and_resize) {
auto sizes = gCpu->addTensor({4}, DataType::UInt32);
auto roi = gCpu->addTensor({8}, DataType::Float32);
gCpu->dataMalloc();
input->copyData(
input->copyin(
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
sizes->copyData(vector<uint32_t>{1, 1, 3, 3});
roi->copyData(vector<float>{0, 0, 0.4, 0.6, 1, 1, 0.6, 0.8});
sizes->copyin(vector<uint32_t>{1, 1, 3, 3});
roi->copyin(vector<float>{0, 0, 0.4, 0.6, 1, 1, 0.6, 0.8});
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
@ -437,10 +437,10 @@ TEST(Resize, Cuda_tf_crop_and_resize_axes_3_2) {
auto sizes = gCpu->addTensor({2}, DataType::UInt32);
auto roi = gCpu->addTensor({4}, DataType::Float32);
gCpu->dataMalloc();
input->copyData(
input->copyin(
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
sizes->copyData(vector<uint32_t>{3, 3});
roi->copyData(vector<float>{0.6, 0.4, 0.8, 0.6});
sizes->copyin(vector<uint32_t>{3, 3});
roi->copyin(vector<float>{0.6, 0.4, 0.8, 0.6});
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
@ -467,9 +467,9 @@ TEST(Resize, Cuda_downsample_scales_cubic) {
auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32);
auto scales = gCpu->addTensor({4}, DataType::Float32);
gCpu->dataMalloc();
input->copyData(
input->copyin(
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
scales->copyData(vector<float>{1.0, 1.0, 0.8, 0.8});
scales->copyin(vector<float>{1.0, 1.0, 0.8, 0.8});
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
@ -494,9 +494,9 @@ TEST(Resize, Cuda_downsample_scales_cubic_align_corners) {
auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32);
auto scales = gCpu->addTensor({4}, DataType::Float32);
gCpu->dataMalloc();
input->copyData(
input->copyin(
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
scales->copyData(vector<float>{1.0, 1.0, 0.8, 0.8});
scales->copyin(vector<float>{1.0, 1.0, 0.8, 0.8});
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
@ -522,9 +522,9 @@ TEST(Resize, Cuda_upsample_scales_cubic) {
auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32);
auto scales = gCpu->addTensor({4}, DataType::Float32);
gCpu->dataMalloc();
input->copyData(
input->copyin(
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
scales->copyData(vector<float>{1.0, 1.0, 2, 2});
scales->copyin(vector<float>{1.0, 1.0, 2, 2});
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
@ -559,9 +559,9 @@ TEST(Resize, Cuda_upsample_scales_cubic_align_corners) {
auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32);
auto scales = gCpu->addTensor({4}, DataType::Float32);
gCpu->dataMalloc();
input->copyData(
input->copyin(
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
scales->copyData(vector<float>{1.0, 1.0, 2, 2});
scales->copyin(vector<float>{1.0, 1.0, 2, 2});
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
@ -598,9 +598,9 @@ TEST(Resize, Cuda_upsample_scales_cubic_asymmetric) {
auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32);
auto scales = gCpu->addTensor({4}, DataType::Float32);
gCpu->dataMalloc();
input->copyData(
input->copyin(
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
scales->copyData(vector<float>{1.0, 1.0, 2, 2});
scales->copyin(vector<float>{1.0, 1.0, 2, 2});
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
@ -633,9 +633,9 @@ TEST(Resize, Cuda_downsample_sizes_cubic) {
auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32);
auto sizes = gCpu->addTensor({4}, DataType::UInt32);
gCpu->dataMalloc();
input->copyData(
input->copyin(
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
sizes->copyData(vector<uint32_t>{1, 1, 3, 3});
sizes->copyin(vector<uint32_t>{1, 1, 3, 3});
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
@ -667,9 +667,9 @@ TEST(Resize, Cuda_upsample_sizes_cubic) {
auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32);
auto sizes = gCpu->addTensor({4}, DataType::UInt32);
gCpu->dataMalloc();
input->copyData(
input->copyin(
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
sizes->copyData(vector<uint32_t>{1, 1, 9, 10});
sizes->copyin(vector<uint32_t>{1, 1, 9, 10});
auto cudaRuntime = make_ref<CudaRuntimeObj>();
Graph gCuda = make_ref<GraphObj>(cudaRuntime);

View File

@ -61,7 +61,7 @@ TEST(Conv, NaiveCPU) {
auto ans =
make_ref<TensorObj>(Shape{1, 2, 2, 2}, DataType::UInt32, runtime);
ans->dataMalloc();
ans->copyData(
ans->copyin(
vector<uint32_t>{4794, 4386, 8199, 7506, 11274, 10542, 20835, 19656});
EXPECT_TRUE(conv->getOutput()->equalData(ans));
}

View File

@ -12,7 +12,7 @@ TEST(Resize, ShapeInference) {
Tensor i = g->addTensor({1, 1, 2, 4}, DataType::UInt32);
Tensor sizes = g->addTensor({4}, DataType::UInt32);
sizes->dataMalloc();
sizes->copyData(vector<uint32_t>{1, 1, 1, 3});
sizes->copyin(vector<uint32_t>{1, 1, 1, 3});
auto op = g->addOp<ResizeObj>(
i, nullptr, std::nullopt, sizes, nullptr, nullptr,
ResizeObj::EKeepAspectRatioPolicy::stretch);
@ -24,7 +24,7 @@ TEST(Resize, ShapeInference) {
Tensor i = g->addTensor({1, 1, 2, 4}, DataType::UInt32);
Tensor sizes = g->addTensor({2}, DataType::UInt32);
sizes->dataMalloc();
sizes->copyData(vector<uint32_t>{1, 3});
sizes->copyin(vector<uint32_t>{1, 3});
auto op = g->addOp<ResizeObj>(
i, nullptr, vector<int>{2, 3}, sizes, nullptr, nullptr,
ResizeObj::EKeepAspectRatioPolicy::stretch);
@ -36,7 +36,7 @@ TEST(Resize, ShapeInference) {
Tensor i = g->addTensor({1, 3, 2, 4}, DataType::UInt32);
Tensor sizes = g->addTensor({2}, DataType::UInt32);
sizes->dataMalloc();
sizes->copyData(vector<uint32_t>{7, 8});
sizes->copyin(vector<uint32_t>{7, 8});
auto op = g->addOp<ResizeObj>(
i, nullptr, vector<int>{2, 3}, sizes, nullptr, nullptr,
ResizeObj::EKeepAspectRatioPolicy::notLarger);
@ -48,7 +48,7 @@ TEST(Resize, ShapeInference) {
Tensor i = g->addTensor({1, 3, 2, 4}, DataType::UInt32);
Tensor sizes = g->addTensor({3}, DataType::UInt32);
sizes->dataMalloc();
sizes->copyData(vector<uint32_t>{2, 6, 8});
sizes->copyin(vector<uint32_t>{2, 6, 8});
auto op = g->addOp<ResizeObj>(
i, nullptr, vector<int>{1, 2, 3}, sizes, nullptr, nullptr,
ResizeObj::EKeepAspectRatioPolicy::notSmaller);
@ -60,7 +60,7 @@ TEST(Resize, ShapeInference) {
Tensor i = g->addTensor({1, 1, 4, 4}, DataType::UInt32);
Tensor scales = g->addTensor({3}, DataType::Float32);
scales->dataMalloc();
scales->copyData(vector<float>{1, 0.8, 0.8});
scales->copyin(vector<float>{1, 0.8, 0.8});
auto op = g->addOp<ResizeObj>(i, nullptr, vector<int>{1, 2, 3}, nullptr,
scales, nullptr);
EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 1, 3, 3}));
@ -71,7 +71,7 @@ TEST(Resize, ShapeInference) {
Tensor i = g->addTensor({1, 1, 2, 2}, DataType::UInt32);
Tensor scales = g->addTensor({4}, DataType::Float32);
scales->dataMalloc();
scales->copyData(vector<float>{1, 1, 2, 2});
scales->copyin(vector<float>{1, 1, 2, 2});
auto op = g->addOp<ResizeObj>(i, nullptr, std::nullopt, nullptr, scales,
nullptr);
EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 1, 4, 4}));