forked from jiuyuan/InfiniTensor
refactor: 整合操作张量数据的方法
Signed-off-by: YdrMaster <ydrml@hotmail.com>
This commit is contained in:
parent
e1c976568d
commit
9db97eb212
|
@ -90,15 +90,15 @@ class GraphHandlerObj {
|
|||
inline void data_malloc() { g->dataMalloc(); }
|
||||
|
||||
inline void copy_int32(Tensor tensor, std::vector<int32_t> list) {
|
||||
tensor->copyData(list);
|
||||
tensor->copyin(list);
|
||||
}
|
||||
|
||||
inline void copy_int64(Tensor tensor, std::vector<int64_t> list) {
|
||||
tensor->copyData(list);
|
||||
tensor->copyin(list);
|
||||
}
|
||||
|
||||
inline void copy_float(Tensor tensor, std::vector<float> list) {
|
||||
tensor->copyData(list);
|
||||
tensor->copyin(list);
|
||||
}
|
||||
|
||||
inline void run() { g->getRuntime()->run(g); }
|
||||
|
|
|
@ -15,39 +15,61 @@ using Shape = vector<ShapeElem>;
|
|||
class TensorObj : public TensorBaseObj {
|
||||
private:
|
||||
Shape shape;
|
||||
Fuid fuid; // Cloned tensors share the same id. Tensors constructed from
|
||||
// scratch have a new id.
|
||||
size_t _size; // Cache of Π(shape).
|
||||
Fuid fuid; // Cloned tensors share the same id. Tensors constructed from
|
||||
// scratch have a new id.
|
||||
|
||||
inline void copyin(const void *ptr, size_t size) {
|
||||
runtime->copyBlobFromCPU(getRawDataPtr<void *>(), ptr, size);
|
||||
}
|
||||
inline void copyout(void *ptr, size_t size) const {
|
||||
runtime->copyBlobToCPU(ptr, getRawDataPtr<void *>(), size);
|
||||
}
|
||||
|
||||
public:
|
||||
TensorObj(const Shape &shape, DataType dtype, Runtime runtime);
|
||||
TensorObj(Shape shape, DataType dtype, Runtime runtime);
|
||||
virtual ~TensorObj() {}
|
||||
string toString() const override;
|
||||
|
||||
size_t size() const;
|
||||
size_t getBytes() const;
|
||||
inline size_t size() const { return _size; }
|
||||
inline size_t getBytes() const { return _size * dtype.getSize(); }
|
||||
|
||||
Shape getDims() const { return shape; }
|
||||
vector<size_t> getStride() const;
|
||||
size_t getOffset(const Shape &ds) const;
|
||||
using TensorBaseObj::getData;
|
||||
VType getData(const Shape &pos) const;
|
||||
size_t getOffset(const vector<int> &ds) const;
|
||||
void dataMalloc();
|
||||
inline UidBaseType getFuid() const { return fuid; }
|
||||
|
||||
void load(std::string file_path);
|
||||
void save(std::string file_path);
|
||||
|
||||
template <typename T> void copyData(const T *dptr) {
|
||||
// Copy elements from `data`.
|
||||
template <typename T> inline void copyin(const vector<T> &data) {
|
||||
IT_ASSERT(DataType::get<T>() == dtype);
|
||||
IT_ASSERT(data != nullptr);
|
||||
runtime->copyBlobFromCPU(getRawDataPtr<void *>(), dptr, getBytes());
|
||||
IT_ASSERT(data.size() >= _size);
|
||||
copyin(data.data(), getBytes());
|
||||
}
|
||||
// Copy all the elements to a vector.
|
||||
template <typename T> inline auto copyout() const {
|
||||
IT_ASSERT(DataType::get<T>() == dtype);
|
||||
std::vector<T> ans(_size);
|
||||
copyout(ans.data(), getBytes());
|
||||
return ans;
|
||||
}
|
||||
// Copy the element at `pos`.
|
||||
template <typename T> inline auto copyout(const vector<int> &pos) const {
|
||||
IT_ASSERT(DataType::get<T>() == dtype);
|
||||
auto offset = getOffset(pos);
|
||||
auto bytes = dtype.getSize();
|
||||
T ans;
|
||||
runtime->copyBlobToCPU(&ans, getRawDataPtr<void *>() + offset * bytes,
|
||||
bytes);
|
||||
return ans;
|
||||
}
|
||||
|
||||
template <typename T> void copyData(vector<T> dataVector) {
|
||||
IT_ASSERT(DataType::get<T>() == dtype);
|
||||
IT_ASSERT(dataVector.size() >= size());
|
||||
copyData(dataVector.data());
|
||||
}
|
||||
inline auto copyoutFloat() const { return copyout<float>(); }
|
||||
inline auto copyoutInt32() const { return copyout<int32_t>(); }
|
||||
inline auto copyoutInt64() const { return copyout<int64_t>(); }
|
||||
|
||||
void copyData(const TensorObj *src);
|
||||
void copyData(const Tensor &src) { copyData(src.get()); }
|
||||
|
@ -72,24 +94,6 @@ class TensorObj : public TensorBaseObj {
|
|||
}
|
||||
return obj;
|
||||
}
|
||||
inline std::vector<float> cloneFloats() const {
|
||||
IT_ASSERT(data != nullptr);
|
||||
IT_ASSERT(getDType() == DataType::Float32);
|
||||
std::vector<float> ans(size());
|
||||
auto src = getRawDataPtr<void *>();
|
||||
auto dst = ans.data();
|
||||
auto bytes = getBytes();
|
||||
if (runtime->isCpu()) {
|
||||
memcpy(dst, src, bytes);
|
||||
} else {
|
||||
#if USE_CUDA
|
||||
cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToHost);
|
||||
#else
|
||||
IT_TODO_HALT();
|
||||
#endif
|
||||
}
|
||||
return ans;
|
||||
}
|
||||
|
||||
void printData() const;
|
||||
bool equalData(const Tensor &rhs) const;
|
||||
|
|
|
@ -41,7 +41,6 @@ class TensorBaseObj : public Object {
|
|||
IT_ASSERT(data != nullptr);
|
||||
return data->getPtr<T>();
|
||||
}
|
||||
VType getData(size_t offset) const;
|
||||
|
||||
DataType getDType() const { return dtype; }
|
||||
Runtime getRuntime() const { return runtime; }
|
||||
|
|
|
@ -3,15 +3,16 @@
|
|||
#include "core/operator.h"
|
||||
#include "core/runtime.h"
|
||||
#include "utils/dataloader.h"
|
||||
#include <numeric>
|
||||
|
||||
namespace infini {
|
||||
|
||||
TensorObj::TensorObj(const Shape &shape, DataType dtype, Runtime runtime)
|
||||
: TensorBaseObj(shape.size(), dtype, runtime), shape(shape) {}
|
||||
|
||||
VType TensorObj::getData(const Shape &pos) const {
|
||||
return getData(getOffset(pos));
|
||||
}
|
||||
TensorObj::TensorObj(Shape shape_, DataType dtype, Runtime runtime)
|
||||
: TensorBaseObj(shape.size(), dtype, runtime), shape(std::move(shape_)),
|
||||
_size(shape.empty()
|
||||
? 0
|
||||
: std::accumulate(shape.begin(), shape.end(), 1,
|
||||
[](auto acc, auto x) { return acc * x; })) {}
|
||||
|
||||
string TensorObj::toString() const {
|
||||
string ret = "Tensor " + std::to_string(guid) + ", Fuid " +
|
||||
|
@ -28,7 +29,7 @@ string TensorObj::toString() const {
|
|||
return ret;
|
||||
}
|
||||
|
||||
size_t TensorObj::getOffset(const Shape &pos) const {
|
||||
size_t TensorObj::getOffset(const vector<int> &pos) const {
|
||||
auto nDim = pos.size();
|
||||
IT_ASSERT(shape.size() == nDim);
|
||||
if (pos.empty())
|
||||
|
@ -53,15 +54,6 @@ vector<size_t> TensorObj::getStride() const {
|
|||
return ret;
|
||||
}
|
||||
|
||||
size_t TensorObj::size() const {
|
||||
size_t ret = 1;
|
||||
for (const auto &d : shape)
|
||||
ret *= d;
|
||||
return ret;
|
||||
}
|
||||
|
||||
size_t TensorObj::getBytes() const { return size() * dtype.getSize(); }
|
||||
|
||||
void TensorObj::printData() const {
|
||||
IT_ASSERT(data != nullptr);
|
||||
if (!runtime->isCpu())
|
||||
|
|
|
@ -6,9 +6,4 @@ namespace infini {
|
|||
TensorBaseObj::TensorBaseObj(int dim, DataType dtype, Runtime runtime)
|
||||
: dim(dim), dtype(dtype), runtime(runtime) {}
|
||||
|
||||
VType TensorBaseObj::getData(size_t offset) const {
|
||||
// TODO: check cuda array
|
||||
return (data->getPtr<VType *>())[offset];
|
||||
}
|
||||
|
||||
}; // namespace infini
|
||||
}; // namespace infini
|
||||
|
|
|
@ -177,7 +177,7 @@ void init_graph_builder(py::module &m) {
|
|||
py::class_<TensorObj, std::shared_ptr<TensorObj>>(m, "Tensor")
|
||||
.def("fuid", &TensorObj::getFuid, policy::automatic)
|
||||
.def("shape", &TensorObj::getDims, policy::move)
|
||||
.def("cloneFloats", &TensorObj::cloneFloats, policy::move)
|
||||
.def("copyoutFloat", &TensorObj::copyoutFloat, policy::move)
|
||||
.def("has_target", &TensorObj::hasTarget, policy::automatic)
|
||||
.def("src", &TensorObj::getOutputOf, policy::move);
|
||||
py::class_<OperatorObj, std::shared_ptr<OperatorObj>>(m, "Operator")
|
||||
|
|
|
@ -64,7 +64,7 @@ class MemboundInterpreter : public Kernel {
|
|||
vector<uint32_t> valsUint(vals.size());
|
||||
for (size_t i = 0; i < vals.size(); ++i)
|
||||
valsUint[i] = (uint32_t)vals[i];
|
||||
output->copyData(valsUint);
|
||||
output->copyin(valsUint);
|
||||
}
|
||||
|
||||
void compute(const Operator &op, const RuntimeObj *context) const override {
|
||||
|
@ -81,4 +81,4 @@ class MemboundInterpreter : public Kernel {
|
|||
REGISTER_KERNEL(Device::CPU, OpType::MemBound, DataType::UInt32,
|
||||
MemboundInterpreter, "MemboundInterpreter_CPU");
|
||||
|
||||
} // namespace infini
|
||||
} // namespace infini
|
||||
|
|
|
@ -59,13 +59,13 @@ void loadTensorData(TensorObj *tensor, std::string file_path) {
|
|||
for (int i = 0; i < temp.data_float_size(); ++i) {
|
||||
data_temp.push_back(temp.data_float(i));
|
||||
}
|
||||
tensor->copyData(data_temp);
|
||||
tensor->copyin(data_temp);
|
||||
} else if (tensor->getDType() == DataType::UInt32) {
|
||||
std::vector<uint32_t> data_temp;
|
||||
for (int i = 0; i < temp.data_uint32_size(); ++i) {
|
||||
data_temp.push_back(temp.data_uint32(i));
|
||||
}
|
||||
tensor->copyData(data_temp);
|
||||
tensor->copyin(data_temp);
|
||||
} else {
|
||||
IT_TODO_HALT();
|
||||
}
|
||||
|
|
|
@ -15,8 +15,8 @@ TEST(Graph, build_and_run) {
|
|||
Tensor w0 = g->addTensor({1, 3, 4}, DataType::UInt32);
|
||||
Tensor o0 = g->addTensor({1, 2, 4}, DataType::UInt32);
|
||||
g->dataMalloc();
|
||||
i0->copyData(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
|
||||
w0->copyData(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
|
||||
i0->copyin(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
|
||||
w0->copyin(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
|
||||
auto matmul = g->addOpWithOutputs<MatmulObj>(i0, w0, o0);
|
||||
g->print();
|
||||
// check inputOf and outputsOf for tensor
|
||||
|
@ -33,7 +33,7 @@ TEST(Graph, build_and_run) {
|
|||
// check execution results
|
||||
auto ans = make_ref<TensorObj>(Shape{1, 2, 4}, DataType::UInt32, runtime);
|
||||
ans->dataMalloc();
|
||||
ans->copyData(vector<uint32_t>{38, 44, 50, 56, 83, 98, 113, 128});
|
||||
ans->copyin(vector<uint32_t>{38, 44, 50, 56, 83, 98, 113, 128});
|
||||
EXPECT_TRUE(o0->equalData(ans));
|
||||
}
|
||||
|
||||
|
@ -84,8 +84,8 @@ TEST(Graph, perf_engine) {
|
|||
auto matmul = g->addOp<MatmulObj>(i0, w0, nullptr);
|
||||
|
||||
g->dataMalloc();
|
||||
i0->copyData(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
|
||||
w0->copyData(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
|
||||
i0->copyin(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
|
||||
w0->copyin(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
|
||||
runtime->run(g, true, true);
|
||||
double perfTime = runtime->getPerfTime(g);
|
||||
// The example matmul takes 0.0036ms with one core
|
||||
|
@ -94,7 +94,7 @@ TEST(Graph, perf_engine) {
|
|||
// check answer
|
||||
auto ans = make_ref<TensorObj>(Shape{1, 2, 4}, DataType::UInt32, runtime);
|
||||
ans->dataMalloc();
|
||||
ans->copyData(vector<uint32_t>{38, 44, 50, 56, 83, 98, 113, 128});
|
||||
ans->copyin(vector<uint32_t>{38, 44, 50, 56, 83, 98, 113, 128});
|
||||
EXPECT_TRUE(matmul->getOutput()->equalData(ans));
|
||||
}
|
||||
|
||||
|
@ -105,8 +105,8 @@ TEST(Graph, test_tensor_id) {
|
|||
Tensor w0 = g->addTensor({1, 3, 4}, DataType::UInt32);
|
||||
Tensor o0 = g->addTensor({1, 2, 4}, DataType::UInt32);
|
||||
g->dataMalloc();
|
||||
i0->copyData(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
|
||||
w0->copyData(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
|
||||
i0->copyin(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
|
||||
w0->copyin(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
|
||||
auto i1 = g->addTensor(i0->clone());
|
||||
auto matmul = g->addOpWithOutputs<MatmulObj>(i0, w0, o0);
|
||||
g->print();
|
||||
|
@ -123,8 +123,8 @@ TEST(Graph, test_OpVec_ctor) {
|
|||
Tensor w0 = g->addTensor({1, 3, 4}, DataType::UInt32);
|
||||
Tensor o0 = g->addTensor({1, 2, 4}, DataType::UInt32);
|
||||
g->dataMalloc();
|
||||
i0->copyData(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
|
||||
w0->copyData(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
|
||||
i0->copyin(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
|
||||
w0->copyin(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
|
||||
auto o1 = g->addTensor(o0->clone());
|
||||
auto matmul = g->addOpWithOutputs<MatmulObj>(i0, w0, o0);
|
||||
g->addOp<ReluObj>(o1, nullptr);
|
||||
|
|
|
@ -19,8 +19,8 @@ namespace infini {
|
|||
// Tensor w0 = g->addTensor({1, 3, 4}, DataType::UInt32);
|
||||
// Tensor o0 = g->addTensor({1, 2, 4}, DataType::UInt32);
|
||||
// g->dataMalloc();
|
||||
// i0->copyData(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
|
||||
// w0->copyData(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
|
||||
// i0->copyin(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
|
||||
// w0->copyin(vector<uint32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
|
||||
// auto matmul = g->addOpWithOutputs<MatmulObj>(i0, w0, o0);
|
||||
// g->print();
|
||||
// // check inputOf and outputsOf for tensor
|
||||
|
|
|
@ -14,10 +14,10 @@ TEST(Prtotbuf, save_and_load) {
|
|||
Tensor u0 = g->addTensor({1, 3, 4}, DataType::UInt32);
|
||||
Tensor u1 = g->addTensor({1, 3, 4}, DataType::UInt32);
|
||||
g->dataMalloc();
|
||||
i0->copyData(vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
|
||||
w0->copyData(vector<float>{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
|
||||
u0->copyData(vector<uint32_t>{1, 3, 5, 7, 9, 2, 4, 6, 8, 10, 0, 0});
|
||||
u1->copyData(vector<uint32_t>{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0});
|
||||
i0->copyin(vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
|
||||
w0->copyin(vector<float>{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
|
||||
u0->copyin(vector<uint32_t>{1, 3, 5, 7, 9, 2, 4, 6, 8, 10, 0, 0});
|
||||
u1->copyin(vector<uint32_t>{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0});
|
||||
i0->save("i0.pb");
|
||||
w0->printData();
|
||||
w0->load("i0.pb");
|
||||
|
|
|
@ -22,8 +22,8 @@ TEST(CUDA_BatchNorm, run) {
|
|||
// Build input data on CPU
|
||||
gCpu->dataMalloc();
|
||||
iCpu->setData(IncrementalGenerator());
|
||||
meanCpu->copyData(vector<float>{1, 6, 9});
|
||||
varCpu->copyData(vector<float>{4, 1, 9});
|
||||
meanCpu->copyin(vector<float>{1, 6, 9});
|
||||
varCpu->copyin(vector<float>{4, 1, 9});
|
||||
scaleCpu->setData(OneGenerator());
|
||||
biasCpu->setData(ZeroGenerator());
|
||||
|
||||
|
|
|
@ -181,8 +181,8 @@ TEST(Gather, Cuda) {
|
|||
auto input = gCpu->addTensor({3, 2}, DataType::Float32);
|
||||
auto index = gCpu->addTensor({2, 2}, DataType::UInt32);
|
||||
gCpu->dataMalloc();
|
||||
input->copyData(vector<float>{1, 2, 3, 4, 5, 6});
|
||||
index->copyData(vector<uint32_t>{0, 1, 1, 2});
|
||||
input->copyin(vector<float>{1, 2, 3, 4, 5, 6});
|
||||
index->copyin(vector<uint32_t>{0, 1, 1, 2});
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
||||
|
@ -203,7 +203,7 @@ TEST(Gather, Cuda) {
|
|||
auto index = gCpu->addTensor({1, 2}, DataType::UInt32);
|
||||
gCpu->dataMalloc();
|
||||
input->setData(IncrementalGenerator());
|
||||
index->copyData(vector<uint32_t>{0, 2});
|
||||
index->copyin(vector<uint32_t>{0, 2});
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
||||
|
@ -224,7 +224,7 @@ TEST(Gather, Cuda) {
|
|||
auto index = gCpu->addTensor({3, 1}, DataType::UInt32);
|
||||
gCpu->dataMalloc();
|
||||
input->setData(IncrementalGenerator());
|
||||
index->copyData(vector<uint32_t>{0, 3, 1});
|
||||
index->copyin(vector<uint32_t>{0, 3, 1});
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@ void test_reducemean(const Shape &shape, const vector<float> &data,
|
|||
// Build input data on CPU
|
||||
Tensor icpu = make_ref<TensorObj>(shape, DataType::Float32, cpuRuntime);
|
||||
icpu->dataMalloc();
|
||||
icpu->copyData(data);
|
||||
icpu->copyin(data);
|
||||
|
||||
// Build CUDA graph
|
||||
Graph g = make_ref<GraphObj>(cudaRuntime);
|
||||
|
|
|
@ -13,8 +13,8 @@ TEST(Resize, Cuda_downsample_sizes_nearest) {
|
|||
auto input = gCpu->addTensor({1, 1, 2, 4}, DataType::Float32);
|
||||
auto sizes = gCpu->addTensor({4}, DataType::UInt32);
|
||||
gCpu->dataMalloc();
|
||||
input->copyData(vector<float>{1, 2, 3, 4, 5, 6, 7, 8});
|
||||
sizes->copyData(vector<uint32_t>{1, 1, 1, 3});
|
||||
input->copyin(vector<float>{1, 2, 3, 4, 5, 6, 7, 8});
|
||||
sizes->copyin(vector<uint32_t>{1, 1, 1, 3});
|
||||
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
@ -38,8 +38,8 @@ TEST(Resize, Cuda_upsample_sizes_nearest_notlarger) {
|
|||
auto input = gCpu->addTensor({1, 1, 2, 2}, DataType::Float32);
|
||||
auto sizes = gCpu->addTensor({2}, DataType::UInt32);
|
||||
gCpu->dataMalloc();
|
||||
input->copyData(vector<float>{1, 2, 3, 4});
|
||||
sizes->copyData(vector<uint32_t>{7, 8});
|
||||
input->copyin(vector<float>{1, 2, 3, 4});
|
||||
sizes->copyin(vector<uint32_t>{7, 8});
|
||||
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
@ -68,8 +68,8 @@ TEST(Resize, Cuda_upsample_sizes_nearest_notsmaller) {
|
|||
auto input = gCpu->addTensor({1, 1, 2, 2}, DataType::Float32);
|
||||
auto sizes = gCpu->addTensor({2}, DataType::UInt32);
|
||||
gCpu->dataMalloc();
|
||||
input->copyData(vector<float>{1, 2, 3, 4});
|
||||
sizes->copyData(vector<uint32_t>{7, 8});
|
||||
input->copyin(vector<float>{1, 2, 3, 4});
|
||||
sizes->copyin(vector<uint32_t>{7, 8});
|
||||
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
@ -98,9 +98,9 @@ TEST(Resize, Cuda_upsample_sizes_nearest_ceil_half_pixel) {
|
|||
auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32);
|
||||
auto sizes = gCpu->addTensor({4}, DataType::UInt32);
|
||||
gCpu->dataMalloc();
|
||||
input->copyData(
|
||||
input->copyin(
|
||||
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
|
||||
sizes->copyData(vector<uint32_t>{1, 1, 8, 8});
|
||||
sizes->copyin(vector<uint32_t>{1, 1, 8, 8});
|
||||
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
@ -131,9 +131,9 @@ TEST(Resize, Cuda_upsample_sizes_nearest_floor_align_corners) {
|
|||
auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32);
|
||||
auto sizes = gCpu->addTensor({2}, DataType::UInt32);
|
||||
gCpu->dataMalloc();
|
||||
input->copyData(
|
||||
input->copyin(
|
||||
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
|
||||
sizes->copyData(vector<uint32_t>{8, 8});
|
||||
sizes->copyin(vector<uint32_t>{8, 8});
|
||||
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
@ -164,9 +164,9 @@ TEST(Resize, Cuda_upsample_sizes_nearest_round_prefer_ceil_asymmetri) {
|
|||
auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32);
|
||||
auto sizes = gCpu->addTensor({4}, DataType::UInt32);
|
||||
gCpu->dataMalloc();
|
||||
input->copyData(
|
||||
input->copyin(
|
||||
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
|
||||
sizes->copyData(vector<uint32_t>{1, 1, 8, 8});
|
||||
sizes->copyin(vector<uint32_t>{1, 1, 8, 8});
|
||||
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
@ -197,8 +197,8 @@ TEST(Resize, Cuda_downsample_scales_nearest) {
|
|||
auto input = gCpu->addTensor({1, 1, 2, 4}, DataType::Float32);
|
||||
auto scales = gCpu->addTensor({4}, DataType::Float32);
|
||||
gCpu->dataMalloc();
|
||||
input->copyData(vector<float>{1, 2, 3, 4, 5, 6, 7, 8});
|
||||
scales->copyData(vector<float>{1, 1, 0.6, 0.6});
|
||||
input->copyin(vector<float>{1, 2, 3, 4, 5, 6, 7, 8});
|
||||
scales->copyin(vector<float>{1, 1, 0.6, 0.6});
|
||||
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
@ -221,8 +221,8 @@ TEST(Resize, Cuda_upsample_scales_nearest) {
|
|||
auto input = gCpu->addTensor({1, 1, 2, 2}, DataType::Float32);
|
||||
auto scales = gCpu->addTensor({4}, DataType::Float32);
|
||||
gCpu->dataMalloc();
|
||||
input->copyData(vector<float>{1, 2, 3, 4});
|
||||
scales->copyData(vector<float>{1, 1, 2, 3});
|
||||
input->copyin(vector<float>{1, 2, 3, 4});
|
||||
scales->copyin(vector<float>{1, 1, 2, 3});
|
||||
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
@ -247,8 +247,8 @@ TEST(Resize, Cuda_upsample_scales_nearest_axes_3_2) {
|
|||
auto input = gCpu->addTensor({1, 1, 2, 2}, DataType::Float32);
|
||||
auto scales = gCpu->addTensor({2}, DataType::Float32);
|
||||
gCpu->dataMalloc();
|
||||
input->copyData(vector<float>{1, 2, 3, 4});
|
||||
scales->copyData(vector<float>{3, 2});
|
||||
input->copyin(vector<float>{1, 2, 3, 4});
|
||||
scales->copyin(vector<float>{3, 2});
|
||||
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
@ -273,8 +273,8 @@ TEST(Resize, Cuda_downsample_scales_linear) {
|
|||
auto input = gCpu->addTensor({1, 1, 2, 4}, DataType::Float32);
|
||||
auto scales = gCpu->addTensor({4}, DataType::Float32);
|
||||
gCpu->dataMalloc();
|
||||
input->copyData(vector<float>{1, 2, 3, 4, 5, 6, 7, 8});
|
||||
scales->copyData(vector<float>{1, 1, 0.6, 0.6});
|
||||
input->copyin(vector<float>{1, 2, 3, 4, 5, 6, 7, 8});
|
||||
scales->copyin(vector<float>{1, 1, 0.6, 0.6});
|
||||
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
@ -297,8 +297,8 @@ TEST(Resize, Cuda_downsample_scales_linear_aligncorners) {
|
|||
auto input = gCpu->addTensor({1, 1, 2, 4}, DataType::Float32);
|
||||
auto scales = gCpu->addTensor({4}, DataType::Float32);
|
||||
gCpu->dataMalloc();
|
||||
input->copyData(vector<float>{1, 2, 3, 4, 5, 6, 7, 8});
|
||||
scales->copyData(vector<float>{1, 1, 0.6, 0.6});
|
||||
input->copyin(vector<float>{1, 2, 3, 4, 5, 6, 7, 8});
|
||||
scales->copyin(vector<float>{1, 1, 0.6, 0.6});
|
||||
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
@ -323,8 +323,8 @@ TEST(Resize, Cuda_upsample_scales_linear) {
|
|||
auto input = gCpu->addTensor({1, 1, 2, 2}, DataType::Float32);
|
||||
auto scales = gCpu->addTensor({4}, DataType::Float32);
|
||||
gCpu->dataMalloc();
|
||||
input->copyData(vector<float>{1, 2, 3, 4});
|
||||
scales->copyData(vector<float>{1, 1, 2, 2});
|
||||
input->copyin(vector<float>{1, 2, 3, 4});
|
||||
scales->copyin(vector<float>{1, 1, 2, 2});
|
||||
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
@ -349,8 +349,8 @@ TEST(Resize, Cuda_upsample_scales_linear_align_corners) {
|
|||
auto input = gCpu->addTensor({1, 1, 2, 2}, DataType::Float32);
|
||||
auto scales = gCpu->addTensor({4}, DataType::Float32);
|
||||
gCpu->dataMalloc();
|
||||
input->copyData(vector<float>{1, 2, 3, 4});
|
||||
scales->copyData(vector<float>{1, 1, 2, 2});
|
||||
input->copyin(vector<float>{1, 2, 3, 4});
|
||||
scales->copyin(vector<float>{1, 1, 2, 2});
|
||||
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
@ -377,9 +377,9 @@ TEST(Resize, Cuda_downsample_sizes_linear_pytorchhalfpixel) {
|
|||
auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32);
|
||||
auto sizes = gCpu->addTensor({4}, DataType::UInt32);
|
||||
gCpu->dataMalloc();
|
||||
input->copyData(
|
||||
input->copyin(
|
||||
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
|
||||
sizes->copyData(vector<uint32_t>{1, 1, 3, 1});
|
||||
sizes->copyin(vector<uint32_t>{1, 1, 3, 1});
|
||||
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
@ -406,10 +406,10 @@ TEST(Resize, Cuda_tf_crop_and_resize) {
|
|||
auto sizes = gCpu->addTensor({4}, DataType::UInt32);
|
||||
auto roi = gCpu->addTensor({8}, DataType::Float32);
|
||||
gCpu->dataMalloc();
|
||||
input->copyData(
|
||||
input->copyin(
|
||||
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
|
||||
sizes->copyData(vector<uint32_t>{1, 1, 3, 3});
|
||||
roi->copyData(vector<float>{0, 0, 0.4, 0.6, 1, 1, 0.6, 0.8});
|
||||
sizes->copyin(vector<uint32_t>{1, 1, 3, 3});
|
||||
roi->copyin(vector<float>{0, 0, 0.4, 0.6, 1, 1, 0.6, 0.8});
|
||||
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
@ -437,10 +437,10 @@ TEST(Resize, Cuda_tf_crop_and_resize_axes_3_2) {
|
|||
auto sizes = gCpu->addTensor({2}, DataType::UInt32);
|
||||
auto roi = gCpu->addTensor({4}, DataType::Float32);
|
||||
gCpu->dataMalloc();
|
||||
input->copyData(
|
||||
input->copyin(
|
||||
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
|
||||
sizes->copyData(vector<uint32_t>{3, 3});
|
||||
roi->copyData(vector<float>{0.6, 0.4, 0.8, 0.6});
|
||||
sizes->copyin(vector<uint32_t>{3, 3});
|
||||
roi->copyin(vector<float>{0.6, 0.4, 0.8, 0.6});
|
||||
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
@ -467,9 +467,9 @@ TEST(Resize, Cuda_downsample_scales_cubic) {
|
|||
auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32);
|
||||
auto scales = gCpu->addTensor({4}, DataType::Float32);
|
||||
gCpu->dataMalloc();
|
||||
input->copyData(
|
||||
input->copyin(
|
||||
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
|
||||
scales->copyData(vector<float>{1.0, 1.0, 0.8, 0.8});
|
||||
scales->copyin(vector<float>{1.0, 1.0, 0.8, 0.8});
|
||||
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
@ -494,9 +494,9 @@ TEST(Resize, Cuda_downsample_scales_cubic_align_corners) {
|
|||
auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32);
|
||||
auto scales = gCpu->addTensor({4}, DataType::Float32);
|
||||
gCpu->dataMalloc();
|
||||
input->copyData(
|
||||
input->copyin(
|
||||
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
|
||||
scales->copyData(vector<float>{1.0, 1.0, 0.8, 0.8});
|
||||
scales->copyin(vector<float>{1.0, 1.0, 0.8, 0.8});
|
||||
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
@ -522,9 +522,9 @@ TEST(Resize, Cuda_upsample_scales_cubic) {
|
|||
auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32);
|
||||
auto scales = gCpu->addTensor({4}, DataType::Float32);
|
||||
gCpu->dataMalloc();
|
||||
input->copyData(
|
||||
input->copyin(
|
||||
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
|
||||
scales->copyData(vector<float>{1.0, 1.0, 2, 2});
|
||||
scales->copyin(vector<float>{1.0, 1.0, 2, 2});
|
||||
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
@ -559,9 +559,9 @@ TEST(Resize, Cuda_upsample_scales_cubic_align_corners) {
|
|||
auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32);
|
||||
auto scales = gCpu->addTensor({4}, DataType::Float32);
|
||||
gCpu->dataMalloc();
|
||||
input->copyData(
|
||||
input->copyin(
|
||||
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
|
||||
scales->copyData(vector<float>{1.0, 1.0, 2, 2});
|
||||
scales->copyin(vector<float>{1.0, 1.0, 2, 2});
|
||||
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
@ -598,9 +598,9 @@ TEST(Resize, Cuda_upsample_scales_cubic_asymmetric) {
|
|||
auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32);
|
||||
auto scales = gCpu->addTensor({4}, DataType::Float32);
|
||||
gCpu->dataMalloc();
|
||||
input->copyData(
|
||||
input->copyin(
|
||||
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
|
||||
scales->copyData(vector<float>{1.0, 1.0, 2, 2});
|
||||
scales->copyin(vector<float>{1.0, 1.0, 2, 2});
|
||||
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
@ -633,9 +633,9 @@ TEST(Resize, Cuda_downsample_sizes_cubic) {
|
|||
auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32);
|
||||
auto sizes = gCpu->addTensor({4}, DataType::UInt32);
|
||||
gCpu->dataMalloc();
|
||||
input->copyData(
|
||||
input->copyin(
|
||||
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
|
||||
sizes->copyData(vector<uint32_t>{1, 1, 3, 3});
|
||||
sizes->copyin(vector<uint32_t>{1, 1, 3, 3});
|
||||
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
@ -667,9 +667,9 @@ TEST(Resize, Cuda_upsample_sizes_cubic) {
|
|||
auto input = gCpu->addTensor({1, 1, 4, 4}, DataType::Float32);
|
||||
auto sizes = gCpu->addTensor({4}, DataType::UInt32);
|
||||
gCpu->dataMalloc();
|
||||
input->copyData(
|
||||
input->copyin(
|
||||
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
|
||||
sizes->copyData(vector<uint32_t>{1, 1, 9, 10});
|
||||
sizes->copyin(vector<uint32_t>{1, 1, 9, 10});
|
||||
|
||||
auto cudaRuntime = make_ref<CudaRuntimeObj>();
|
||||
Graph gCuda = make_ref<GraphObj>(cudaRuntime);
|
||||
|
|
|
@ -61,7 +61,7 @@ TEST(Conv, NaiveCPU) {
|
|||
auto ans =
|
||||
make_ref<TensorObj>(Shape{1, 2, 2, 2}, DataType::UInt32, runtime);
|
||||
ans->dataMalloc();
|
||||
ans->copyData(
|
||||
ans->copyin(
|
||||
vector<uint32_t>{4794, 4386, 8199, 7506, 11274, 10542, 20835, 19656});
|
||||
EXPECT_TRUE(conv->getOutput()->equalData(ans));
|
||||
}
|
||||
|
|
|
@ -12,7 +12,7 @@ TEST(Resize, ShapeInference) {
|
|||
Tensor i = g->addTensor({1, 1, 2, 4}, DataType::UInt32);
|
||||
Tensor sizes = g->addTensor({4}, DataType::UInt32);
|
||||
sizes->dataMalloc();
|
||||
sizes->copyData(vector<uint32_t>{1, 1, 1, 3});
|
||||
sizes->copyin(vector<uint32_t>{1, 1, 1, 3});
|
||||
auto op = g->addOp<ResizeObj>(
|
||||
i, nullptr, std::nullopt, sizes, nullptr, nullptr,
|
||||
ResizeObj::EKeepAspectRatioPolicy::stretch);
|
||||
|
@ -24,7 +24,7 @@ TEST(Resize, ShapeInference) {
|
|||
Tensor i = g->addTensor({1, 1, 2, 4}, DataType::UInt32);
|
||||
Tensor sizes = g->addTensor({2}, DataType::UInt32);
|
||||
sizes->dataMalloc();
|
||||
sizes->copyData(vector<uint32_t>{1, 3});
|
||||
sizes->copyin(vector<uint32_t>{1, 3});
|
||||
auto op = g->addOp<ResizeObj>(
|
||||
i, nullptr, vector<int>{2, 3}, sizes, nullptr, nullptr,
|
||||
ResizeObj::EKeepAspectRatioPolicy::stretch);
|
||||
|
@ -36,7 +36,7 @@ TEST(Resize, ShapeInference) {
|
|||
Tensor i = g->addTensor({1, 3, 2, 4}, DataType::UInt32);
|
||||
Tensor sizes = g->addTensor({2}, DataType::UInt32);
|
||||
sizes->dataMalloc();
|
||||
sizes->copyData(vector<uint32_t>{7, 8});
|
||||
sizes->copyin(vector<uint32_t>{7, 8});
|
||||
auto op = g->addOp<ResizeObj>(
|
||||
i, nullptr, vector<int>{2, 3}, sizes, nullptr, nullptr,
|
||||
ResizeObj::EKeepAspectRatioPolicy::notLarger);
|
||||
|
@ -48,7 +48,7 @@ TEST(Resize, ShapeInference) {
|
|||
Tensor i = g->addTensor({1, 3, 2, 4}, DataType::UInt32);
|
||||
Tensor sizes = g->addTensor({3}, DataType::UInt32);
|
||||
sizes->dataMalloc();
|
||||
sizes->copyData(vector<uint32_t>{2, 6, 8});
|
||||
sizes->copyin(vector<uint32_t>{2, 6, 8});
|
||||
auto op = g->addOp<ResizeObj>(
|
||||
i, nullptr, vector<int>{1, 2, 3}, sizes, nullptr, nullptr,
|
||||
ResizeObj::EKeepAspectRatioPolicy::notSmaller);
|
||||
|
@ -60,7 +60,7 @@ TEST(Resize, ShapeInference) {
|
|||
Tensor i = g->addTensor({1, 1, 4, 4}, DataType::UInt32);
|
||||
Tensor scales = g->addTensor({3}, DataType::Float32);
|
||||
scales->dataMalloc();
|
||||
scales->copyData(vector<float>{1, 0.8, 0.8});
|
||||
scales->copyin(vector<float>{1, 0.8, 0.8});
|
||||
auto op = g->addOp<ResizeObj>(i, nullptr, vector<int>{1, 2, 3}, nullptr,
|
||||
scales, nullptr);
|
||||
EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 1, 3, 3}));
|
||||
|
@ -71,7 +71,7 @@ TEST(Resize, ShapeInference) {
|
|||
Tensor i = g->addTensor({1, 1, 2, 2}, DataType::UInt32);
|
||||
Tensor scales = g->addTensor({4}, DataType::Float32);
|
||||
scales->dataMalloc();
|
||||
scales->copyData(vector<float>{1, 1, 2, 2});
|
||||
scales->copyin(vector<float>{1, 1, 2, 2});
|
||||
auto op = g->addOp<ResizeObj>(i, nullptr, std::nullopt, nullptr, scales,
|
||||
nullptr);
|
||||
EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 1, 4, 4}));
|
||||
|
|
Loading…
Reference in New Issue