From 03de74f4bc822c5d8e3d1748a97cbc42bb1c2ba1 Mon Sep 17 00:00:00 2001 From: Hardy <100662313+wanghailu0717@users.noreply.github.com> Date: Tue, 13 Sep 2022 11:27:41 +0800 Subject: [PATCH] Tensor serialization (#25) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * use protobuf for tensor data save,write,read, in Chinese 序列化和反序列化 * add protobuf * add code for tensor load & save from/to file * add code for tensor load & save * add code for tensor load & save * add code for tensor save & load * add code for tensor save & load * add code for save & load * add code for load & save * add code for tensor load & save * add code for tensor save & load Co-authored-by: wanghailu --- CMakeLists.txt | 22 ++++++++++ include/core/tensor.h | 5 ++- include/utils/dataloader.h | 10 +++++ proto/data.proto | 45 +++++++++++++++++++ src/core/tensor.cc | 7 ++- src/utils/dataloader.cc | 81 +++++++++++++++++++++++++++++++++++ test/core/test_tensor_save.cc | 33 ++++++++++++++ 7 files changed, 201 insertions(+), 2 deletions(-) create mode 100644 include/utils/dataloader.h create mode 100644 proto/data.proto create mode 100644 src/utils/dataloader.cc create mode 100644 test/core/test_tensor_save.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 8fa64994..dcc43cf2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,6 +5,7 @@ project(InfiniTensor C CXX) # Do not change these options in this file. Use cmake.config, cmake -DOPTION=VALUE, or ccmake to specify them. 
# Protobuf: compile proto/data.proto into C++ and build it as the tensor_proto
# library, which backs tensor save/load when USE_PROTOBUF is ON.
if(USE_PROTOBUF)
    # TENSOR_PROTOBUF switches on the protobuf code paths guarded by
    # `#ifdef TENSOR_PROTOBUF` in the sources.
    add_definitions(-DTENSOR_PROTOBUF)
    find_package(Protobuf REQUIRED)
    message(STATUS "protobuf include: ${PROTOBUF_INCLUDE_DIRS}")
    message(STATUS "protobuf libraries: ${PROTOBUF_LIBRARIES}")
    message(STATUS "protoc executable: ${PROTOBUF_PROTOC_EXECUTABLE}")
    # Use the same result variable that is logged above.
    include_directories(${PROTOBUF_INCLUDE_DIRS})
    # protobuf_generate_cpp writes data.pb.h / data.pb.cc into the binary dir,
    # so it has to be on the include path for `#include "data.pb.h"`.
    include_directories(${CMAKE_CURRENT_BINARY_DIR})
    set(PROTO_PATH "${CMAKE_CURRENT_SOURCE_DIR}/proto")
    file(GLOB PROTO_FILES "${PROTO_PATH}/data.proto")
    protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS ${PROTO_FILES})
    # STATUS keeps these informational lines out of stderr.
    message(STATUS "proto sources: ${PROTO_SRCS} (from ${PROTO_FILES})")
    message(STATUS "proto headers: ${PROTO_HDRS} (from ${PROTO_FILES})")
    add_library(tensor_proto SHARED ${PROTO_SRCS} ${PROTO_HDRS})
    target_link_libraries(tensor_proto PUBLIC ${PROTOBUF_LIBRARIES})
endif()
syntax = "proto2";
package data;

// Layout tag stored alongside a serialized tensor (see Tensor.layout).
enum TensorLayout {
    LAYOUT_NCHW = 1;
    LAYOUT_NHWC = 2;
    LAYOUT_HWCN = 3;
    LAYOUT_NDHWC = 4;
    LAYOUT_ARRAY = 5;
    LAYOUT_TNC = 6;
    LAYOUT_NTC = 7;
    LAYOUT_NCDHW = 8;
    LAYOUT_NC = 9;
    LAYOUT_NLC = 10;
}

// Element type tag stored alongside a serialized tensor (see Tensor.dtype).
// NOTE(review): DTYPE_INT31 sits next to DTYPE_INT32 -- confirm it is
// intentional and not a typo.
enum DataType {
    DTYPE_HALF = 1;
    DTYPE_FLOAT = 2;
    DTYPE_INT8 = 3;
    DTYPE_INT16 = 4;
    DTYPE_INT31 = 5;
    DTYPE_INT32 = 6;
    DTYPE_INT64 = 7;
    DTYPE_BFLOAT16 = 8;
    DTYPE_DOUBLE = 9;
    DTYPE_UINT8 = 10;
    DTYPE_UINT16 = 11;
    DTYPE_UINT32 = 12;
    DTYPE_UINT64 = 13;
    DTYPE_BOOL = 14;
    DTYPE_INVALID = 15;
}

// One serialized tensor: metadata plus its elements in exactly one of the
// data_* fields, selected by dtype.
message Tensor
{
    required string id = 1;
    repeated int32 shape = 2;
    required TensorLayout layout = 3 [default = LAYOUT_ARRAY];
    required DataType dtype = 4 [default = DTYPE_FLOAT];
    // Packed encoding drops the per-element tag; parsers accept both packed
    // and unpacked input, so previously written files remain readable.
    repeated float data_float = 5 [packed = true];
    // Declared uint32 (wire-compatible with the former int32) so values
    // >= 2^31 are not stored as negative numbers with 10-byte varints.
    repeated uint32 data_uint32 = 6 [packed = true];
}
file_path) { saveTensorData(this, file_path); } + Shape TensorObj::getPosByOffset(size_t offset, Shape dim) const { Shape pos = dim; for (int i = dim.size() - 1; i >= 0; i--) { @@ -172,4 +177,4 @@ size_t TensorObj::getOffsetByBroadcastOffset(size_t bcOffset, return getOffsetByPos(pos, shape); } -}; // namespace infini \ No newline at end of file +}; // namespace infini diff --git a/src/utils/dataloader.cc b/src/utils/dataloader.cc new file mode 100644 index 00000000..1de0cd8d --- /dev/null +++ b/src/utils/dataloader.cc @@ -0,0 +1,81 @@ +#include "utils/dataloader.h" +#include "core/runtime.h" +#include "core/tensor.h" +#ifdef TENSOR_PROTOBUF +#include "data.pb.h" +#endif +#include + +namespace infini { + +void saveTensorData(TensorObj *tensor, std::string file_path) { +#ifdef TENSOR_PROTOBUF + data::Tensor temp; + temp.set_id("tensor_id"); + for (size_t i = 0; i < tensor->getDims().size(); ++i) { + temp.add_shape(tensor->getDims()[i]); + } + temp.set_layout(data::LAYOUT_NHWC); + if (tensor->getDType() == DataType::Float32) { + temp.set_dtype(data::DTYPE_FLOAT); + for (size_t i = 0; i < tensor->size(); ++i) { + temp.add_data_float((tensor->getDataBlob()->getPtr())[i]); + } + } else if (tensor->getDType() == DataType::UInt32) { + temp.set_dtype(data::DTYPE_UINT32); + for (size_t i = 0; i < tensor->size(); ++i) { + temp.add_data_uint32( + (tensor->getDataBlob()->getPtr())[i]); + } + } else { + IT_TODO_HALT(); + } + + std::ofstream fileout(file_path, + std::ios::out | std::ios::trunc | std::ios::binary); + bool flag = temp.SerializeToOstream(&fileout); + if (!flag) { + std::cout << "Failed to write file " + file_path << std::endl; + } + fileout.close(); +#else + std::cout << "If you want to use this feature, please turn on USE_PROTOBUF " + "option in the cmake file." 
+ << std::endl; +#endif +} + +void loadTensorData(TensorObj *tensor, std::string file_path) { +#ifdef TENSOR_PROTOBUF + data::Tensor temp; + std::ifstream filein(file_path, std::ios::in | std::ios::binary); + bool flag = temp.ParseFromIstream(&filein); + if (!flag) { + std::cout << "Failed to read file " + file_path << std::endl; + } + + if (tensor->getDType() == DataType::Float32) { + std::vector data_temp; + for (int i = 0; i < temp.data_float_size(); ++i) { + data_temp.push_back(temp.data_float(i)); + } + tensor->copyData(data_temp); + } else if (tensor->getDType() == DataType::UInt32) { + std::vector data_temp; + for (int i = 0; i < temp.data_uint32_size(); ++i) { + data_temp.push_back(temp.data_uint32(i)); + } + tensor->copyData(data_temp); + } else { + IT_TODO_HALT(); + } + + filein.close(); +#else + std::cout << "If you want to use this feature, please turn on USE_PROTOBUF " + "option in the cmake file." + << std::endl; +#endif +} + +}; // namespace infini diff --git a/test/core/test_tensor_save.cc b/test/core/test_tensor_save.cc new file mode 100644 index 00000000..d310ab84 --- /dev/null +++ b/test/core/test_tensor_save.cc @@ -0,0 +1,33 @@ +#include "core/blob.h" +#include "core/graph.h" +#include "core/runtime.h" +#include "operators/matmul.h" +#include "test.h" + +namespace infini { + +TEST(Prtotbuf, save_and_load) { + Runtime runtime = CpuRuntimeObj::getInstance(); + Graph g = make_ref(runtime); + Tensor i0 = g->addTensor({1, 3, 4}, DataType::Float32); + Tensor w0 = g->addTensor({1, 3, 4}, DataType::Float32); + Tensor u0 = g->addTensor({1, 3, 4}, DataType::UInt32); + Tensor u1 = g->addTensor({1, 3, 4}, DataType::UInt32); + g->dataMalloc(); + i0->copyData(vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + w0->copyData(vector{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); + u0->copyData(vector{1, 3, 5, 7, 9, 2, 4, 6, 8, 10, 0, 0}); + u1->copyData(vector{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0}); + i0->save("i0.pb"); + w0->printData(); + w0->load("i0.pb"); + 
w0->printData(); + EXPECT_TRUE(w0->equalData(i0)); + u0->save("u.pb"); + u1->printData(); + u1->load("u.pb"); + u1->printData(); + EXPECT_TRUE(u1->equalData(u0)); +} + +} // namespace infini