Tensor serialization (#25)

* use protobuf to save, write, and read tensor data (serialization and deserialization; in Chinese: 序列化和反序列化)

* add protobuf

* add code for tensor load & save from/to file

* add code for tensor load & save

* add code for tensor load & save

* add code for tensor save & load

* add code for tensor save & load

* add code for save & load

* add code for load & save

* add code for tensor load & save

* add code for tensor save & load

Co-authored-by: wanghailu <wanghailu@qiyuanlab.com>
This commit is contained in:
Hardy 2022-09-13 11:27:41 +08:00 committed by GitHub
parent 13b7a2604b
commit 03de74f4bc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 201 additions and 2 deletions

View File

@ -5,6 +5,7 @@ project(InfiniTensor C CXX)
# Do not change these options in this file. Use cmake.config, cmake -DOPTION=VALUE, or ccmake to specify them. # Do not change these options in this file. Use cmake.config, cmake -DOPTION=VALUE, or ccmake to specify them.
option(USE_CUDA "Support CUDA GPU" ON) option(USE_CUDA "Support CUDA GPU" ON)
option(USE_BACKTRACE "Print backtrace on exception and segmentation fault" ON) option(USE_BACKTRACE "Print backtrace on exception and segmentation fault" ON)
# When ON, the build defines TENSOR_PROTOBUF and generates/links the tensor_proto library from proto/data.proto.
option(USE_PROTOBUF "Serialize and deserialize tensors" ON)
option(BUILD_TEST "Build tests" ON) option(BUILD_TEST "Build tests" ON)
cmake_dependent_option(BUILD_TEST_CORE "Build tests for core components" ON BUILD_TEST OFF) cmake_dependent_option(BUILD_TEST_CORE "Build tests for core components" ON BUILD_TEST OFF)
@ -32,6 +33,24 @@ endif()
if(OpenMP_CXX_FOUND) if(OpenMP_CXX_FOUND)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
endif() endif()
# Protobuf: optional tensor serialization/deserialization support.
if(USE_PROTOBUF)
  # Enables the code paths guarded by `#ifdef TENSOR_PROTOBUF`.
  add_definitions(-D TENSOR_PROTOBUF)
  find_package(Protobuf REQUIRED)
  message(STATUS "protobuf include: " ${PROTOBUF_INCLUDE_DIRS})
  message(STATUS "protobuf libraries: " ${PROTOBUF_LIBRARIES})
  message(STATUS "protoc executable: " ${PROTOBUF_PROTOC_EXECUTABLE})
  # Use the same (plural) result variable that is logged above.
  include_directories(${PROTOBUF_INCLUDE_DIRS})
  # protobuf_generate_cpp writes data.pb.h / data.pb.cc into the binary dir.
  include_directories(${CMAKE_CURRENT_BINARY_DIR})
  set(PROTO_PATH "${CMAKE_CURRENT_SOURCE_DIR}/proto")
  # A single, known schema file: name it directly instead of globbing so a
  # missing file is reported with its path.
  set(PROTO_FILES "${PROTO_PATH}/data.proto")
  protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS ${PROTO_FILES})
  # Labeled STATUS output instead of mode-less messages.
  message(STATUS "generated proto sources: ${PROTO_SRCS} (from ${PROTO_FILES})")
  message(STATUS "generated proto headers: ${PROTO_HDRS} (from ${PROTO_FILES})")
  add_library(tensor_proto SHARED ${PROTO_SRCS} ${PROTO_HDRS})
  target_link_libraries(tensor_proto PUBLIC ${PROTOBUF_LIBRARIES})
endif()
include_directories(include) include_directories(include)
@ -57,6 +76,9 @@ endif()
file(GLOB_RECURSE SRC src/*.cc src/*.cu) file(GLOB_RECURSE SRC src/*.cc src/*.cu)
add_library(InfiniTensor SHARED ${SRC}) add_library(InfiniTensor SHARED ${SRC})
# Link the generated protobuf library into InfiniTensor only when tensor
# serialization is enabled.
if(USE_PROTOBUF)
target_link_libraries(InfiniTensor tensor_proto)
endif()
if(USE_BACKTRACE) if(USE_BACKTRACE)
add_definitions(-D BACKWARD_TRACE) add_definitions(-D BACKWARD_TRACE)

View File

@ -26,6 +26,9 @@ class TensorObj : public TensorBaseObj {
VType getData(const Shape &pos) const; VType getData(const Shape &pos) const;
void dataMalloc(); void dataMalloc();
// Fills this tensor's data from the serialized tensor file at file_path.
void load(std::string file_path);
// Writes this tensor's shape and data to a serialized tensor file at file_path.
void save(std::string file_path);
template <typename T> void copyData(const T *dptr) { template <typename T> void copyData(const T *dptr) {
IT_ASSERT(DataType::get<T>() == dtype); IT_ASSERT(DataType::get<T>() == dtype);
IT_ASSERT(data != nullptr); IT_ASSERT(data != nullptr);

View File

@ -0,0 +1,10 @@
#pragma once
#include "core/runtime.h"
#include <string>
namespace infini {
// Parses a serialized data::Tensor from file_path and copies its float or
// uint32 payload into `tensor`, selected by the tensor's own dtype
// (Float32 / UInt32). When built without TENSOR_PROTOBUF it only prints a
// hint to enable USE_PROTOBUF.
void loadTensorData(TensorObj *tensor, std::string file_path);
// Serializes the dims and the Float32 / UInt32 data of `tensor` into a
// data::Tensor message and writes it to file_path (truncating the file).
// When built without TENSOR_PROTOBUF it only prints a hint to enable
// USE_PROTOBUF.
void saveTensorData(TensorObj *tensor, std::string file_path);
} // namespace infini

45
proto/data.proto Normal file
View File

@ -0,0 +1,45 @@
syntax = "proto2";
package data;
// Layout tag stored in the `layout` field of a serialized Tensor.
// The numeric values are part of the on-disk format; do not renumber.
enum TensorLayout {
LAYOUT_NCHW = 1;
LAYOUT_NHWC = 2;
LAYOUT_HWCN = 3;
LAYOUT_NDHWC = 4;
LAYOUT_ARRAY = 5;
LAYOUT_TNC = 6;
LAYOUT_NTC = 7;
LAYOUT_NCDHW = 8;
LAYOUT_NC = 9;
LAYOUT_NLC = 10;
}
// Element type tag stored in the `dtype` field of a serialized Tensor.
// The numeric values are part of the on-disk format; do not renumber.
// The Tensor message only carries payload fields for float and uint32 data.
enum DataType {
DTYPE_HALF = 1;
DTYPE_FLOAT = 2;
DTYPE_INT8 = 3;
DTYPE_INT16 = 4;
// NOTE(review): INT31 sits next to INT32 — presumably intentional; confirm it is not a typo.
DTYPE_INT31 = 5;
DTYPE_INT32 = 6;
DTYPE_INT64 = 7;
DTYPE_BFLOAT16 = 8;
DTYPE_DOUBLE = 9;
DTYPE_UINT8 = 10;
DTYPE_UINT16 = 11;
DTYPE_UINT32 = 12;
DTYPE_UINT64 = 13;
DTYPE_BOOL = 14;
DTYPE_INVALID = 15;
}
// Serialized form of a single tensor: metadata plus one typed payload array.
message Tensor
{
  // Identifier string for the tensor.
  required string id = 1;
  // Dimension sizes, in order.
  repeated int32 shape = 2;
  // Layout tag for the stored data.
  required TensorLayout layout = 3 [default = LAYOUT_ARRAY];
  // Element type; tells the reader which data_* field holds the payload.
  required DataType dtype = 4 [default = DTYPE_FLOAT];
  // Payload for DTYPE_FLOAT tensors.
  repeated float data_float = 5;
  // Payload for DTYPE_UINT32 tensors. Declared as uint32 (not int32) so that
  // values >= 2^31 are not stored as negative numbers, which would cost a
  // 10-byte varint each. int32 and uint32 are wire-compatible, so files
  // written with the previous int32 declaration still parse.
  repeated uint32 data_uint32 = 6;
}

View File

@ -1,6 +1,7 @@
#include "core/tensor.h" #include "core/tensor.h"
#include "core/blob.h" #include "core/blob.h"
#include "core/runtime.h" #include "core/runtime.h"
#include "utils/dataloader.h"
namespace infini { namespace infini {
@ -141,6 +142,10 @@ void TensorObj::copyData(const TensorObj *src) {
runtime->copyBlob(this, src); runtime->copyBlob(this, src);
} }
void TensorObj::load(std::string file_path) { loadTensorData(this, file_path); }
void TensorObj::save(std::string file_path) { saveTensorData(this, file_path); }
Shape TensorObj::getPosByOffset(size_t offset, Shape dim) const { Shape TensorObj::getPosByOffset(size_t offset, Shape dim) const {
Shape pos = dim; Shape pos = dim;
for (int i = dim.size() - 1; i >= 0; i--) { for (int i = dim.size() - 1; i >= 0; i--) {

81
src/utils/dataloader.cc Normal file
View File

@ -0,0 +1,81 @@
#include "utils/dataloader.h"
#include "core/runtime.h"
#include "core/tensor.h"
#ifdef TENSOR_PROTOBUF
#include "data.pb.h"
#endif
#include <fstream>
namespace infini {
/**
 * Serialize a tensor's shape and element data into a protobuf file.
 *
 * Only Float32 and UInt32 tensors are supported; any other dtype hits
 * IT_TODO_HALT(). The message id and layout are fixed placeholder values
 * ("tensor_id", LAYOUT_NHWC). On a serialization/write failure a message is
 * printed to stdout; no error is propagated to the caller.
 *
 * When built without TENSOR_PROTOBUF this only prints a hint to enable the
 * USE_PROTOBUF CMake option.
 *
 * @param tensor     Tensor whose dims and data are written.
 * @param file_path  Destination file; truncated if it already exists.
 */
void saveTensorData(TensorObj *tensor, std::string file_path) {
#ifdef TENSOR_PROTOBUF
    data::Tensor temp;
    temp.set_id("tensor_id");
    temp.set_layout(data::LAYOUT_NHWC);

    // Fetch the dims once instead of on every loop iteration.
    const auto &dims = tensor->getDims();
    for (const auto dim : dims) {
        temp.add_shape(dim);
    }

    // Element count and data pointer are loop-invariant: look them up once.
    // The blob is only touched when there is at least one element to copy.
    const size_t count = tensor->size();
    if (tensor->getDType() == DataType::Float32) {
        temp.set_dtype(data::DTYPE_FLOAT);
        const float *src =
            count > 0 ? tensor->getDataBlob()->getPtr<float *>() : nullptr;
        for (size_t i = 0; i < count; ++i) {
            temp.add_data_float(src[i]);
        }
    } else if (tensor->getDType() == DataType::UInt32) {
        temp.set_dtype(data::DTYPE_UINT32);
        const uint32_t *src =
            count > 0 ? tensor->getDataBlob()->getPtr<uint32_t *>() : nullptr;
        for (size_t i = 0; i < count; ++i) {
            temp.add_data_uint32(src[i]);
        }
    } else {
        IT_TODO_HALT();
    }

    std::ofstream fileout(file_path,
                          std::ios::out | std::ios::trunc | std::ios::binary);
    // SerializeToOstream also reports failure when the stream could not be
    // opened, so a single check covers both cases.
    if (!temp.SerializeToOstream(&fileout)) {
        std::cout << "Failed to write file " + file_path << std::endl;
    }
    fileout.close();
#else
    std::cout << "If you want to use this feature, please turn on USE_PROTOBUF "
                 "option in the cmake file."
              << std::endl;
#endif
}
/**
 * Load element data from a protobuf file into an existing tensor.
 *
 * The payload field that is read (data_float or data_uint32) is chosen by the
 * dtype of `tensor`, not by the dtype recorded in the file. Only Float32 and
 * UInt32 tensors are supported; any other dtype hits IT_TODO_HALT().
 *
 * If the file cannot be opened or parsed, a message is printed to stdout and
 * the tensor is left untouched, rather than being overwritten with an empty
 * or partially parsed payload.
 *
 * When built without TENSOR_PROTOBUF this only prints a hint to enable the
 * USE_PROTOBUF CMake option.
 *
 * @param tensor     Destination tensor; receives the data via copyData().
 * @param file_path  File to read the serialized tensor from.
 */
void loadTensorData(TensorObj *tensor, std::string file_path) {
#ifdef TENSOR_PROTOBUF
    data::Tensor temp;
    std::ifstream filein(file_path, std::ios::in | std::ios::binary);
    if (!temp.ParseFromIstream(&filein)) {
        std::cout << "Failed to read file " + file_path << std::endl;
        // Nothing trustworthy was read; do not clobber the tensor.
        return;
    }

    if (tensor->getDType() == DataType::Float32) {
        // Build the staging vector in one step from the repeated field.
        std::vector<float> data_temp(temp.data_float().begin(),
                                     temp.data_float().end());
        tensor->copyData(data_temp);
    } else if (tensor->getDType() == DataType::UInt32) {
        std::vector<uint32_t> data_temp(temp.data_uint32().begin(),
                                        temp.data_uint32().end());
        tensor->copyData(data_temp);
    } else {
        IT_TODO_HALT();
    }
    filein.close();
#else
    std::cout << "If you want to use this feature, please turn on USE_PROTOBUF "
                 "option in the cmake file."
              << std::endl;
#endif
}
}; // namespace infini

View File

@ -0,0 +1,33 @@
#include "core/blob.h"
#include "core/graph.h"
#include "core/runtime.h"
#include "operators/matmul.h"
#include "test.h"
namespace infini {
// Round-trip check: a tensor saved to disk and loaded into a second tensor of
// the same shape and dtype must compare equal, for both Float32 and UInt32.
// NOTE(review): the suite name "Prtotbuf" looks like a typo of "Protobuf"; it
// is kept as-is so existing test filters still match.
TEST(Prtotbuf, save_and_load) {
    Runtime cpu = CpuRuntimeObj::getInstance();
    Graph graph = make_ref<GraphObj>(cpu);

    Tensor floatSrc = graph->addTensor({1, 3, 4}, DataType::Float32);
    Tensor floatDst = graph->addTensor({1, 3, 4}, DataType::Float32);
    Tensor uintSrc = graph->addTensor({1, 3, 4}, DataType::UInt32);
    Tensor uintDst = graph->addTensor({1, 3, 4}, DataType::UInt32);
    graph->dataMalloc();

    // Destinations start with different contents than their sources, so a
    // load that does nothing cannot pass the equality check.
    floatSrc->copyData(vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
    floatDst->copyData(vector<float>{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
    uintSrc->copyData(vector<uint32_t>{1, 3, 5, 7, 9, 2, 4, 6, 8, 10, 0, 0});
    uintDst->copyData(vector<uint32_t>{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0});

    // Save `src`, print `dst` before and after loading, then compare.
    auto expectRoundTrip = [](Tensor src, Tensor dst, const char *path) {
        src->save(path);
        dst->printData();
        dst->load(path);
        dst->printData();
        EXPECT_TRUE(dst->equalData(src));
    };

    expectRoundTrip(floatSrc, floatDst, "i0.pb");
    expectRoundTrip(uintSrc, uintDst, "u.pb");
}
} // namespace infini