- add tensorType

- modify allocator to support memory allocation based on tensorType
kilinchange 2023-09-06 11:28:13 +08:00
parent e18900128d
commit 648ed46952
9 changed files with 109 additions and 60 deletions
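
In brief: tensors now carry a four-way type tag instead of a boolean persistent flag, and GraphObj::dataMalloc routes each tag to a different allocation policy. An editor's sketch of the resulting policy (all names appear in the diffs below):

    // TensorType drives the allocation policy in GraphObj::dataMalloc():
    enum class TensorType { weight, input, output, others };
    // weight       -> dedicated arena, allocated once per graph, never recycled
    // input/output -> main arena, offsets not reused while the graph runs
    // others       -> main arena, offsets recycled via reference counting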

View File

@@ -125,9 +125,9 @@ class GraphObj : public Object {
     bool sorted;
     /**
-     * @brief If the persistent tensors are allocated.
+     * @brief If the weight tensors are allocated.
      */
-    bool persistentAllocated = false;
+    bool weightAllocated = false;
 };
 } // namespace infini

View File

@@ -24,15 +24,15 @@ class LazyAllocator {
     size_t peak = 0;
-    size_t persistentPeak = 0;
+    size_t weightPeak = 0;
     size_t alignment;
     // pointer to the memory actually allocated
     void *ptr = nullptr;
-    // pointer to the persistent memory space
-    void *persistentPtr = nullptr;
+    // pointer to the weight memory space
+    void *weightPtr = nullptr;
     // // a cache designed for a batch size that has already occurred
     // std::unordered_map<size_t, std::unordered_map<TensorObj *, size_t>>
@@ -74,7 +74,7 @@ class LazyAllocator {
     // return: head address offset of the allocated memory block
     size_t alloc(size_t size);
-    size_t allocPersistent(size_t size);
+    size_t allocWeight(size_t size);
     // function: simulate memory free
     // arguments:
@@ -90,7 +90,7 @@ class LazyAllocator {
     // std::unordered_map<TensorObj *, size_t> getCache(size_t batchsize);
-    void *getPersistentPtr();
+    void *getWeightPtr();
     void info();
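
Editor's note: the header now exposes two arenas, a reusable one behind ptr (alloc/free) and a write-once weight arena behind weightPtr (allocWeight). A minimal usage sketch, not from the repo, assuming the constructor takes a Runtime and that the simulated-free signature elided above is free(offset, size):

    LazyAllocator allocator(runtime);       // hypothetical setup
    size_t w = allocator.allocWeight(4096); // weight arena offset, never freed
    size_t a = allocator.alloc(1024);       // main arena offset, reusable
    allocator.free(a, 1024);                // block becomes reusable
    size_t b = allocator.alloc(512);        // may land on the block just freed

Neither alloc call touches device memory; the real allocation is deferred to getPtr()/getWeightPtr() (see lazy_allocator.cc below).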

View File

@@ -1,5 +1,6 @@
 #pragma once
 #include "core/tensor_base.h"
+#include "core/tensor_type.h"
 #include "utils/data_convert.h"
 #include <cmath>
 #include <cstring>
@@ -19,7 +20,7 @@ class TensorObj : public TensorBaseObj {
     size_t _size; // Cache of Π(shape).
     Fuid fuid;    // Cloned tensors share the same id. Tensors constructed from
                   // scratch have a new id.
-    bool persistent = false;
+    TensorType tensorType = TensorType::others;

   public:
     TensorObj(Shape shape, DataType dtype, Runtime runtime);
@@ -37,8 +38,33 @@ class TensorObj : public TensorBaseObj {
     size_t getOffset(const vector<int> &ds) const;
     void dataMalloc();
     UidBaseType getFuid() const { return fuid; }
-    bool isPersistent() const { return persistent; }
-    void setPersistent() { persistent = true; }
+    bool isWeight() const { return tensorType == TensorType::weight; }
+    bool isInput() const { return tensorType == TensorType::input; }
+    bool isOutput() const { return tensorType == TensorType::output; }
+    bool isOthers() const { return tensorType == TensorType::others; }
+    void setWeight() { tensorType = TensorType::weight; }
+    void setInput() { tensorType = TensorType::input; }
+    void setOutput() { tensorType = TensorType::output; }
+    string tensorTypeToString() const {
+        switch (tensorType) {
+        case TensorType::weight:
+            return "weight";
+            break;
+        case TensorType::input:
+            return "input";
+            break;
+        case TensorType::output:
+            return "output";
+            break;
+        case TensorType::others:
+            return "others";
+            break;
+        default:
+            return "unknown tensor type";
+            break;
+        }
+    }
     void load(std::string file_path);
     void save(std::string file_path);
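
Editor's note: a short sketch of the new accessors, built with the TensorObj(Shape, DataType, Runtime) constructor declared above (the runtime handle and the DataType::Float32 spelling are assumptions):

    auto t = make_ref<TensorObj>(Shape{2, 3}, DataType::Float32, runtime);
    IT_ASSERT(t->isOthers());         // TensorType::others is the default
    t->setWeight();                   // tag the tensor for the weight arena
    IT_ASSERT(t->tensorTypeToString() == "weight");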

View File

@@ -0,0 +1,7 @@
+#pragma once
+
+namespace infini {
+
+enum class TensorType { weight, input, output, others };
+
+} // namespace infini

View File

@@ -45,11 +45,17 @@ class OnnxStub:
         tensors: Dict[str, backend.Tensor] = dict()
         data: Dict[str, TensorProto] = dict()

+        for initializer in model.graph.initializer:
+            dims = [d for d in initializer.dims]
+            tensors[initializer.name] = self.handler.tensor(dims, initializer.data_type)
+            data[initializer.name] = initializer
+
         for input in model.graph.input:
             dims = _take_shape_dim(input.type.tensor_type.shape)
-            tensors[input.name] = self.handler.tensor(
-                dims, input.type.tensor_type.elem_type
-            )
+            if input.name not in tensors.keys():
+                tensors[input.name] = self.handler.tensor(
+                    dims, input.type.tensor_type.elem_type
+                )

         for output in model.graph.output:
             dims = _take_shape_dim(output.type.tensor_type.shape)
@@ -57,10 +63,6 @@ class OnnxStub:
                 dims, output.type.tensor_type.elem_type
             )

-        for initializer in model.graph.initializer:
-            dims = [d for d in initializer.dims]
-            tensors[initializer.name] = self.handler.tensor(dims, initializer.data_type)
-            data[initializer.name] = initializer
-
         node_name = []
         new_node_name = []
@@ -609,11 +611,17 @@ class OnnxStub:
         node_list = list(set(node_name) - set(new_node_name))

         ################################
-        # Set weight tensors as persistent
+        # Set tensor type
         ################################
-        for name, obj in tensors.items():
-            if data.get(name) != None:
-                obj.set_persistent()
+        for initializer in model.graph.initializer:
+            tensors[initializer.name].set_weight()
+        for input in model.graph.input:
+            tensors[input.name].set_input()
+        for output in model.graph.output:
+            tensors[output.name].set_output()

         ################################
         # Allocate memory space for data

View File

@@ -168,54 +168,60 @@ void GraphObj::dataMalloc() {
     // reinit allocator
     allocator.init();
-    // record all persistent tensors, including weight tensors and kvcache
+    // record all weight tensors, including kvcache
     // tensors
-    std::unordered_set<TensorObj *> persistentTensors;
+    std::unordered_set<TensorObj *> weightTensors;
     for (auto &tensor : tensors) {
-        if (tensor->isPersistent()) {
-            // allocate memory for all persistent tensors first, and this memory
-            // will not be reused later
-            persistentTensors.insert(tensor.get());
-            if (!this->persistentAllocated) {
+        if (tensor->isWeight()) {
+            // allocate memory for all weight tensors first, and this memory
+            // will not be freed until the graph is destroyed
+            weightTensors.insert(tensor.get());
+            if (!this->weightAllocated) {
                 tensorToOffset[tensor.get()] =
-                    allocator.allocPersistent(tensor->getBytes());
+                    allocator.allocWeight(tensor->getBytes());
             }
+        } else if (tensor->isInput() || tensor->isOutput()) {
+            // allocate memory for all input and output tensors, and this memory
+            // will not be reused later
+            tensorToOffset[tensor.get()] = allocator.alloc(tensor->getBytes());
         } else {
             tensorToRefCount[tensor.get()] = tensor->getTargets().size();
             // allocate memory for all user-created tensors
             if (tensor.get()->getSource() == nullptr) {
                 // allocate memory for input tensors, because they are not
                 // the output of any op
                 tensorToOffset[tensor.get()] =
                     allocator.alloc(tensor->getBytes());
             }
         }
     }
-    // if memory has not yet been allocated for persistent tensors,
+    // if memory has not yet been allocated for weight tensors,
     // allocate memory now and do not allocate again in the future.
-    if (!this->persistentAllocated) {
-        this->persistentAllocated = true;
-        // only allocate once for persistent tensors
-        for (auto &tensor : persistentTensors) {
+    if (!this->weightAllocated) {
+        this->weightAllocated = true;
+        // only allocate once for weight tensors
+        for (auto &tensor : weightTensors) {
             IT_ASSERT(tensorToOffset.find(tensor) != tensorToOffset.end());
             tensor->setDataBlob(make_ref<BlobObj>(
                 tensor->runtime,
-                static_cast<uint8_t *>(allocator.getPersistentPtr()) +
+                static_cast<uint8_t *>(allocator.getWeightPtr()) +
                     tensorToOffset[tensor]));
         }
     }
     // traverse in topological order and simulate memory allocation
     for (auto &op : ops) {
-        // memory should be allocated for the output first
+        // memory should be allocated for the op's output first
        auto outputs = op->getOutputs();
        for (auto &tensor : outputs) {
-            tensorToOffset[tensor.get()] = allocator.alloc(tensor->getBytes());
+            if (tensor->isOthers()) {
+                tensorToOffset[tensor.get()] =
+                    allocator.alloc(tensor->getBytes());
+            }
        }
        auto inputs = op->getInputs();
        for (auto &tensor : inputs) {
-            if (persistentTensors.find(tensor.get()) ==
-                persistentTensors.end()) {
+            if (tensor->isOthers()) {
                auto tensorIter = tensorToRefCount.find(tensor.get());
                IT_ASSERT(tensorIter != tensorToRefCount.end());
                IT_ASSERT(tensorToRefCount[tensor.get()] > 0);
                tensorToRefCount[tensor.get()] -= 1;
                if (tensorToRefCount[tensor.get()] == 0) {
                    // indicate that this tensor will no longer be used and
@@ -228,9 +234,9 @@ void GraphObj::dataMalloc() {
         }
     }
-    // perform actual memory allocation for non-persistent tensors
+    // perform actual memory allocation for non-weight tensors
     for (auto &tensor : tensors) {
-        if (!tensor->isPersistent()) {
+        if (!tensor->isWeight()) {
             IT_ASSERT(tensorToOffset.find(tensor.get()) !=
                       tensorToOffset.end());
             tensor->setDataBlob(make_ref<BlobObj>(
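
Editor's note: to make the policy concrete, here is a hypothetical trace of dataMalloc (not from the repo) for a two-op graph conv(X, W) -> T1, relu(T1) -> Y, with every tensor taking 1024 bytes:

    // First pass over all tensors:
    allocator.allocWeight(1024); // W (weight): weight arena, lives with the graph
    allocator.alloc(1024);       // X (input):  main arena, never recycled
    allocator.alloc(1024);       // Y (output): main arena, never recycled
    // T1 (others) has a source op, so it only gets a refcount of 1 here.
    // Topological pass over ops:
    allocator.alloc(1024);       // conv ran: T1, its "others" output
    allocator.free(t1Offset, 1024); // relu consumed T1, refcount hit 0

t1Offset stands for the offset recorded for T1 in tensorToOffset.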

View File

@@ -27,8 +27,8 @@ LazyAllocator::~LazyAllocator() {
     if (this->ptr != nullptr) {
         runtime->dealloc(this->ptr);
     }
-    if (this->persistentPtr != nullptr) {
-        runtime->dealloc(this->persistentPtr);
+    if (this->weightPtr != nullptr) {
+        runtime->dealloc(this->weightPtr);
     }
 }
@@ -91,11 +91,11 @@ size_t LazyAllocator::alloc(size_t size) {
     return retAddr;
 }

-size_t LazyAllocator::allocPersistent(size_t size) {
-    IT_ASSERT(this->persistentPtr == nullptr);
+size_t LazyAllocator::allocWeight(size_t size) {
+    IT_ASSERT(this->weightPtr == nullptr);
     size = this->getAlignedSize(size);
-    size_t retAddr = this->persistentPeak;
-    this->persistentPeak += size;
+    size_t retAddr = this->weightPeak;
+    this->weightPeak += size;
     return retAddr;
 }
@@ -143,22 +143,22 @@ void *LazyAllocator::getPtr() {
     if (this->ptr == nullptr) {
         this->ptr = runtime->alloc(this->peak);
 #ifdef DEBUG_MODE
-        printf("LazyAllocator really alloc non-persistent: %p %lu bytes\n",
+        printf("LazyAllocator really alloc non-weight: %p %lu bytes\n",
                this->ptr, peak);
 #endif
     }
     return this->ptr;
 }

-void *LazyAllocator::getPersistentPtr() {
-    if (this->persistentPtr == nullptr) {
-        this->persistentPtr = runtime->alloc(this->persistentPeak);
+void *LazyAllocator::getWeightPtr() {
+    if (this->weightPtr == nullptr) {
+        this->weightPtr = runtime->alloc(this->weightPeak);
 #ifdef DEBUG_MODE
-        printf("LazyAllocator really alloc persistent: %p %lu bytes\n",
-               this->persistentPtr, persistentPeak);
+        printf("LazyAllocator really alloc weight: %p %lu bytes\n",
+               this->weightPtr, weightPeak);
 #endif
     }
-    return this->persistentPtr;
+    return this->weightPtr;
 }
size_t LazyAllocator::getAlignedSize(size_t size) {
@@ -166,8 +166,8 @@ size_t LazyAllocator::getAlignedSize(size_t size) {
 }

 void LazyAllocator::info() {
-    std::cout << "Used memory: " << this->used + this->persistentPeak
-              << ", peak memory: " << this->peak + this->persistentPeak
+    std::cout << "Used memory: " << this->used + this->weightPeak
+              << ", peak memory: " << this->peak + this->weightPeak
               << std::endl;
 }
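
Editor's note: the IT_ASSERT in allocWeight pins down the lazy contract: every allocWeight call must happen before the first getWeightPtr call, which materializes the arena exactly once at its peak size. A hedged timeline:

    size_t off = allocator.allocWeight(4096); // bookkeeping only, weightPeak grows
    void *base = allocator.getWeightPtr();    // first call: runtime->alloc(weightPeak)
    void *addr = static_cast<uint8_t *>(base) + off; // as graph.cc does above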

View File

@@ -23,7 +23,7 @@ string TensorObj::toString() const {
     string ret = "Tensor " + std::to_string(guid) + ", Fuid " +
                  std::to_string(fuid) + ", shape " + vecToString(shape) +
                  ", dtype " + dtype.toString() + ", " + runtime->toString() +
-                 ", " + ss.str() + "\n";
+                 ", " + ss.str() + ", " + tensorTypeToString() + "\n";
     vector<UidBaseType> targetGuids;
     for (const auto &op : targets)
         targetGuids.emplace_back(op.lock()->getGuid());

View File

@@ -321,7 +321,9 @@ void init_graph_builder(py::module &m) {
             py::buffer_protocol())
         .def("fuid", &TensorObj::getFuid, policy::automatic)
         .def("shape", &TensorObj::getDims, policy::move)
-        .def("set_persistent", &TensorObj::setPersistent, policy::move)
+        .def("set_weight", &TensorObj::setWeight, policy::move)
+        .def("set_input", &TensorObj::setInput, policy::move)
+        .def("set_output", &TensorObj::setOutput, policy::move)
         .def("copyin_float", &TensorObj::copyin<float>, policy::move)
         .def("copyin_int32", &TensorObj::copyin<int32_t>, policy::move)
         .def("copyin_int64", &TensorObj::copyin<int64_t>, policy::move)