Compare commits

...

1 Commits

Author SHA1 Message Date
kilinchange 9272d709da add a simple mempory pool for allocator 2023-10-19 12:36:01 +08:00
7 changed files with 128 additions and 19 deletions

View File

@@ -64,7 +64,11 @@ class GraphObj : public Object {
void optimize();
void dataMalloc(bool useNaiveAllocator = false);
void dataMalloc(bool useNaiveAllocator = false, size_t memPoolSize = 0);
Tensor cloneKV(Tensor &tensor);
void freeHeap();
/**
* @brief Add an operator and create its outputs. Output tensor arguments

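Taken together, the new graph-level entry points suggest a flow where a pool is reserved at dataMalloc time, cloneKV places tensor copies in the heap region of that pool, and freeHeap releases those copies in one step. The sketch below illustrates that sequence; it is an assumption about intended usage, and the include path, the graph and tensor variables, and the 256 MiB figure are made up for the example.

#include "core/graph.h" // assumed include path

void runWithMemPool(Graph g, Tensor kvCache) {
    // Reserve a single pool for weights, activations and the heap region;
    // 256 MiB is an arbitrary size chosen for the example.
    g->dataMalloc(/*useNaiveAllocator=*/false, /*memPoolSize=*/256 * 1024 * 1024);

    // With a pool active, the clone's storage comes from heapAlloc, i.e. the
    // top of the same pool, instead of a separate runtime allocation.
    Tensor backup = g->cloneKV(kvCache);

    // ... run the graph, consume backup ...

    g->freeHeap(); // reclaim the whole heap region at once
}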
View File

@@ -95,7 +95,14 @@ class GraphHandlerObj {
//------ runtime
inline void data_malloc() { g->dataMalloc(); }
inline void data_malloc(bool useNaiveAllocator = false,
size_t memPoolSize = 0) {
g->dataMalloc(useNaiveAllocator, memPoolSize);
}
inline Tensor clone_KV(Tensor &tensor) { return g->cloneKV(tensor); }
inline void free_heap() { g->freeHeap(); }
inline void tune() { g->getRuntime()->run(g, true); }

View File

@@ -26,14 +26,23 @@ class LazyAllocator {
size_t weightPeak = 0;
size_t heapPeak = 0;
size_t alignment;
bool hasMemPool = false;
size_t memPoolSize = 0;
// pointer to the memory actually allocated
void *ptr = nullptr;
// pointer to the weight memory space
void *weightPtr = nullptr;
// memory pool ptr
void *memPoolPtr = nullptr;
// // a cache designed for a batch size that has already occurred
// std::unordered_map<size_t, std::unordered_map<TensorObj *, size_t>>
// batchsizeToTensorOffset;
@@ -68,6 +77,10 @@ class LazyAllocator {
void init();
void setMemPool(size_t memPoolSize);
bool getMemPoolStatus();
// function: simulate memory allocation
// arguments
// size: size of memory block to be allocated
@@ -76,6 +89,10 @@ class LazyAllocator {
size_t allocWeight(size_t size);
size_t heapAlloc(size_t size);
void freeHeap();
// function: simulate memory free
// arguments:
// addr: head address offset of memory block to be free
@@ -92,6 +109,8 @@ class LazyAllocator {
void *getWeightPtr();
void *getHeapPtr();
void info();
private:

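Read alongside the .cc changes further down, these members describe one flat pool: getWeightPtr returns the pool base, getPtr returns the base plus weightPeak, and heapAlloc/getHeapPtr serve a heap whose offsets are counted back from the end of the pool. A minimal allocator-level sketch follows; the include path, the constructor signature, and the byte counts are assumptions made for illustration.

#include "core/lazy_allocator.h" // assumed include path

void memPoolLayoutSketch(Runtime runtime) {
    LazyAllocator allocator(runtime);      // constructor signature assumed
    allocator.setMemPool(1024);            // reserve one 1024-byte pool up front

    size_t w = allocator.allocWeight(256); // weight offsets counted from the pool base
    size_t a = allocator.alloc(256);       // non-weight offsets simulated after the weights
    size_t h = allocator.heapAlloc(128);   // heap offset: 1024 - 128, counted from the pool end

    void *weightBase = allocator.getWeightPtr(); // == memPoolPtr
    void *tensorBase = allocator.getPtr();       // == memPoolPtr + weightPeak
    void *poolBase   = allocator.getHeapPtr();   // also memPoolPtr; add heap offsets to it
    allocator.freeHeap();                        // heapPeak drops to 0; the pool itself stays allocated
    (void)w; (void)a; (void)h; (void)weightBase; (void)tensorBase; (void)poolBase;
}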
View File

@@ -1074,6 +1074,12 @@ class OnnxStub:
def optimize(self) -> None:
self.handler.optimize()
def clone_KV(self, tensor: backend.Tensor) -> backend.Tensor:
return self.handler.clone_KV(tensor)
def free_heap(self) -> None:
self.handler.free_heap()
def tune(self) -> None:
self.handler.tune()

View File

@@ -123,10 +123,12 @@ void GraphObj::optimize() {
}
}
void GraphObj::dataMalloc(bool useNaiveAllocator) {
void GraphObj::dataMalloc(bool useNaiveAllocator, size_t memPoolSize) {
// topological sorting first
IT_ASSERT(topo_sort() == true);
if (useNaiveAllocator) {
// cannot set a memory pool when using the naive allocator
IT_ASSERT(memPoolSize == 0);
// used for debugging memory out-of-bounds access, tensors will not be
// released correctly
// note: behavior may not match running in non-naive mode, and it may
@@ -136,6 +138,9 @@ void GraphObj::dataMalloc(bool useNaiveAllocator) {
}
return;
}
if (memPoolSize > 0) {
allocator.setMemPool(memPoolSize);
}
// count the number of times all tensors are used
std::unordered_map<TensorObj *, size_t> tensorToRefCount;
// record the memory address offsets of all tensors to be allocated
@@ -222,6 +227,27 @@ void GraphObj::dataMalloc(bool useNaiveAllocator) {
}
}
Tensor GraphObj::cloneKV(Tensor &tensor) {
auto obj = tensor->clone();
if (allocator.getMemPoolStatus()) {
if (tensor->hasData()) {
obj->setDataBlob(make_ref<BlobObj>(
tensor->runtime,
static_cast<uint8_t *>(allocator.getHeapPtr()) +
allocator.heapAlloc(tensor->getBytes())));
obj->copyData(tensor);
}
} else {
if (tensor->hasData()) {
obj->dataMalloc();
obj->copyData(tensor);
}
}
return obj;
}
void GraphObj::freeHeap() { this->allocator.freeHeap(); }
Tensor GraphObj::addTensor(Shape dim, DataType dtype) {
return tensors.emplace_back(make_ref<TensorObj>(dim, dtype, runtime));
}

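The guard at the top of dataMalloc makes the two modes mutually exclusive: a pool can only be requested together with the regular allocator, and combining it with the naive allocator trips the assertion. A few illustrative calls on a built graph g (the variable and the 64 MiB size are assumptions; pick one call per graph):

g->dataMalloc();                          // defaults: regular allocator, no pool
g->dataMalloc(false, 64 * 1024 * 1024);   // regular allocator backed by a 64 MiB pool
g->dataMalloc(true);                      // naive per-tensor allocation, for debugging
// g->dataMalloc(true, 64 * 1024 * 1024); // rejected: IT_ASSERT(memPoolSize == 0) fires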
View File

@@ -30,6 +30,9 @@ LazyAllocator::~LazyAllocator() {
if (this->weightPtr != nullptr) {
runtime->dealloc(this->weightPtr);
}
if (this->memPoolPtr != nullptr) {
runtime->dealloc(this->memPoolPtr);
}
}
void LazyAllocator::init() {
@@ -44,6 +47,17 @@ void LazyAllocator::init() {
this->ptr = nullptr;
}
void LazyAllocator::setMemPool(size_t memPoolSize) {
IT_ASSERT(memPoolSize > 0);
if (!this->hasMemPool) {
this->hasMemPool = true;
this->memPoolSize = memPoolSize;
this->memPoolPtr = runtime->alloc(memPoolSize);
}
}
bool LazyAllocator::getMemPoolStatus() { return this->hasMemPool; }
size_t LazyAllocator::alloc(size_t size) {
// pad the size to the multiple of alignment
size = this->getAlignedSize(size);
@@ -102,6 +116,17 @@ size_t LazyAllocator::allocWeight(size_t size) {
return retAddr;
}
size_t LazyAllocator::heapAlloc(size_t size) {
size = this->getAlignedSize(size);
this->heapPeak += size;
IT_ASSERT(this->memPoolSize >=
this->weightPeak + this->peak + this->heapPeak);
size_t retAddr = this->memPoolSize - this->heapPeak;
return retAddr;
}
void LazyAllocator::freeHeap() { this->heapPeak = 0; }
void LazyAllocator::free(size_t addr, size_t size) {
IT_ASSERT(this->ptr == nullptr);
size = getAlignedSize(size);
@@ -143,25 +168,40 @@ void LazyAllocator::free(size_t addr, size_t size) {
}
void *LazyAllocator::getPtr() {
if (this->ptr == nullptr) {
this->ptr = runtime->alloc(this->peak);
// #ifdef DEBUG_MODE
// printf("LazyAllocator really alloc non-weight: %p %lu
// bytes\n", this->ptr, peak);
// #endif
if (!hasMemPool) {
if (this->ptr == nullptr) {
this->ptr = runtime->alloc(this->peak);
// #ifdef DEBUG_MODE
// printf("LazyAllocator really alloc non-weight: %p %lu
// bytes\n", this->ptr, peak);
// #endif
}
return this->ptr;
} else {
IT_ASSERT(this->memPoolSize >= this->weightPeak + this->peak);
return static_cast<uint8_t *>(this->memPoolPtr) + weightPeak;
}
return this->ptr;
}
void *LazyAllocator::getWeightPtr() {
if (this->weightPtr == nullptr) {
this->weightPtr = runtime->alloc(this->weightPeak);
// #ifdef DEBUG_MODE
// printf("LazyAllocator really alloc weight: %p %lu bytes\n",
// this->weightPtr, weightPeak);
// #endif
if (!hasMemPool) {
if (this->weightPtr == nullptr) {
this->weightPtr = runtime->alloc(this->weightPeak);
// #ifdef DEBUG_MODE
// printf("LazyAllocator really alloc weight: %p %lu
// bytes\n",
// this->weightPtr, weightPeak);
// #endif
}
return this->weightPtr;
} else {
return this->memPoolPtr;
}
return this->weightPtr;
}
void *LazyAllocator::getHeapPtr() {
IT_ASSERT(hasMemPool);
return this->memPoolPtr;
}
size_t LazyAllocator::getAlignedSize(size_t size) {

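heapAlloc hands out offsets from the top of the pool downward: each call pads the request to the alignment, bumps heapPeak, checks that the weights, the non-weight peak and the heap still fit together, and returns memPoolSize - heapPeak. The standalone snippet below restates that arithmetic with assumed numbers (sizes already aligned, weightPeak and peak fixed at 256 each); it mirrors the logic but does not use LazyAllocator itself.

#include <cassert>
#include <cstddef>

int main() {
    const std::size_t memPoolSize = 1024, weightPeak = 256, peak = 256;
    std::size_t heapPeak = 0;

    auto heapAlloc = [&](std::size_t size) {
        heapPeak += size;                                    // request assumed pre-aligned
        assert(memPoolSize >= weightPeak + peak + heapPeak); // everything must fit in the pool
        return memPoolSize - heapPeak;                       // offset measured from the pool base
    };

    assert(heapAlloc(128) == 896); // first block ends exactly at the top of the pool
    assert(heapAlloc(64) == 832);  // next block stacks just below it
    heapPeak = 0;                  // freeHeap(): the whole heap region is reclaimed at once
    assert(heapAlloc(128) == 896);
    return 0;
}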
View File

@@ -437,7 +437,10 @@ void init_graph_builder(py::module &m) {
})
.def("has_target", &TensorObj::hasTarget, policy::automatic)
.def("src", &TensorObj::getSource, policy::move)
.def("printData", &TensorObj::printData, policy::automatic);
.def("printData", &TensorObj::printData, policy::automatic)
.def("copy_data",
py::overload_cast<const Tensor &>(&TensorObj::copyData),
policy::move);
py::class_<OperatorObj, std::shared_ptr<OperatorObj>>(m, "Operator")
.def("op_type", &OperatorObj::getOpType, policy::automatic)
.def("inputs", py::overload_cast<>(&OperatorObj::getInputs, py::const_),
@@ -499,7 +502,11 @@ void init_graph_builder(py::module &m) {
.def("topo_sort", &Handler::topo_sort, policy::automatic)
.def("optimize", &Handler::optimize, policy::automatic)
.def("operators", &Handler::operators, policy::move)
.def("data_malloc", &Handler::data_malloc, policy::automatic)
.def("data_malloc", &Handler::data_malloc,
py::arg("useNaiveAllocator") = false, py::arg("memPoolSize") = 0,
policy::automatic)
.def("clone_KV", &Handler::clone_KV, policy::move)
.def("free_heap", &Handler::free_heap, policy::move)
.def("get_perf_time", &Handler::get_perf_time, policy::automatic)
.def("tune", &Handler::tune, policy::automatic)
.def("run", &Handler::run, policy::automatic)