forked from jiuyuan/InfiniTensor
Compare commits
1 Commit
master...allocator_

Author | SHA1 | Date |
---|---|---|
kilinchange | 9272d709da | |

```diff
@@ -64,7 +64,11 @@ class GraphObj : public Object {
 
     void optimize();
 
-    void dataMalloc(bool useNaiveAllocator = false);
+    void dataMalloc(bool useNaiveAllocator = false, size_t memPoolSize = 0);
+
+    Tensor cloneKV(Tensor &tensor);
+
+    void freeHeap();
 
     /**
      * @brief Add an operator and create its outputs. Output tensor arguments
```

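The hunk above adds an optional `memPoolSize` parameter to `GraphObj::dataMalloc` and introduces `cloneKV`/`freeHeap`. A minimal caller-side sketch of the intended flow follows; it is not part of this commit, and the include path, the `infini::Graph`/`infini::Tensor` aliases, and the surrounding graph/runtime setup are assumptions.

```cpp
// Sketch only: assumes the "core/graph.h" include path and the
// infini::Graph / infini::Tensor ref aliases; building the graph and
// the runtime is not shown and is not part of this commit.
#include "core/graph.h"

void runWithMemPool(infini::Graph g, infini::Tensor kvTensor) {
    // Reserve one 256 MiB pool up front; passing memPoolSize = 0 keeps
    // the previous behaviour of separate allocations.
    g->dataMalloc(/*useNaiveAllocator=*/false,
                  /*memPoolSize=*/256ull * 1024 * 1024);

    // Snapshot the KV cache into the heap end of the same pool.
    infini::Tensor kvCopy = g->cloneKV(kvTensor);

    // ... run the graph and consume kvCopy ...

    // Release all heap-side snapshots in one go before the next step.
    g->freeHeap();
}
```
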
```diff
@@ -95,7 +95,14 @@ class GraphHandlerObj {
 
     //------ runtime
 
-    inline void data_malloc() { g->dataMalloc(); }
+    inline void data_malloc(bool useNaiveAllocator = false,
+                            size_t memPoolSize = 0) {
+        g->dataMalloc(useNaiveAllocator, memPoolSize);
+    }
+
+    inline Tensor clone_KV(Tensor &tensor) { return g->cloneKV(tensor); }
+
+    inline void free_heap() { g->freeHeap(); }
 
     inline void tune() { g->getRuntime()->run(g, true); }
 
```

```diff
@@ -26,14 +26,23 @@ class LazyAllocator {
 
     size_t weightPeak = 0;
 
+    size_t heapPeak = 0;
+
     size_t alignment;
 
+    bool hasMemPool = false;
+
+    size_t memPoolSize = 0;
+
     // pointer to the memory actually allocated
     void *ptr = nullptr;
 
     // pointer to the weight memory space
     void *weightPtr = nullptr;
 
+    // memory pool ptr
+    void *memPoolPtr = nullptr;
+
     // // a cache designed for a batch size that has already occurred
     // std::unordered_map<size_t, std::unordered_map<TensorObj *, size_t>>
     //     batchsizeToTensorOffset;
@@ -68,6 +77,10 @@ class LazyAllocator {
 
     void init();
 
+    void setMemPool(size_t memPoolSize);
+
+    bool getMemPoolStatus();
+
     // function: simulate memory allocation
     // arguments:
     //     size: size of memory block to be allocated
@@ -76,6 +89,10 @@ class LazyAllocator {
 
     size_t allocWeight(size_t size);
 
+    size_t heapAlloc(size_t size);
+
+    void freeHeap();
+
     // function: simulate memory free
     // arguments:
     //     addr: head address offset of memory block to be free
@@ -92,6 +109,8 @@ class LazyAllocator {
 
     void *getWeightPtr();
 
+    void *getHeapPtr();
+
     void info();
 
   private:
```

```diff
@@ -1074,6 +1074,12 @@ class OnnxStub:
     def optimize(self) -> None:
         self.handler.optimize()
 
+    def clone_KV(self, tensor: backend.Tensor) -> backend.Tensor:
+        return self.handler.clone_KV(tensor)
+
+    def free_heap(self) -> None:
+        self.handler.free_heap()
+
     def tune(self) -> None:
         self.handler.tune()
```

```diff
@@ -123,10 +123,12 @@ void GraphObj::optimize() {
     }
 }
 
-void GraphObj::dataMalloc(bool useNaiveAllocator) {
+void GraphObj::dataMalloc(bool useNaiveAllocator, size_t memPoolSize) {
     // topological sorting first
     IT_ASSERT(topo_sort() == true);
     if (useNaiveAllocator) {
+        // can not set memory pool when use naive allocator
+        IT_ASSERT(memPoolSize == 0);
         // used for debugging memory out-of-bounds access, tensors will not be
         // released correctly
         // note: behavior may not match running in non-naive mode, and it may
@@ -136,6 +138,9 @@ void GraphObj::dataMalloc(bool useNaiveAllocator) {
         }
         return;
     }
+    if (memPoolSize > 0) {
+        allocator.setMemPool(memPoolSize);
+    }
     // count the number of times all tensors are used
     std::unordered_map<TensorObj *, size_t> tensorToRefCount;
     // record the memory address offsets of all tensors to be allocated
@@ -222,6 +227,27 @@ void GraphObj::dataMalloc(bool useNaiveAllocator) {
     }
 }
 
+Tensor GraphObj::cloneKV(Tensor &tensor) {
+    auto obj = tensor->clone();
+    if (allocator.getMemPoolStatus()) {
+        if (tensor->hasData()) {
+            obj->setDataBlob(make_ref<BlobObj>(
+                tensor->runtime,
+                static_cast<uint8_t *>(allocator.getHeapPtr()) +
+                    allocator.heapAlloc(tensor->getBytes())));
+            obj->copyData(tensor);
+        }
+    } else {
+        if (tensor->hasData()) {
+            obj->dataMalloc();
+            obj->copyData(tensor);
+        }
+    }
+    return obj;
+}
+
+void GraphObj::freeHeap() { this->allocator.freeHeap(); }
+
 Tensor GraphObj::addTensor(Shape dim, DataType dtype) {
     return tensors.emplace_back(make_ref<TensorObj>(dim, dtype, runtime));
 }
```

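One behavioural detail of the pool path in `cloneKV` above: the clone's blob is placed at `getHeapPtr() + heapAlloc(getBytes())`, and `freeHeap` only rewinds the heap cursor, so earlier clones are overwritten by the next round of heap allocations rather than freed individually. The standalone model below (plain C++ with made-up sizes, not InfiniTensor code, and ignoring `getAlignedSize`) illustrates that offset reuse:

```cpp
#include <cstddef>
#include <cstdio>

// Standalone model of the heap end of the pool: heapAlloc() hands out
// offsets from the top of the pool, freeHeap() just rewinds the cursor.
struct HeapModel {
    std::size_t memPoolSize;
    std::size_t heapPeak = 0;

    std::size_t heapAlloc(std::size_t size) {
        heapPeak += size;                // heap grows downward from the top
        return memPoolSize - heapPeak;   // offset of the new block
    }
    void freeHeap() { heapPeak = 0; }    // invalidates all heap blocks at once
};

int main() {
    HeapModel h{1 << 20};                         // 1 MiB pool (illustrative)
    std::size_t kv0 = h.heapAlloc(4096);          // first KV snapshot
    std::size_t kv1 = h.heapAlloc(4096);          // second snapshot, lower offset
    h.freeHeap();                                 // rewind the heap cursor
    std::size_t kv2 = h.heapAlloc(4096);          // reuses kv0's offset
    std::printf("%zu %zu %zu\n", kv0, kv1, kv2);  // kv2 == kv0
}
```
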
```diff
@@ -30,6 +30,9 @@ LazyAllocator::~LazyAllocator() {
     if (this->weightPtr != nullptr) {
         runtime->dealloc(this->weightPtr);
     }
+    if (this->memPoolPtr != nullptr) {
+        runtime->dealloc(this->memPoolPtr);
+    }
 }
 
 void LazyAllocator::init() {
@@ -44,6 +47,17 @@ void LazyAllocator::init() {
     this->ptr = nullptr;
 }
 
+void LazyAllocator::setMemPool(size_t memPoolSize) {
+    IT_ASSERT(memPoolSize > 0);
+    if (!this->hasMemPool) {
+        this->hasMemPool = true;
+        this->memPoolSize = memPoolSize;
+        this->memPoolPtr = runtime->alloc(memPoolSize);
+    }
+}
+
+bool LazyAllocator::getMemPoolStatus() { return this->hasMemPool; }
+
 size_t LazyAllocator::alloc(size_t size) {
     // pad the size to the multiple of alignment
     size = this->getAlignedSize(size);
@@ -102,6 +116,17 @@ size_t LazyAllocator::allocWeight(size_t size) {
     return retAddr;
 }
 
+size_t LazyAllocator::heapAlloc(size_t size) {
+    size = this->getAlignedSize(size);
+    this->heapPeak += size;
+    IT_ASSERT(this->memPoolSize >=
+              this->weightPeak + this->peak + this->heapPeak);
+    size_t retAddr = this->memPoolSize - this->heapPeak;
+    return retAddr;
+}
+
+void LazyAllocator::freeHeap() { this->heapPeak = 0; }
+
 void LazyAllocator::free(size_t addr, size_t size) {
     IT_ASSERT(this->ptr == nullptr);
     size = getAlignedSize(size);
@@ -143,25 +168,40 @@ void LazyAllocator::free(size_t addr, size_t size) {
 }
 
 void *LazyAllocator::getPtr() {
-    if (this->ptr == nullptr) {
-        this->ptr = runtime->alloc(this->peak);
-        // #ifdef DEBUG_MODE
-        //     printf("LazyAllocator really alloc non-weight: %p %lu
-        //     bytes\n", this->ptr, peak);
-        // #endif
+    if (!hasMemPool) {
+        if (this->ptr == nullptr) {
+            this->ptr = runtime->alloc(this->peak);
+            // #ifdef DEBUG_MODE
+            //     printf("LazyAllocator really alloc non-weight: %p %lu
+            //     bytes\n", this->ptr, peak);
+            // #endif
+        }
+        return this->ptr;
+    } else {
+        IT_ASSERT(this->memPoolSize >= this->weightPeak + this->peak);
+        return static_cast<uint8_t *>(this->memPoolPtr) + weightPeak;
     }
-    return this->ptr;
 }
 
 void *LazyAllocator::getWeightPtr() {
-    if (this->weightPtr == nullptr) {
-        this->weightPtr = runtime->alloc(this->weightPeak);
-        // #ifdef DEBUG_MODE
-        //     printf("LazyAllocator really alloc weight: %p %lu bytes\n",
-        //     this->weightPtr, weightPeak);
-        // #endif
+    if (!hasMemPool) {
+        if (this->weightPtr == nullptr) {
+            this->weightPtr = runtime->alloc(this->weightPeak);
+            // #ifdef DEBUG_MODE
+            //     printf("LazyAllocator really alloc weight: %p %lu
+            //     bytes\n",
+            //     this->weightPtr, weightPeak);
+            // #endif
+        }
+        return this->weightPtr;
+    } else {
+        return this->memPoolPtr;
    }
-    return this->weightPtr;
 }
 
+void *LazyAllocator::getHeapPtr() {
+    IT_ASSERT(hasMemPool);
+    return this->memPoolPtr;
+}
+
 size_t LazyAllocator::getAlignedSize(size_t size) {
```

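Taken together, `heapAlloc`, `getPtr`, `getWeightPtr`, and `getHeapPtr` above imply one layout for the pool: weights start at the pool base, the activation arena starts at offset `weightPeak`, and heap blocks are carved downward from `memPoolSize`, with the assertion in `heapAlloc` keeping the three regions from overlapping. A worked example with illustrative numbers (plain C++, not part of the commit, alignment omitted):

```cpp
#include <cassert>
#include <cstddef>
#include <cstdio>

int main() {
    // Illustrative sizes only; the real values come out of the allocator's
    // simulated alloc()/allocWeight() passes.
    const std::size_t memPoolSize = 1024;  // total pool
    const std::size_t weightPeak = 256;    // bytes reserved for weights
    const std::size_t peak = 384;          // bytes reserved for activations
    std::size_t heapPeak = 0;              // bytes handed out by heapAlloc

    // getWeightPtr(): pool base; getPtr(): pool base + weightPeak.
    std::size_t weightOffset = 0;
    std::size_t activationOffset = weightPeak;

    // heapAlloc(size): bump heapPeak and return memPoolSize - heapPeak,
    // asserting that weights + activations + heap still fit in the pool.
    auto heapAlloc = [&](std::size_t size) {
        heapPeak += size;
        assert(memPoolSize >= weightPeak + peak + heapPeak);
        return memPoolSize - heapPeak;
    };

    std::printf("weights at %zu, activations at %zu\n", weightOffset,
                activationOffset);                          // 0, 256
    std::printf("first heap block at %zu\n", heapAlloc(128));   // 896
    std::printf("second heap block at %zu\n", heapAlloc(64));   // 832
}
```
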
```diff
@@ -437,7 +437,10 @@ void init_graph_builder(py::module &m) {
         })
         .def("has_target", &TensorObj::hasTarget, policy::automatic)
         .def("src", &TensorObj::getSource, policy::move)
-        .def("printData", &TensorObj::printData, policy::automatic);
+        .def("printData", &TensorObj::printData, policy::automatic)
+        .def("copy_data",
+             py::overload_cast<const Tensor &>(&TensorObj::copyData),
+             policy::move);
     py::class_<OperatorObj, std::shared_ptr<OperatorObj>>(m, "Operator")
         .def("op_type", &OperatorObj::getOpType, policy::automatic)
         .def("inputs", py::overload_cast<>(&OperatorObj::getInputs, py::const_),
@@ -499,7 +502,11 @@ void init_graph_builder(py::module &m) {
         .def("topo_sort", &Handler::topo_sort, policy::automatic)
        .def("optimize", &Handler::optimize, policy::automatic)
         .def("operators", &Handler::operators, policy::move)
-        .def("data_malloc", &Handler::data_malloc, policy::automatic)
+        .def("data_malloc", &Handler::data_malloc,
+             py::arg("useNaiveAllocator") = false, py::arg("memPoolSize") = 0,
+             policy::automatic)
+        .def("clone_KV", &Handler::clone_KV, policy::move)
+        .def("free_heap", &Handler::free_heap, policy::move)
         .def("get_perf_time", &Handler::get_perf_time, policy::automatic)
         .def("tune", &Handler::tune, policy::automatic)
         .def("run", &Handler::run, policy::automatic)
```