forked from jiuyuan/InfiniTensor
Compare commits: master...allocator_ (1 commit)

Commit 9272d709da
@@ -64,7 +64,11 @@ class GraphObj : public Object {

     void optimize();

-    void dataMalloc(bool useNaiveAllocator = false);
+    void dataMalloc(bool useNaiveAllocator = false, size_t memPoolSize = 0);
+
+    Tensor cloneKV(Tensor &tensor);
+
+    void freeHeap();

     /**
      * @brief Add an operator and create its outputs. Output tensor arguments
@@ -95,7 +95,14 @@ class GraphHandlerObj {

     //------ runtime

-    inline void data_malloc() { g->dataMalloc(); }
+    inline void data_malloc(bool useNaiveAllocator = false,
+                            size_t memPoolSize = 0) {
+        g->dataMalloc(useNaiveAllocator, memPoolSize);
+    }
+
+    inline Tensor clone_KV(Tensor &tensor) { return g->cloneKV(tensor); }
+
+    inline void free_heap() { g->freeHeap(); }

     inline void tune() { g->getRuntime()->run(g, true); }
@@ -26,14 +26,23 @@ class LazyAllocator {

     size_t weightPeak = 0;

+    size_t heapPeak = 0;
+
     size_t alignment;

+    bool hasMemPool = false;
+
+    size_t memPoolSize = 0;
+
     // pointer to the memory actually allocated
     void *ptr = nullptr;
     // pointer to the weight memory space
     void *weightPtr = nullptr;
+    // memory pool ptr
+    void *memPoolPtr = nullptr;

     // // a cache designed for a batch size that has already occurred
     // std::unordered_map<size_t, std::unordered_map<TensorObj *, size_t>>
     //     batchsizeToTensorOffset;
@@ -68,6 +77,10 @@ class LazyAllocator {

     void init();

+    void setMemPool(size_t memPoolSize);
+
+    bool getMemPoolStatus();
+
     // function: simulate memory allocation
     // arguments:
     //     size: size of memory block to be allocated
@@ -76,6 +89,10 @@ class LazyAllocator {

     size_t allocWeight(size_t size);

+    size_t heapAlloc(size_t size);
+
+    void freeHeap();
+
     // function: simulate memory free
     // arguments:
     //     addr: head address offset of memory block to be free
@@ -92,6 +109,8 @@ class LazyAllocator {

     void *getWeightPtr();

+    void *getHeapPtr();
+
     void info();

   private:
@@ -1075,6 +1075,12 @@ class OnnxStub:
     def optimize(self) -> None:
         self.handler.optimize()

+    def clone_KV(self, tensor: backend.Tensor) -> backend.Tensor:
+        return self.handler.clone_KV(tensor)
+
+    def free_heap(self) -> None:
+        self.handler.free_heap()
+
     def tune(self) -> None:
         self.handler.tune()
@@ -123,10 +123,12 @@ void GraphObj::optimize() {
     }
 }

-void GraphObj::dataMalloc(bool useNaiveAllocator) {
+void GraphObj::dataMalloc(bool useNaiveAllocator, size_t memPoolSize) {
     // topological sorting first
     IT_ASSERT(topo_sort() == true);
     if (useNaiveAllocator) {
+        // can not set memory pool when use naive allocator
+        IT_ASSERT(memPoolSize == 0);
         // used for debugging memory out-of-bounds access, tensors will not be
         // released correctly
         // note: behavior may not match running in non-naive mode, and it may
@@ -136,6 +138,9 @@ void GraphObj::dataMalloc(bool useNaiveAllocator) {
         }
         return;
     }
+    if (memPoolSize > 0) {
+        allocator.setMemPool(memPoolSize);
+    }
     // count the number of times all tensors are used
     std::unordered_map<TensorObj *, size_t> tensorToRefCount;
     // record the memory address offsets of all tensors to be allocated
@@ -222,6 +227,27 @@ void GraphObj::dataMalloc(bool useNaiveAllocator) {
     }
 }

+Tensor GraphObj::cloneKV(Tensor &tensor) {
+    auto obj = tensor->clone();
+    if (allocator.getMemPoolStatus()) {
+        if (tensor->hasData()) {
+            obj->setDataBlob(make_ref<BlobObj>(
+                tensor->runtime,
+                static_cast<uint8_t *>(allocator.getHeapPtr()) +
+                    allocator.heapAlloc(tensor->getBytes())));
+            obj->copyData(tensor);
+        }
+    } else {
+        if (tensor->hasData()) {
+            obj->dataMalloc();
+            obj->copyData(tensor);
+        }
+    }
+    return obj;
+}
+
+void GraphObj::freeHeap() { this->allocator.freeHeap(); }
+
 Tensor GraphObj::addTensor(Shape dim, DataType dtype) {
     return tensors.emplace_back(make_ref<TensorObj>(dim, dtype, runtime));
 }
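Taken together, the pieces above add a graph-level flow for KV-cache style reuse: allocate everything out of one pool, deep-copy KV tensors into the heap region at the top of that pool, and rewind the heap between requests. The following is only a sketch of how a caller might drive it, assuming the usual InfiniTensor `infini::Graph`/`infini::Tensor` handles, an assumed header path, and an illustrative 256 MiB pool; none of the names or sizes below come from this commit.

    #include "core/graph.h" // assumed InfiniTensor header path

    // Hypothetical helper: take a snapshot of one KV tensor for the next step.
    infini::Tensor snapshotKV(infini::Graph g, infini::Tensor kvTensor) {
        // Reserve a single pool covering weights, activations, and the KV heap.
        g->dataMalloc(/*useNaiveAllocator=*/false,
                      /*memPoolSize=*/256 * 1024 * 1024);
        g->getRuntime()->run(g);            // produce the KV tensor
        auto kvCopy = g->cloneKV(kvTensor); // deep copy lands in the heap region
        g->freeHeap();                      // reset heapPeak for the next request
        return kvCopy;
    }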
@@ -30,6 +30,9 @@ LazyAllocator::~LazyAllocator() {
     if (this->weightPtr != nullptr) {
         runtime->dealloc(this->weightPtr);
     }
+    if (this->memPoolPtr != nullptr) {
+        runtime->dealloc(this->memPoolPtr);
+    }
 }

 void LazyAllocator::init() {
|
@ -44,6 +47,17 @@ void LazyAllocator::init() {
|
||||||
this->ptr = nullptr;
|
this->ptr = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void LazyAllocator::setMemPool(size_t memPoolSize) {
|
||||||
|
IT_ASSERT(memPoolSize > 0);
|
||||||
|
if (!this->hasMemPool) {
|
||||||
|
this->hasMemPool = true;
|
||||||
|
this->memPoolSize = memPoolSize;
|
||||||
|
this->memPoolPtr = runtime->alloc(memPoolSize);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool LazyAllocator::getMemPoolStatus() { return this->hasMemPool; }
|
||||||
|
|
||||||
size_t LazyAllocator::alloc(size_t size) {
|
size_t LazyAllocator::alloc(size_t size) {
|
||||||
// pad the size to the multiple of alignment
|
// pad the size to the multiple of alignment
|
||||||
size = this->getAlignedSize(size);
|
size = this->getAlignedSize(size);
|
||||||
|
@@ -102,6 +116,17 @@ size_t LazyAllocator::allocWeight(size_t size) {
     return retAddr;
 }

+size_t LazyAllocator::heapAlloc(size_t size) {
+    size = this->getAlignedSize(size);
+    this->heapPeak += size;
+    IT_ASSERT(this->memPoolSize >=
+              this->weightPeak + this->peak + this->heapPeak);
+    size_t retAddr = this->memPoolSize - this->heapPeak;
+    return retAddr;
+}
+
+void LazyAllocator::freeHeap() { this->heapPeak = 0; }
+
 void LazyAllocator::free(size_t addr, size_t size) {
     IT_ASSERT(this->ptr == nullptr);
     size = getAlignedSize(size);
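heapAlloc hands out offsets measured from the pool base, carving heap blocks downward from the top of the pool while weights and activations fill it from the bottom; the assertion guards against the two regions colliding, and freeHeap simply rewinds heapPeak. A stand-alone mirror of that bookkeeping, with made-up sizes that are not values from the commit, shows the offsets it would return:

    #include <cassert>
    #include <cstddef>
    #include <cstdio>

    int main() {
        // Illustrative sizes only.
        const std::size_t memPoolSize = 1024; // total pool
        const std::size_t weightPeak = 256;   // bytes reserved for weights
        const std::size_t peak = 512;         // bytes reserved for activations
        std::size_t heapPeak = 0;

        auto heapAlloc = [&](std::size_t size) {
            heapPeak += size; // sizes assumed already aligned
            assert(memPoolSize >= weightPeak + peak + heapPeak);
            return memPoolSize - heapPeak; // offset from the pool base
        };

        std::printf("%zu\n", heapAlloc(64)); // 960: first block sits at the top
        std::printf("%zu\n", heapAlloc(32)); // 928: next block stacked below it
        heapPeak = 0; // freeHeap(): the whole heap region becomes reusable
        return 0;
    }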
@@ -143,6 +168,7 @@ void LazyAllocator::free(size_t addr, size_t size) {
 }

 void *LazyAllocator::getPtr() {
+    if (!hasMemPool) {
         if (this->ptr == nullptr) {
             this->ptr = runtime->alloc(this->peak);
             // #ifdef DEBUG_MODE
@@ -151,17 +177,31 @@ void *LazyAllocator::getPtr() {
             // #endif
         }
         return this->ptr;
+    } else {
+        IT_ASSERT(this->memPoolSize >= this->weightPeak + this->peak);
+        return static_cast<uint8_t *>(this->memPoolPtr) + weightPeak;
+    }
 }

 void *LazyAllocator::getWeightPtr() {
+    if (!hasMemPool) {
         if (this->weightPtr == nullptr) {
             this->weightPtr = runtime->alloc(this->weightPeak);
             // #ifdef DEBUG_MODE
-            //     printf("LazyAllocator really alloc weight: %p %lu bytes\n",
+            //     printf("LazyAllocator really alloc weight: %p %lu
+            //     bytes\n",
             //            this->weightPtr, weightPeak);
             // #endif
         }
         return this->weightPtr;
+    } else {
+        return this->memPoolPtr;
+    }
+}
+
+void *LazyAllocator::getHeapPtr() {
+    IT_ASSERT(hasMemPool);
+    return this->memPoolPtr;
 }

 size_t LazyAllocator::getAlignedSize(size_t size) {
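With a pool configured, the three getters above all resolve into the single pool allocation: getWeightPtr() and getHeapPtr() return the pool base, getPtr() returns the base advanced past the weight region, and cloneKV builds a blob pointer as the heap base plus the offset returned by heapAlloc. A tiny sketch of that pointer arithmetic, with illustrative numbers standing in for memPoolPtr, weightPeak, and a heapAlloc result:

    #include <cstddef>
    #include <cstdint>

    int main() {
        std::uint8_t pool[1024];                  // stand-in for memPoolPtr
        const std::size_t weightPeak = 256;       // illustrative weight region
        const std::size_t heapOffset = 1024 - 64; // what heapAlloc(64) would return

        std::uint8_t *weightPtr = pool;                  // getWeightPtr()
        std::uint8_t *activationPtr = pool + weightPeak; // getPtr()
        std::uint8_t *kvBlobPtr = pool + heapOffset;     // getHeapPtr() + heapAlloc(bytes)

        (void)weightPtr; (void)activationPtr; (void)kvBlobPtr;
        return 0;
    }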
@@ -437,7 +437,10 @@ void init_graph_builder(py::module &m) {
         })
         .def("has_target", &TensorObj::hasTarget, policy::automatic)
         .def("src", &TensorObj::getSource, policy::move)
-        .def("printData", &TensorObj::printData, policy::automatic);
+        .def("printData", &TensorObj::printData, policy::automatic)
+        .def("copy_data",
+             py::overload_cast<const Tensor &>(&TensorObj::copyData),
+             policy::move);
     py::class_<OperatorObj, std::shared_ptr<OperatorObj>>(m, "Operator")
         .def("op_type", &OperatorObj::getOpType, policy::automatic)
         .def("inputs", py::overload_cast<>(&OperatorObj::getInputs, py::const_),
@@ -499,7 +502,11 @@ void init_graph_builder(py::module &m) {
         .def("topo_sort", &Handler::topo_sort, policy::automatic)
         .def("optimize", &Handler::optimize, policy::automatic)
         .def("operators", &Handler::operators, policy::move)
-        .def("data_malloc", &Handler::data_malloc, policy::automatic)
+        .def("data_malloc", &Handler::data_malloc,
+             py::arg("useNaiveAllocator") = false, py::arg("memPoolSize") = 0,
+             policy::automatic)
+        .def("clone_KV", &Handler::clone_KV, policy::move)
+        .def("free_heap", &Handler::free_heap, policy::move)
         .def("get_perf_time", &Handler::get_perf_time, policy::automatic)
         .def("tune", &Handler::tune, policy::automatic)
         .def("run", &Handler::run, policy::automatic)