modify allocator

kilinchange 2023-09-04 19:20:35 +08:00
parent f000e211f7
commit e18900128d
7 changed files with 122 additions and 24 deletions


@@ -123,6 +123,11 @@ class GraphObj : public Object {
      * @brief If the nodes are sorted in topological order.
      */
     bool sorted;
+
+    /**
+     * @brief If the persistent tensors are allocated.
+     */
+    bool persistentAllocated = false;
 };

 } // namespace infini
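
Context for the new flag: it acts as a once-only guard, so the first dataMalloc() call places persistent tensors (weights, kv-cache) and every later call skips that step while still re-planning transient memory. A minimal standalone sketch of the guard pattern (the Graph type and messages below are illustrative, not repo code):

#include <cstdio>

struct Graph {
    bool persistentAllocated = false; // mirrors the new GraphObj flag

    void dataMalloc() {
        if (!persistentAllocated) {
            persistentAllocated = true;
            std::puts("placing persistent tensors (first call only)");
        }
        std::puts("planning transient tensors (every call)");
    }
};

int main() {
    Graph g;
    g.dataMalloc(); // places both pools
    g.dataMalloc(); // persistent placement is skipped
    return 0;
}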


@@ -20,14 +20,23 @@ class LazyAllocator {
     Runtime runtime;

-    size_t used;
+    size_t used = 0;

-    size_t peak;
+    size_t peak = 0;
+
+    size_t persistentPeak = 0;

     size_t alignment;

     // pointer to the memory actually allocated
-    void *ptr;
+    void *ptr = nullptr;
+
+    // pointer to the persistent memory space
+    void *persistentPtr = nullptr;
+
+    // // a cache designed for a batch size that has already occurred
+    // std::unordered_map<size_t, std::unordered_map<TensorObj *, size_t>>
+    //     batchsizeToTensorOffset;

     struct freeBlockInfo {
         size_t addr;
@@ -57,12 +66,16 @@ class LazyAllocator {
     virtual ~LazyAllocator();

+    void init();
+
     // function: simulate memory allocation
     // arguments:
     //     size: size of memory block to be allocated
     // return: head address offset of the allocated memory block
     size_t alloc(size_t size);
+
+    size_t allocPersistent(size_t size);
+
     // function: simulate memory free
     // arguments:
     //     addr: head address offset of the memory block to be freed
@@ -73,6 +86,12 @@ class LazyAllocator {
     // return: pointer to the head address of the allocated memory
     void *getPtr();

+    // void addCache(size_t batchsize, std::unordered_map<TensorObj *, size_t>);
+
+    // std::unordered_map<TensorObj *, size_t> getCache(size_t batchsize);
+
+    void *getPersistentPtr();
+
     void info();

   private:
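
The comments in this header describe a two-phase contract: alloc() and free() only simulate planning and trade in offsets, and the single real buffer of peak size is created lazily by getPtr(). A self-contained sketch of that contract, reduced to a bump pointer (the first-fit reuse via freeBlocks is omitted, and all names are hypothetical):

#include <cassert>
#include <cstddef>
#include <cstdio>
#include <cstdlib>

// Simulate-then-commit: offsets are handed out before any real memory exists.
class MiniLazyAllocator {
    size_t used = 0, peak = 0;
    void *ptr = nullptr;

  public:
    // simulate allocation: returns a head-address offset, no real memory yet
    size_t alloc(size_t size) {
        assert(ptr == nullptr); // must happen before the real allocation
        size_t addr = used;
        used += size;
        if (used > peak)
            peak = used;
        return addr;
    }
    // commit: one real allocation, sized to the simulated peak
    void *getPtr() {
        if (ptr == nullptr)
            ptr = std::malloc(peak);
        return ptr;
    }
    ~MiniLazyAllocator() { std::free(ptr); }
};

int main() {
    MiniLazyAllocator a;
    size_t off1 = a.alloc(1024);
    size_t off2 = a.alloc(2048);
    char *base = static_cast<char *>(a.getPtr());
    std::printf("tensor1 at %p, tensor2 at %p\n",
                static_cast<void *>(base + off1),
                static_cast<void *>(base + off2));
    return 0;
}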


@@ -19,6 +19,8 @@ class TensorObj : public TensorBaseObj {
     size_t _size; // Cache of Π(shape).
     Fuid fuid;    // Cloned tensors share the same id. Tensors constructed from
                   // scratch have a new id.
+    bool persistent = false;
+
   public:
     TensorObj(Shape shape, DataType dtype, Runtime runtime);
     virtual ~TensorObj() {}
@@ -35,6 +37,8 @@ class TensorObj : public TensorBaseObj {
     size_t getOffset(const vector<int> &ds) const;
     void dataMalloc();
     UidBaseType getFuid() const { return fuid; }
+    bool isPersistent() const { return persistent; }
+    void setPersistent() { persistent = true; }
     void load(std::string file_path);
     void save(std::string file_path);

@@ -608,6 +608,13 @@ class OnnxStub:
         # update the node_list
         node_list = list(set(node_name) - set(new_node_name))

+        ################################
+        # Set weight tensors as persistent
+        ################################
+        for name, obj in tensors.items():
+            if data.get(name) is not None:
+                obj.set_persistent()
+
         ################################
         # Allocate memory space for data
         ################################
@@ -950,8 +957,7 @@ class OnnxStub:
                 oldTensor = self.inputs[oldInput]
                 self.handler.change_shape(newInput, oldTensor.fuid())
         self.handler.shape_infer()
-        # self.handler.data_malloc()
+        self.handler.data_malloc()

     def getShape(self, name: str) -> List[int]:
         if name in self.inputs:


@@ -165,16 +165,42 @@ void GraphObj::dataMalloc() {
     // record the memory address offsets of all tensors to be allocated
     std::unordered_map<TensorObj *, size_t> tensorToOffset;
-    // record all constant tensors, including weight tensors and input tensors
-    std::unordered_set<TensorObj *> constTensor;
+
+    // reinit allocator
+    allocator.init();
+
+    // record all persistent tensors, including weight tensors and kvcache
+    // tensors
+    std::unordered_set<TensorObj *> persistentTensors;
     for (auto &tensor : tensors) {
-        if (tensor.get()->getSource() == nullptr) {
-            // allocate memory for all constant tensors first, and this memory
+        if (tensor->isPersistent()) {
+            // allocate memory for all persistent tensors first, and this memory
             // will not be reused later
-            constTensor.insert(tensor.get());
-            tensorToOffset[tensor.get()] = allocator.alloc(tensor->getBytes());
+            persistentTensors.insert(tensor.get());
+            if (!this->persistentAllocated) {
+                tensorToOffset[tensor.get()] =
+                    allocator.allocPersistent(tensor->getBytes());
+            }
         } else {
             tensorToRefCount[tensor.get()] = tensor->getTargets().size();
+            if (tensor.get()->getSource() == nullptr) {
+                // allocate memory for input tensors, because they are not
+                // the output of any op
+                tensorToOffset[tensor.get()] =
+                    allocator.alloc(tensor->getBytes());
+            }
         }
     }
+    // if memory has not yet been allocated for persistent tensors,
+    // allocate memory now and do not allocate again in the future.
+    if (!this->persistentAllocated) {
+        this->persistentAllocated = true;
+        // only allocate once for persistent tensors
+        for (auto &tensor : persistentTensors) {
+            IT_ASSERT(tensorToOffset.find(tensor) != tensorToOffset.end());
+            tensor->setDataBlob(make_ref<BlobObj>(
+                tensor->runtime,
+                static_cast<uint8_t *>(allocator.getPersistentPtr()) +
+                    tensorToOffset[tensor]));
+        }
+    }
     // traverse in topological order and simulate memory allocation
@@ -186,7 +212,8 @@ void GraphObj::dataMalloc() {
         }
         auto inputs = op->getInputs();
         for (auto &tensor : inputs) {
-            if (constTensor.find(tensor.get()) == constTensor.end()) {
+            if (persistentTensors.find(tensor.get()) ==
+                persistentTensors.end()) {
                 auto tensorIter = tensorToRefCount.find(tensor.get());
                 IT_ASSERT(tensorIter != tensorToRefCount.end());
                 tensorToRefCount[tensor.get()] -= 1;
@@ -201,15 +228,20 @@ void GraphObj::dataMalloc() {
             }
         }
     }
-    // perform actual memory allocation
+    // perform actual memory allocation for non-persistent tensors
     for (auto &tensor : tensors) {
-        IT_ASSERT(tensorToOffset.find(tensor.get()) != tensorToOffset.end());
-        tensor->setDataBlob(make_ref<BlobObj>(
-            tensor->runtime, static_cast<uint8_t *>(allocator.getPtr()) +
-                                 tensorToOffset[tensor.get()]));
+        if (!tensor->isPersistent()) {
+            IT_ASSERT(tensorToOffset.find(tensor.get()) !=
+                      tensorToOffset.end());
+            tensor->setDataBlob(make_ref<BlobObj>(
+                tensor->runtime, static_cast<uint8_t *>(allocator.getPtr()) +
+                                     tensorToOffset[tensor.get()]));
+        }
     }

+#ifdef DEBUG_MODE
     allocator.info();
+#endif
 }

 Tensor GraphObj::addTensor(Shape dim, DataType dtype) {
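
Taken together, dataMalloc() now plans two disjoint pools: persistent tensors receive allocPersistent() offsets exactly once, guarded by persistentAllocated, while inputs and intermediates are re-planned from scratch on every call after allocator.init(). A self-contained simulation of that control flow, with bump counters standing in for the allocator (all names and sizes are illustrative):

#include <cstddef>
#include <cstdio>
#include <vector>

struct MiniTensor {
    const char *name;
    size_t bytes;
    bool persistent;
};

int main() {
    std::vector<MiniTensor> tensors = {{"weight", 4096, true},
                                       {"input", 1024, false},
                                       {"hidden", 2048, false}};
    bool persistentAllocated = false;
    size_t persistentPeak = 0; // never reset across calls

    for (int call = 0; call < 2; ++call) { // two dataMalloc() invocations
        size_t peak = 0;                   // reset by allocator.init()
        for (const auto &t : tensors) {
            if (t.persistent) {
                if (!persistentAllocated) { // placed once, kept forever
                    std::printf("call %d: persistent %s at offset %zu\n",
                                call, t.name, persistentPeak);
                    persistentPeak += t.bytes;
                }
            } else { // transient: a fresh offset on every call
                std::printf("call %d: transient %s at offset %zu\n",
                            call, t.name, peak);
                peak += t.bytes;
            }
        }
        persistentAllocated = true;
    }
    return 0;
}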


@@ -11,9 +11,6 @@ namespace infini {
 constexpr size_t alignmentInBytesForCUDA = 256;

 LazyAllocator::LazyAllocator(Runtime runtime) : runtime(runtime) {
-    used = 0;
-    peak = 0;
-    ptr = nullptr;
     if (runtime->isCuda()) {
         // TODO: the alignment on cuda might need further discussion
         alignment = alignmentInBytesForCUDA;
@@ -30,10 +27,21 @@ LazyAllocator::~LazyAllocator() {
     if (this->ptr != nullptr) {
         runtime->dealloc(this->ptr);
     }
+    if (this->persistentPtr != nullptr) {
+        runtime->dealloc(this->persistentPtr);
+    }
 }

+void LazyAllocator::init() {
+    used = 0;
+    peak = 0;
+    if (this->ptr != nullptr) {
+        runtime->dealloc(this->ptr);
+    }
+    this->ptr = nullptr;
+}
+
 size_t LazyAllocator::alloc(size_t size) {
     IT_ASSERT(this->ptr == nullptr);
     // pad the size to the multiple of alignment
     size = this->getAlignedSize(size);
     auto it = this->freeBlocks.lower_bound(freeBlockInfo{(size_t)0, size});
@@ -83,6 +91,14 @@ size_t LazyAllocator::alloc(size_t size) {
     return retAddr;
 }

+size_t LazyAllocator::allocPersistent(size_t size) {
+    IT_ASSERT(this->persistentPtr == nullptr);
+    size = this->getAlignedSize(size);
+    size_t retAddr = this->persistentPeak;
+    this->persistentPeak += size;
+    return retAddr;
+}
+
 void LazyAllocator::free(size_t addr, size_t size) {
     IT_ASSERT(this->ptr == nullptr);
     size = getAlignedSize(size);
@@ -126,18 +142,33 @@ void LazyAllocator::free(size_t addr, size_t size) {
 void *LazyAllocator::getPtr() {
     if (this->ptr == nullptr) {
         this->ptr = runtime->alloc(this->peak);
-        printf("LazyAllocator really alloc: %p %lu bytes\n", this->ptr, peak);
+#ifdef DEBUG_MODE
+        printf("LazyAllocator really alloc non-persistent: %p %lu bytes\n",
+               this->ptr, peak);
+#endif
     }
     return this->ptr;
 }

+void *LazyAllocator::getPersistentPtr() {
+    if (this->persistentPtr == nullptr) {
+        this->persistentPtr = runtime->alloc(this->persistentPeak);
+#ifdef DEBUG_MODE
+        printf("LazyAllocator really alloc persistent: %p %lu bytes\n",
+               this->persistentPtr, persistentPeak);
+#endif
+    }
+    return this->persistentPtr;
+}
+
 size_t LazyAllocator::getAlignedSize(size_t size) {
     return ((size - 1) / this->alignment + 1) * this->alignment;
 }

 void LazyAllocator::info() {
-    std::cout << "Used memory: " << this->used
-              << ", peak memory: " << this->peak << std::endl;
+    std::cout << "Used memory: " << this->used + this->persistentPeak
+              << ", peak memory: " << this->peak + this->persistentPeak
+              << std::endl;
 }

 } // namespace infini
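
One detail worth spelling out: allocPersistent() is a plain bump allocator over aligned sizes, so every offset it returns is a multiple of the alignment. A standalone check of the rounding formula and the resulting offsets, using the 256-byte CUDA alignment defined in this file (request sizes are made up):

#include <cassert>
#include <cstddef>
#include <cstdio>

// Same rounding as LazyAllocator::getAlignedSize.
size_t getAlignedSize(size_t size, size_t alignment) {
    return ((size - 1) / alignment + 1) * alignment;
}

int main() {
    const size_t alignment = 256; // alignmentInBytesForCUDA
    // 1000 -> ((1000 - 1) / 256 + 1) * 256 = (3 + 1) * 256 = 1024
    assert(getAlignedSize(1000, alignment) == 1024);

    // Bump allocation mirrors allocPersistent(): offset = previous peak.
    size_t persistentPeak = 0;
    size_t off1 = persistentPeak; // 0
    persistentPeak += getAlignedSize(1000, alignment);
    size_t off2 = persistentPeak; // 1024
    persistentPeak += getAlignedSize(300, alignment); // 512 more
    std::printf("off1=%zu off2=%zu peak=%zu\n", off1, off2, persistentPeak);
    return 0;
}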


@@ -321,6 +321,7 @@ void init_graph_builder(py::module &m) {
              py::buffer_protocol())
         .def("fuid", &TensorObj::getFuid, policy::automatic)
         .def("shape", &TensorObj::getDims, policy::move)
+        .def("set_persistent", &TensorObj::setPersistent, policy::move)
         .def("copyin_float", &TensorObj::copyin<float>, policy::move)
         .def("copyin_int32", &TensorObj::copyin<int32_t>, policy::move)
         .def("copyin_int64", &TensorObj::copyin<int64_t>, policy::move)