forked from jiuyuan/InfiniTensor
- modify allocator
This commit is contained in:
parent f000e211f7
commit e18900128d
@@ -123,6 +123,11 @@ class GraphObj : public Object {
      * @brief If the nodes is sorted in topological order.
      */
     bool sorted;
 
+    /**
+     * @brief If the persistent tensors are allocated.
+     */
+    bool persistentAllocated = false;
+
 };
 
 } // namespace infini
@@ -20,14 +20,23 @@ class LazyAllocator {
 
     Runtime runtime;
 
-    size_t used;
+    size_t used = 0;
 
-    size_t peak;
+    size_t peak = 0;
 
+    size_t persistentPeak = 0;
+
     size_t alignment;
 
     // pointer to the memory actually allocated
-    void *ptr;
+    void *ptr = nullptr;
 
+    // pointer to the persistent memory space
+    void *persistentPtr = nullptr;
+
+    // // a cache designed for a batch size that has already occurred
+    // std::unordered_map<size_t, std::unordered_map<TensorObj *, size_t>>
+    //     batchsizeToTensorOffset;
+
     struct freeBlockInfo {
         size_t addr;
@@ -57,12 +66,16 @@ class LazyAllocator {
 
     virtual ~LazyAllocator();
 
+    void init();
+
     // function: simulate memory allocation
     // arguments:
     //     size: size of memory block to be allocated
     // return: head address offset of the allocated memory block
     size_t alloc(size_t size);
 
+    size_t allocPersistent(size_t size);
+
     // function: simulate memory free
     // arguments:
     //     addr: head address offset of memory block to be free
@@ -73,6 +86,12 @@ class LazyAllocator {
     // return: pointer to the head address of the allocated memory
     void *getPtr();
 
+    // void addCache(size_t batchsize, std::unordered_map<TensorObj *, size_t>);
+
+    // std::unordered_map<TensorObj *, size_t> getCache(size_t batchsize);
+
+    void *getPersistentPtr();
+
     void info();
 
   private:
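Note: the header changes above split LazyAllocator's bookkeeping into two pools: a reusable pool whose layout is re-simulated on every dataMalloc() call (used/peak/ptr), and a persistent pool for weights and KV-cache tensors that is laid out once and kept across runs (persistentPeak/persistentPtr). The following standalone sketch illustrates that idea only; TwoPoolAllocator, the bump-only alloc, and the use of std::malloc are assumptions for illustration, not the project code.

// Illustrative sketch only, not InfiniTensor code: offsets are handed out during
// a simulation pass; real memory is reserved once, when getPtr()/getPersistentPtr()
// is first called.
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>

struct TwoPoolAllocator {
    size_t alignment = 256;              // e.g. CUDA-friendly alignment
    size_t peak = 0, persistentPeak = 0; // simulated sizes of the two pools
    void *ptr = nullptr, *persistentPtr = nullptr;

    size_t align(size_t s) const { return ((s - 1) / alignment + 1) * alignment; }

    // simulate an allocation in the reusable pool (simplified to bump-only here)
    size_t alloc(size_t size) {
        size_t off = peak;
        peak += align(size);
        return off;
    }
    // simulate an allocation in the persistent pool (kept across graph runs)
    size_t allocPersistent(size_t size) {
        size_t off = persistentPeak;
        persistentPeak += align(size);
        return off;
    }

    void *getPtr() { // lazily back the reusable pool with real memory
        if (!ptr)
            ptr = std::malloc(peak);
        return ptr;
    }
    void *getPersistentPtr() { // lazily back the persistent pool
        if (!persistentPtr)
            persistentPtr = std::malloc(persistentPeak);
        return persistentPtr;
    }
    ~TwoPoolAllocator() {
        std::free(ptr);
        std::free(persistentPtr);
    }
};

int main() {
    TwoPoolAllocator a;
    size_t w = a.allocPersistent(1000); // e.g. a weight tensor
    size_t t = a.alloc(4096);           // e.g. an activation tensor
    auto *weights = static_cast<uint8_t *>(a.getPersistentPtr()) + w;
    auto *scratch = static_cast<uint8_t *>(a.getPtr()) + t;
    std::printf("persistent pool: %zu bytes, reusable pool: %zu bytes\n",
                a.persistentPeak, a.peak);
    (void)weights;
    (void)scratch;
    return 0;
}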
@@ -19,6 +19,8 @@ class TensorObj : public TensorBaseObj {
     size_t _size; // Cache of Π(shape).
     Fuid fuid;    // Cloned tensors share the same id. Tensors constructed from
                   // scratch have a new id.
+    bool persistent = false;
+
   public:
     TensorObj(Shape shape, DataType dtype, Runtime runtime);
     virtual ~TensorObj() {}
@@ -35,6 +37,8 @@ class TensorObj : public TensorBaseObj {
     size_t getOffset(const vector<int> &ds) const;
     void dataMalloc();
     UidBaseType getFuid() const { return fuid; }
+    bool isPersistent() const { return persistent; }
+    void setPersistent() { persistent = true; }
 
     void load(std::string file_path);
     void save(std::string file_path);
@@ -608,6 +608,13 @@ class OnnxStub:
         # update the node_list
         node_list = list(set(node_name) - set(new_node_name))
 
+        ################################
+        # Set weight tensors as persistent
+        ################################
+        for name, obj in tensors.items():
+            if data.get(name) != None:
+                obj.set_persistent()
+
         ################################
         # Allocate memory space for data
         ################################
@@ -950,8 +957,7 @@ class OnnxStub:
             oldTensor = self.inputs[oldInput]
             self.handler.change_shape(newInput, oldTensor.fuid())
         self.handler.shape_infer()
-
-        # self.handler.data_malloc()
+        self.handler.data_malloc()
 
     def getShape(self, name: str) -> List[int]:
         if name in self.inputs:
@@ -165,16 +165,42 @@ void GraphObj::dataMalloc() {
     // record the memory address offsets of all tensors to be allocated
     std::unordered_map<TensorObj *, size_t> tensorToOffset;
 
-    // record all constant tensors, including weight tensors and input tensors
-    std::unordered_set<TensorObj *> constTensor;
+    // reinit allocator
+    allocator.init();
+
+    // record all persistent tensors, including weight tensors and kvcache
+    // tensors
+    std::unordered_set<TensorObj *> persistentTensors;
     for (auto &tensor : tensors) {
-        if (tensor.get()->getSource() == nullptr) {
-            // allocate memory for all constant tensors first, and this memory
+        if (tensor->isPersistent()) {
+            // allocate memory for all persistent tensors first, and this memory
             // will not be reused later
-            constTensor.insert(tensor.get());
-            tensorToOffset[tensor.get()] = allocator.alloc(tensor->getBytes());
+            persistentTensors.insert(tensor.get());
+            if (!this->persistentAllocated) {
+                tensorToOffset[tensor.get()] =
+                    allocator.allocPersistent(tensor->getBytes());
+            }
         } else {
             tensorToRefCount[tensor.get()] = tensor->getTargets().size();
+            if (tensor.get()->getSource() == nullptr) {
+                // allocate memory for input tensors, because it is not the
+                // output of any op
+                tensorToOffset[tensor.get()] =
+                    allocator.alloc(tensor->getBytes());
+            }
         }
     }
+    // if memory has not yet been allocated for persistent tensors,
+    // allocate memory now and do not allocate again in the future.
+    if (!this->persistentAllocated) {
+        this->persistentAllocated = true;
+        // only allocate once for persistent tensors
+        for (auto &tensor : persistentTensors) {
+            IT_ASSERT(tensorToOffset.find(tensor) != tensorToOffset.end());
+            tensor->setDataBlob(make_ref<BlobObj>(
+                tensor->runtime,
+                static_cast<uint8_t *>(allocator.getPersistentPtr()) +
+                    tensorToOffset[tensor]));
+        }
+    }
     // traverse in topological order and simulate memory allocation
@@ -186,7 +212,8 @@ void GraphObj::dataMalloc() {
         }
         auto inputs = op->getInputs();
         for (auto &tensor : inputs) {
-            if (constTensor.find(tensor.get()) == constTensor.end()) {
+            if (persistentTensors.find(tensor.get()) ==
+                persistentTensors.end()) {
                 auto tensorIter = tensorToRefCount.find(tensor.get());
                 IT_ASSERT(tensorIter != tensorToRefCount.end());
                 tensorToRefCount[tensor.get()] -= 1;
@@ -201,15 +228,20 @@ void GraphObj::dataMalloc() {
             }
         }
 
-    // perform actual memory allocation
+    // perform actual memory allocation for non-persistent tensors
     for (auto &tensor : tensors) {
-        IT_ASSERT(tensorToOffset.find(tensor.get()) != tensorToOffset.end());
-        tensor->setDataBlob(make_ref<BlobObj>(
-            tensor->runtime, static_cast<uint8_t *>(allocator.getPtr()) +
-                                 tensorToOffset[tensor.get()]));
+        if (!tensor->isPersistent()) {
+            IT_ASSERT(tensorToOffset.find(tensor.get()) !=
+                      tensorToOffset.end());
+            tensor->setDataBlob(make_ref<BlobObj>(
+                tensor->runtime, static_cast<uint8_t *>(allocator.getPtr()) +
+                                     tensorToOffset[tensor.get()]));
+        }
     }
 
+#ifdef DEBUG_MODE
     allocator.info();
+#endif
 }
 
 Tensor GraphObj::addTensor(Shape dim, DataType dtype) {
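Note: after this change GraphObj::dataMalloc() effectively runs in two phases: persistent tensors receive offsets in the persistent pool only on the first call (guarded by persistentAllocated) and keep their blobs afterwards, while all other tensors are re-simulated through the reusable pool on every call. A simplified, self-contained sketch of that control flow follows; the Tensor and Pool types here are hypothetical stand-ins, not the project code.

// Simplified sketch of the two-phase allocation order; types are hypothetical.
#include <cstddef>
#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>

struct Tensor {
    std::string name;
    size_t bytes;
    bool persistent; // weight / kv-cache tensor
};

struct Pool {
    size_t peak = 0;
    size_t alloc(size_t b) { size_t off = peak; peak += b; return off; }
};

int main() {
    std::vector<Tensor> tensors = {
        {"weight", 4096, true},  // persistent: offset assigned once, kept across runs
        {"input", 1024, false},  // graph input: reusable pool, every call
        {"hidden", 2048, false}, // op output: reusable pool, every call
    };

    Pool persistentPool, reusablePool;
    std::unordered_map<std::string, size_t> offset;
    bool persistentAllocated = false; // mirrors GraphObj::persistentAllocated

    for (int run = 0; run < 2; ++run) {
        reusablePool = Pool{}; // allocator.init(): reusable pool starts over

        // Phase 1: persistent tensors, only the first time around.
        for (auto &t : tensors)
            if (t.persistent && !persistentAllocated)
                offset[t.name] = persistentPool.alloc(t.bytes);
        persistentAllocated = true;

        // Phase 2: everything else goes through the reusable pool each run
        // (the real code also walks ops topologically and frees dead tensors).
        for (auto &t : tensors)
            if (!t.persistent)
                offset[t.name] = reusablePool.alloc(t.bytes);

        std::printf("run %d: persistent peak %zu, reusable peak %zu\n", run,
                    persistentPool.peak, reusablePool.peak);
    }
    return 0;
}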
@@ -11,9 +11,6 @@ namespace infini {
 constexpr size_t alignmentInBytesForCUDA = 256;
 
 LazyAllocator::LazyAllocator(Runtime runtime) : runtime(runtime) {
-    used = 0;
-    peak = 0;
-    ptr = nullptr;
     if (runtime->isCuda()) {
         // TODO: the alignment on cuda might need further discussion
         alignment = alignmentInBytesForCUDA;
@@ -30,10 +27,21 @@ LazyAllocator::~LazyAllocator() {
     if (this->ptr != nullptr) {
         runtime->dealloc(this->ptr);
     }
+    if (this->persistentPtr != nullptr) {
+        runtime->dealloc(this->persistentPtr);
+    }
 }
 
+void LazyAllocator::init() {
+    used = 0;
+    peak = 0;
+    if (this->ptr != nullptr) {
+        runtime->dealloc(this->ptr);
+    }
+    this->ptr = nullptr;
+}
+
 size_t LazyAllocator::alloc(size_t size) {
     IT_ASSERT(this->ptr == nullptr);
     // pad the size to the multiple of alignment
     size = this->getAlignedSize(size);
     auto it = this->freeBlocks.lower_bound(freeBlockInfo{(size_t)0, size});
@@ -83,6 +91,14 @@ size_t LazyAllocator::alloc(size_t size) {
     return retAddr;
 }
 
+size_t LazyAllocator::allocPersistent(size_t size) {
+    IT_ASSERT(this->persistentPtr == nullptr);
+    size = this->getAlignedSize(size);
+    size_t retAddr = this->persistentPeak;
+    this->persistentPeak += size;
+    return retAddr;
+}
+
 void LazyAllocator::free(size_t addr, size_t size) {
     IT_ASSERT(this->ptr == nullptr);
     size = getAlignedSize(size);
@@ -126,18 +142,33 @@ void LazyAllocator::free(size_t addr, size_t size) {
 void *LazyAllocator::getPtr() {
     if (this->ptr == nullptr) {
         this->ptr = runtime->alloc(this->peak);
-        printf("LazyAllocator really alloc: %p %lu bytes\n", this->ptr, peak);
+#ifdef DEBUG_MODE
+        printf("LazyAllocator really alloc non-persistent: %p %lu bytes\n",
+               this->ptr, peak);
+#endif
     }
     return this->ptr;
 }
 
+void *LazyAllocator::getPersistentPtr() {
+    if (this->persistentPtr == nullptr) {
+        this->persistentPtr = runtime->alloc(this->persistentPeak);
+#ifdef DEBUG_MODE
+        printf("LazyAllocator really alloc persistent: %p %lu bytes\n",
+               this->persistentPtr, persistentPeak);
+#endif
+    }
+    return this->persistentPtr;
+}
+
 size_t LazyAllocator::getAlignedSize(size_t size) {
     return ((size - 1) / this->alignment + 1) * this->alignment;
 }
 
 void LazyAllocator::info() {
-    std::cout << "Used memory: " << this->used
-              << ", peak memory: " << this->peak << std::endl;
+    std::cout << "Used memory: " << this->used + this->persistentPeak
+              << ", peak memory: " << this->peak + this->persistentPeak
+              << std::endl;
 }
 
 } // namespace infini
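Note: both pools round every request up with getAlignedSize, i.e. ((size - 1) / alignment + 1) * alignment, so offsets stay multiples of the 256-byte CUDA alignment. A tiny standalone check of that arithmetic (illustrative only, not project code):

// Quick check of the round-up-to-alignment arithmetic used by the allocator.
#include <cassert>
#include <cstddef>
#include <cstdio>

static size_t getAlignedSize(size_t size, size_t alignment) {
    return ((size - 1) / alignment + 1) * alignment;
}

int main() {
    const size_t alignment = 256; // alignmentInBytesForCUDA
    assert(getAlignedSize(1, alignment) == 256);   // anything up to 256 -> 256
    assert(getAlignedSize(256, alignment) == 256); // exact multiples unchanged
    assert(getAlignedSize(257, alignment) == 512); // otherwise the next multiple
    std::printf("1000 bytes rounds up to %zu bytes\n",
                getAlignedSize(1000, alignment)); // prints 1024
    return 0;
}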
@@ -321,6 +321,7 @@ void init_graph_builder(py::module &m) {
                       py::buffer_protocol())
         .def("fuid", &TensorObj::getFuid, policy::automatic)
         .def("shape", &TensorObj::getDims, policy::move)
+        .def("set_persistent", &TensorObj::setPersistent, policy::move)
         .def("copyin_float", &TensorObj::copyin<float>, policy::move)
         .def("copyin_int32", &TensorObj::copyin<int32_t>, policy::move)
         .def("copyin_int64", &TensorObj::copyin<int64_t>, policy::move)