forked from jiuyuan/InfiniTensor
- add tensorType
- modify allocator to support memory allocation based on tensorType (see the usage sketch below)
parent e18900128d
commit 648ed46952
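Before the diff, a minimal usage sketch of the new tensor-type API. This sketch is not part of the commit: the graph-building helpers it uses (NativeCpuRuntimeObj::getInstance, make_ref<GraphObj>, addTensor, DataType::Float32) are assumed from the surrounding InfiniTensor code and may differ; only the tensor-type calls, dataMalloc behavior, and the weight region come from this change.

// Usage sketch only, not part of this diff. The graph-building helpers
// (NativeCpuRuntimeObj::getInstance, make_ref<GraphObj>, addTensor,
// DataType::Float32) are assumptions; only the tensor-type calls and the
// weight region below are introduced by this commit.
#include "core/graph.h"
#include "core/runtime.h"
#include <iostream>

using namespace infini;

int main() {
    Runtime runtime = NativeCpuRuntimeObj::getInstance(); // assumed helper
    Graph g = make_ref<GraphObj>(runtime);                // assumed helper

    auto w = g->addTensor({64, 64}, DataType::Float32);   // assumed helper
    w->setWeight(); // placed in the allocator's weight region, laid out once
    auto x = g->addTensor({1, 64}, DataType::Float32);
    x->setInput();  // input/output offsets are allocated but never reused

    g->dataMalloc(); // weights go through allocWeight/getWeightPtr,
                     // everything else through alloc/getPtr
    std::cout << w->tensorTypeToString() << std::endl; // prints "weight"
    return 0;
}

The design point, visible in the GraphObj::dataMalloc hunk below, is that weight tensors are routed through allocator.allocWeight/getWeightPtr and kept for the lifetime of the graph, input and output tensors are allocated once without reuse, and only the intermediate ("others") tensors participate in ref-counted offset reuse.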
@@ -125,9 +125,9 @@ class GraphObj : public Object {
     bool sorted;
 
     /**
-     * @brief If the persistent tensors are allocated.
+     * @brief If the weight tensors are allocated.
      */
-    bool persistentAllocated = false;
+    bool weightAllocated = false;
 };
 
 } // namespace infini

@@ -24,15 +24,15 @@ class LazyAllocator {
 
     size_t peak = 0;
 
-    size_t persistentPeak = 0;
+    size_t weightPeak = 0;
 
     size_t alignment;
 
     // pointer to the memory actually allocated
     void *ptr = nullptr;
 
-    // pointer to the persistent memory space
-    void *persistentPtr = nullptr;
+    // pointer to the weight memory space
+    void *weightPtr = nullptr;
 
     // // a cache designed for a batch size that has already occurred
     // std::unordered_map<size_t, std::unordered_map<TensorObj *, size_t>>

@@ -74,7 +74,7 @@ class LazyAllocator {
     // return: head address offset of the allocated memory block
     size_t alloc(size_t size);
 
-    size_t allocPersistent(size_t size);
+    size_t allocWeight(size_t size);
 
     // function: simulate memory free
     // arguments:

@@ -90,7 +90,7 @@ class LazyAllocator {
 
     // std::unordered_map<TensorObj *, size_t> getCache(size_t batchsize);
 
-    void *getPersistentPtr();
+    void *getWeightPtr();
 
     void info();
 

@@ -1,5 +1,6 @@
 #pragma once
 #include "core/tensor_base.h"
+#include "core/tensor_type.h"
 #include "utils/data_convert.h"
 #include <cmath>
 #include <cstring>

@@ -19,7 +20,7 @@ class TensorObj : public TensorBaseObj {
     size_t _size; // Cache of Π(shape).
     Fuid fuid;    // Cloned tensors share the same id. Tensors constructed from
                   // scratch have a new id.
-    bool persistent = false;
+    TensorType tensorType = TensorType::others;
 
   public:
     TensorObj(Shape shape, DataType dtype, Runtime runtime);

@@ -37,8 +38,33 @@
     size_t getOffset(const vector<int> &ds) const;
     void dataMalloc();
     UidBaseType getFuid() const { return fuid; }
-    bool isPersistent() const { return persistent; }
-    void setPersistent() { persistent = true; }
+    bool isWeight() const { return tensorType == TensorType::weight; }
+    bool isInput() const { return tensorType == TensorType::input; }
+    bool isOutput() const { return tensorType == TensorType::output; }
+    bool isOthers() const { return tensorType == TensorType::others; }
+    void setWeight() { tensorType = TensorType::weight; }
+    void setInput() { tensorType = TensorType::input; }
+    void setOutput() { tensorType = TensorType::output; }
+    string tensorTypeToString() const {
+        switch (tensorType) {
+        case TensorType::weight:
+            return "weight";
+            break;
+        case TensorType::input:
+            return "input";
+            break;
+        case TensorType::output:
+            return "output";
+            break;
+        case TensorType::others:
+            return "others";
+            break;
+
+        default:
+            return "unknown tensor type";
+            break;
+        }
+    }
 
     void load(std::string file_path);
     void save(std::string file_path);

@@ -0,0 +1,7 @@
+#pragma once
+
+namespace infini {
+
+enum class TensorType { weight, input, output, others };
+
+} // namespace infini

@@ -45,11 +45,17 @@ class OnnxStub:
         tensors: Dict[str, backend.Tensor] = dict()
         data: Dict[str, TensorProto] = dict()
 
+        for initializer in model.graph.initializer:
+            dims = [d for d in initializer.dims]
+            tensors[initializer.name] = self.handler.tensor(dims, initializer.data_type)
+            data[initializer.name] = initializer
+
         for input in model.graph.input:
             dims = _take_shape_dim(input.type.tensor_type.shape)
-            tensors[input.name] = self.handler.tensor(
-                dims, input.type.tensor_type.elem_type
-            )
+            if input.name not in tensors.keys():
+                tensors[input.name] = self.handler.tensor(
+                    dims, input.type.tensor_type.elem_type
+                )
 
         for output in model.graph.output:
             dims = _take_shape_dim(output.type.tensor_type.shape)

@@ -57,10 +63,6 @@ class OnnxStub:
                 dims, output.type.tensor_type.elem_type
             )
 
-        for initializer in model.graph.initializer:
-            dims = [d for d in initializer.dims]
-            tensors[initializer.name] = self.handler.tensor(dims, initializer.data_type)
-            data[initializer.name] = initializer
 
         node_name = []
         new_node_name = []

@@ -609,11 +611,17 @@ class OnnxStub:
         node_list = list(set(node_name) - set(new_node_name))
 
         ################################
-        # Set weight tensors as persistent
+        # Set tensor type
         ################################
-        for name, obj in tensors.items():
-            if data.get(name) != None:
-                obj.set_persistent()
+        for initializer in model.graph.initializer:
+            tensors[initializer.name].set_weight()
+
+        for input in model.graph.input:
+            tensors[input.name].set_input()
+
+        for output in model.graph.output:
+            tensors[output.name].set_output()
+
 
         ################################
         # Allocate memory space for data

@@ -168,54 +168,60 @@ void GraphObj::dataMalloc() {
     // reinit allocator
    allocator.init();
 
-    // record all persistent tensors, including weight tensors and kvcache
+    // record all weight tensors, including weight tensors and kvcache
     // tensors
-    std::unordered_set<TensorObj *> persistentTensors;
+    std::unordered_set<TensorObj *> weightTensors;
     for (auto &tensor : tensors) {
-        if (tensor->isPersistent()) {
-            // allocate memory for all persistent tensors first, and this memory
-            // will not be reused later
-            persistentTensors.insert(tensor.get());
-            if (!this->persistentAllocated) {
+        if (tensor->isWeight()) {
+            // allocate memory for all weight tensors first, and this memory
+            // will not be freed until the graph is destroyed
+            weightTensors.insert(tensor.get());
+            if (!this->weightAllocated) {
                 tensorToOffset[tensor.get()] =
-                    allocator.allocPersistent(tensor->getBytes());
+                    allocator.allocWeight(tensor->getBytes());
             }
+        } else if (tensor->isInput() || tensor->isOutput()) {
+            // allocate memory for all input and output tensors, and this memory
+            // will not be reused later
+            tensorToOffset[tensor.get()] = allocator.alloc(tensor->getBytes());
         } else {
             tensorToRefCount[tensor.get()] = tensor->getTargets().size();
             // allocate memory for all user-created tensors
             if (tensor.get()->getSource() == nullptr) {
                 // allocate memory for input tensors, because it is not the
                 // output of any op
                 tensorToOffset[tensor.get()] =
                     allocator.alloc(tensor->getBytes());
             }
         }
     }
-    // if memory has not yet been allocated for persistent tensors,
+    // if memory has not yet been allocated for weight tensors,
     // allocate memory now and do not allocate again in the future.
-    if (!this->persistentAllocated) {
-        this->persistentAllocated = true;
-        // only allocate once for persistent tensors
-        for (auto &tensor : persistentTensors) {
+    if (!this->weightAllocated) {
+        this->weightAllocated = true;
+        // only allocate once for weight tensors
+        for (auto &tensor : weightTensors) {
             IT_ASSERT(tensorToOffset.find(tensor) != tensorToOffset.end());
             tensor->setDataBlob(make_ref<BlobObj>(
                 tensor->runtime,
-                static_cast<uint8_t *>(allocator.getPersistentPtr()) +
+                static_cast<uint8_t *>(allocator.getWeightPtr()) +
                     tensorToOffset[tensor]));
         }
     }
     // traverse in topological order and simulate memory allocation
     for (auto &op : ops) {
-        // memory should be allocated for the output first
+        // memory should be allocated for the op's output first
         auto outputs = op->getOutputs();
         for (auto &tensor : outputs) {
-            tensorToOffset[tensor.get()] = allocator.alloc(tensor->getBytes());
+            if (tensor->isOthers()) {
+                tensorToOffset[tensor.get()] =
+                    allocator.alloc(tensor->getBytes());
+            }
         }
         auto inputs = op->getInputs();
         for (auto &tensor : inputs) {
-            if (persistentTensors.find(tensor.get()) ==
-                persistentTensors.end()) {
+            if (tensor->isOthers()) {
                 auto tensorIter = tensorToRefCount.find(tensor.get());
                 IT_ASSERT(tensorIter != tensorToRefCount.end());
                 IT_ASSERT(tensorToRefCount[tensor.get()] > 0);
                 tensorToRefCount[tensor.get()] -= 1;
                 if (tensorToRefCount[tensor.get()] == 0) {
                     // indicate that this tensor will no longer be used and

@@ -228,9 +234,9 @@ void GraphObj::dataMalloc() {
         }
     }
 
-    // perform actual memory allocation for non-persistent tensors
+    // perform actual memory allocation for non-weight tensors
     for (auto &tensor : tensors) {
-        if (!tensor->isPersistent()) {
+        if (!tensor->isWeight()) {
            IT_ASSERT(tensorToOffset.find(tensor.get()) !=
                      tensorToOffset.end());
            tensor->setDataBlob(make_ref<BlobObj>(

@@ -27,8 +27,8 @@ LazyAllocator::~LazyAllocator() {
     if (this->ptr != nullptr) {
         runtime->dealloc(this->ptr);
     }
-    if (this->persistentPtr != nullptr) {
-        runtime->dealloc(this->persistentPtr);
+    if (this->weightPtr != nullptr) {
+        runtime->dealloc(this->weightPtr);
     }
 }
 

@@ -91,11 +91,11 @@ size_t LazyAllocator::alloc(size_t size) {
     return retAddr;
 }
 
-size_t LazyAllocator::allocPersistent(size_t size) {
-    IT_ASSERT(this->persistentPtr == nullptr);
+size_t LazyAllocator::allocWeight(size_t size) {
+    IT_ASSERT(this->weightPtr == nullptr);
     size = this->getAlignedSize(size);
-    size_t retAddr = this->persistentPeak;
-    this->persistentPeak += size;
+    size_t retAddr = this->weightPeak;
+    this->weightPeak += size;
     return retAddr;
 }
 

@@ -143,22 +143,22 @@ void *LazyAllocator::getPtr() {
     if (this->ptr == nullptr) {
         this->ptr = runtime->alloc(this->peak);
 #ifdef DEBUG_MODE
-        printf("LazyAllocator really alloc non-persistent: %p %lu bytes\n",
+        printf("LazyAllocator really alloc non-weight: %p %lu bytes\n",
                this->ptr, peak);
 #endif
     }
     return this->ptr;
 }
 
-void *LazyAllocator::getPersistentPtr() {
-    if (this->persistentPtr == nullptr) {
-        this->persistentPtr = runtime->alloc(this->persistentPeak);
+void *LazyAllocator::getWeightPtr() {
+    if (this->weightPtr == nullptr) {
+        this->weightPtr = runtime->alloc(this->weightPeak);
 #ifdef DEBUG_MODE
-        printf("LazyAllocator really alloc persistent: %p %lu bytes\n",
-               this->persistentPtr, persistentPeak);
+        printf("LazyAllocator really alloc weight: %p %lu bytes\n",
+               this->weightPtr, weightPeak);
 #endif
     }
-    return this->persistentPtr;
+    return this->weightPtr;
 }
 
 size_t LazyAllocator::getAlignedSize(size_t size) {

@@ -166,8 +166,8 @@ size_t LazyAllocator::getAlignedSize(size_t size) {
 }
 
 void LazyAllocator::info() {
-    std::cout << "Used memory: " << this->used + this->persistentPeak
-              << ", peak memory: " << this->peak + this->persistentPeak
+    std::cout << "Used memory: " << this->used + this->weightPeak
+              << ", peak memory: " << this->peak + this->weightPeak
               << std::endl;
 }
 

@@ -23,7 +23,7 @@ string TensorObj::toString() const {
     string ret = "Tensor " + std::to_string(guid) + ", Fuid " +
                  std::to_string(fuid) + ", shape " + vecToString(shape) +
                  ", dtype " + dtype.toString() + ", " + runtime->toString() +
-                 ", " + ss.str() + "\n";
+                 ", " + ss.str() + ", " + tensorTypeToString() + "\n";
     vector<UidBaseType> targetGuids;
     for (const auto &op : targets)
         targetGuids.emplace_back(op.lock()->getGuid());

@@ -321,7 +321,9 @@ void init_graph_builder(py::module &m) {
                  py::buffer_protocol())
         .def("fuid", &TensorObj::getFuid, policy::automatic)
         .def("shape", &TensorObj::getDims, policy::move)
-        .def("set_persistent", &TensorObj::setPersistent, policy::move)
+        .def("set_weight", &TensorObj::setWeight, policy::move)
+        .def("set_input", &TensorObj::setInput, policy::move)
+        .def("set_output", &TensorObj::setOutput, policy::move)
         .def("copyin_float", &TensorObj::copyin<float>, policy::move)
         .def("copyin_int32", &TensorObj::copyin<int32_t>, policy::move)
         .def("copyin_int64", &TensorObj::copyin<int64_t>, policy::move)