forked from jiuyuan/InfiniTensor
- add tensorType
- modify allocator to support memory allocation based on tensorType (see the usage sketch below)
parent e18900128d
commit 648ed46952
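Before the diff, a minimal usage sketch of the new tensor-type API. This sketch is not part of the commit: the graph-building helpers it uses (NativeCpuRuntimeObj::getInstance, make_ref<GraphObj>, addTensor, DataType::Float32) are assumed from the surrounding InfiniTensor code and may differ; only the tensor-type calls, dataMalloc behavior, and the weight region come from this change.

// Usage sketch only, not part of this diff. The graph-building helpers
// (NativeCpuRuntimeObj::getInstance, make_ref<GraphObj>, addTensor,
// DataType::Float32) are assumptions; only the tensor-type calls and the
// weight region below are introduced by this commit.
#include "core/graph.h"
#include "core/runtime.h"
#include <iostream>

using namespace infini;

int main() {
    Runtime runtime = NativeCpuRuntimeObj::getInstance(); // assumed helper
    Graph g = make_ref<GraphObj>(runtime);                // assumed helper

    auto w = g->addTensor({64, 64}, DataType::Float32);   // assumed helper
    w->setWeight(); // placed in the allocator's weight region, laid out once
    auto x = g->addTensor({1, 64}, DataType::Float32);
    x->setInput();  // input/output offsets are allocated but never reused

    g->dataMalloc(); // weights go through allocWeight/getWeightPtr,
                     // everything else through alloc/getPtr
    std::cout << w->tensorTypeToString() << std::endl; // prints "weight"
    return 0;
}

The design point, visible in the GraphObj::dataMalloc hunk below, is that weight tensors are routed through allocator.allocWeight/getWeightPtr and kept for the lifetime of the graph, input and output tensors are allocated once without reuse, and only the intermediate ("others") tensors participate in ref-counted offset reuse.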
@@ -125,9 +125,9 @@ class GraphObj : public Object {
     bool sorted;
 
     /**
-     * @brief If the persistent tensors are allocated.
+     * @brief If the weight tensors are allocated.
      */
-    bool persistentAllocated = false;
+    bool weightAllocated = false;
 };
 
 } // namespace infini

@@ -24,15 +24,15 @@ class LazyAllocator {
 
     size_t peak = 0;
 
-    size_t persistentPeak = 0;
+    size_t weightPeak = 0;
 
     size_t alignment;
 
     // pointer to the memory actually allocated
     void *ptr = nullptr;
 
-    // pointer to the persistent memory space
-    void *persistentPtr = nullptr;
+    // pointer to the weight memory space
+    void *weightPtr = nullptr;
 
     // // a cache designed for a batch size that has already occurred
     // std::unordered_map<size_t, std::unordered_map<TensorObj *, size_t>>

@@ -74,7 +74,7 @@ class LazyAllocator {
     // return: head address offset of the allocated memory block
     size_t alloc(size_t size);
 
-    size_t allocPersistent(size_t size);
+    size_t allocWeight(size_t size);
 
     // function: simulate memory free
     // arguments:

@@ -90,7 +90,7 @@ class LazyAllocator {
 
     // std::unordered_map<TensorObj *, size_t> getCache(size_t batchsize);
 
-    void *getPersistentPtr();
+    void *getWeightPtr();
 
     void info();
 

@@ -1,5 +1,6 @@
 #pragma once
 #include "core/tensor_base.h"
+#include "core/tensor_type.h"
 #include "utils/data_convert.h"
 #include <cmath>
 #include <cstring>

@@ -19,7 +20,7 @@ class TensorObj : public TensorBaseObj {
     size_t _size; // Cache of Π(shape).
     Fuid fuid;    // Cloned tensors share the same id. Tensors constructed from
                   // scratch have a new id.
-    bool persistent = false;
+    TensorType tensorType = TensorType::others;
 
   public:
     TensorObj(Shape shape, DataType dtype, Runtime runtime);

@@ -37,8 +38,33 @@
     size_t getOffset(const vector<int> &ds) const;
     void dataMalloc();
     UidBaseType getFuid() const { return fuid; }
-    bool isPersistent() const { return persistent; }
-    void setPersistent() { persistent = true; }
+    bool isWeight() const { return tensorType == TensorType::weight; }
+    bool isInput() const { return tensorType == TensorType::input; }
+    bool isOutput() const { return tensorType == TensorType::output; }
+    bool isOthers() const { return tensorType == TensorType::others; }
+    void setWeight() { tensorType = TensorType::weight; }
+    void setInput() { tensorType = TensorType::input; }
+    void setOutput() { tensorType = TensorType::output; }
+    string tensorTypeToString() const {
+        switch (tensorType) {
+        case TensorType::weight:
+            return "weight";
+            break;
+        case TensorType::input:
+            return "input";
+            break;
+        case TensorType::output:
+            return "output";
+            break;
+        case TensorType::others:
+            return "others";
+            break;
+
+        default:
+            return "unknown tensor type";
+            break;
+        }
+    }
 
     void load(std::string file_path);
     void save(std::string file_path);

@@ -0,0 +1,7 @@
+#pragma once
+
+namespace infini {
+
+enum class TensorType { weight, input, output, others };
+
+} // namespace infini

@@ -45,11 +45,17 @@ class OnnxStub:
         tensors: Dict[str, backend.Tensor] = dict()
         data: Dict[str, TensorProto] = dict()
 
+        for initializer in model.graph.initializer:
+            dims = [d for d in initializer.dims]
+            tensors[initializer.name] = self.handler.tensor(dims, initializer.data_type)
+            data[initializer.name] = initializer
+
         for input in model.graph.input:
             dims = _take_shape_dim(input.type.tensor_type.shape)
-            tensors[input.name] = self.handler.tensor(
-                dims, input.type.tensor_type.elem_type
-            )
+            if input.name not in tensors.keys():
+                tensors[input.name] = self.handler.tensor(
+                    dims, input.type.tensor_type.elem_type
+                )
 
         for output in model.graph.output:
             dims = _take_shape_dim(output.type.tensor_type.shape)

@@ -57,10 +63,6 @@ class OnnxStub:
                 dims, output.type.tensor_type.elem_type
             )
 
-        for initializer in model.graph.initializer:
-            dims = [d for d in initializer.dims]
-            tensors[initializer.name] = self.handler.tensor(dims, initializer.data_type)
-            data[initializer.name] = initializer
 
         node_name = []
         new_node_name = []

@@ -609,11 +611,17 @@ class OnnxStub:
         node_list = list(set(node_name) - set(new_node_name))
 
         ################################
-        # Set weight tensors as persistent
+        # Set tensor type
         ################################
-        for name, obj in tensors.items():
-            if data.get(name) != None:
-                obj.set_persistent()
+        for initializer in model.graph.initializer:
+            tensors[initializer.name].set_weight()
+
+        for input in model.graph.input:
+            tensors[input.name].set_input()
+
+        for output in model.graph.output:
+            tensors[output.name].set_output()
+
 
         ################################
         # Allocate memory space for data

@@ -168,54 +168,60 @@ void GraphObj::dataMalloc() {
     // reinit allocator
    allocator.init();
 
-    // record all persistent tensors, including weight tensors and kvcache
+    // record all weight tensors, including weight tensors and kvcache
     // tensors
-    std::unordered_set<TensorObj *> persistentTensors;
+    std::unordered_set<TensorObj *> weightTensors;
     for (auto &tensor : tensors) {
-        if (tensor->isPersistent()) {
-            // allocate memory for all persistent tensors first, and this memory
-            // will not be reused later
-            persistentTensors.insert(tensor.get());
-            if (!this->persistentAllocated) {
+        if (tensor->isWeight()) {
+            // allocate memory for all weight tensors first, and this memory
+            // will not be freed until the graph is destroyed
+            weightTensors.insert(tensor.get());
+            if (!this->weightAllocated) {
                 tensorToOffset[tensor.get()] =
-                    allocator.allocPersistent(tensor->getBytes());
+                    allocator.allocWeight(tensor->getBytes());
             }
+        } else if (tensor->isInput() || tensor->isOutput()) {
+            // allocate memory for all input and output tensors, and this memory
+            // will not be reused later
+            tensorToOffset[tensor.get()] = allocator.alloc(tensor->getBytes());
         } else {
             tensorToRefCount[tensor.get()] = tensor->getTargets().size();
             // allocate memory for all user-created tensors
             if (tensor.get()->getSource() == nullptr) {
                 // allocate memory for input tensors, because it is not the
                 // output of any op
                 tensorToOffset[tensor.get()] =
                     allocator.alloc(tensor->getBytes());
             }
         }
     }
-    // if memory has not yet been allocated for persistent tensors,
+    // if memory has not yet been allocated for weight tensors,
     // allocate memory now and do not allocate again in the future.
-    if (!this->persistentAllocated) {
-        this->persistentAllocated = true;
-        // only allocate once for persistent tensors
-        for (auto &tensor : persistentTensors) {
+    if (!this->weightAllocated) {
+        this->weightAllocated = true;
+        // only allocate once for weight tensors
+        for (auto &tensor : weightTensors) {
             IT_ASSERT(tensorToOffset.find(tensor) != tensorToOffset.end());
             tensor->setDataBlob(make_ref<BlobObj>(
                 tensor->runtime,
-                static_cast<uint8_t *>(allocator.getPersistentPtr()) +
+                static_cast<uint8_t *>(allocator.getWeightPtr()) +
                     tensorToOffset[tensor]));
         }
     }
     // traverse in topological order and simulate memory allocation
     for (auto &op : ops) {
-        // memory should be allocated for the output first
+        // memory should be allocated for the op's output first
         auto outputs = op->getOutputs();
         for (auto &tensor : outputs) {
-            tensorToOffset[tensor.get()] = allocator.alloc(tensor->getBytes());
+            if (tensor->isOthers()) {
+                tensorToOffset[tensor.get()] =
+                    allocator.alloc(tensor->getBytes());
+            }
         }
         auto inputs = op->getInputs();
         for (auto &tensor : inputs) {
-            if (persistentTensors.find(tensor.get()) ==
-                persistentTensors.end()) {
+            if (tensor->isOthers()) {
                 auto tensorIter = tensorToRefCount.find(tensor.get());
                 IT_ASSERT(tensorIter != tensorToRefCount.end());
                 IT_ASSERT(tensorToRefCount[tensor.get()] > 0);
                 tensorToRefCount[tensor.get()] -= 1;
                 if (tensorToRefCount[tensor.get()] == 0) {
                     // indicate that this tensor will no longer be used and

@@ -228,9 +234,9 @@ void GraphObj::dataMalloc() {
         }
     }
 
-    // perform actual memory allocation for non-persistent tensors
+    // perform actual memory allocation for non-weight tensors
     for (auto &tensor : tensors) {
-        if (!tensor->isPersistent()) {
+        if (!tensor->isWeight()) {
            IT_ASSERT(tensorToOffset.find(tensor.get()) !=
                      tensorToOffset.end());
            tensor->setDataBlob(make_ref<BlobObj>(

@@ -27,8 +27,8 @@ LazyAllocator::~LazyAllocator() {
     if (this->ptr != nullptr) {
         runtime->dealloc(this->ptr);
     }
-    if (this->persistentPtr != nullptr) {
-        runtime->dealloc(this->persistentPtr);
+    if (this->weightPtr != nullptr) {
+        runtime->dealloc(this->weightPtr);
     }
 }
 

@@ -91,11 +91,11 @@ size_t LazyAllocator::alloc(size_t size) {
     return retAddr;
 }
 
-size_t LazyAllocator::allocPersistent(size_t size) {
-    IT_ASSERT(this->persistentPtr == nullptr);
+size_t LazyAllocator::allocWeight(size_t size) {
+    IT_ASSERT(this->weightPtr == nullptr);
     size = this->getAlignedSize(size);
-    size_t retAddr = this->persistentPeak;
-    this->persistentPeak += size;
+    size_t retAddr = this->weightPeak;
+    this->weightPeak += size;
     return retAddr;
 }
 

@@ -143,22 +143,22 @@ void *LazyAllocator::getPtr() {
     if (this->ptr == nullptr) {
         this->ptr = runtime->alloc(this->peak);
 #ifdef DEBUG_MODE
-        printf("LazyAllocator really alloc non-persistent: %p %lu bytes\n",
+        printf("LazyAllocator really alloc non-weight: %p %lu bytes\n",
                this->ptr, peak);
 #endif
     }
     return this->ptr;
 }
 
-void *LazyAllocator::getPersistentPtr() {
-    if (this->persistentPtr == nullptr) {
-        this->persistentPtr = runtime->alloc(this->persistentPeak);
+void *LazyAllocator::getWeightPtr() {
+    if (this->weightPtr == nullptr) {
+        this->weightPtr = runtime->alloc(this->weightPeak);
 #ifdef DEBUG_MODE
-        printf("LazyAllocator really alloc persistent: %p %lu bytes\n",
-               this->persistentPtr, persistentPeak);
+        printf("LazyAllocator really alloc weight: %p %lu bytes\n",
+               this->weightPtr, weightPeak);
 #endif
     }
-    return this->persistentPtr;
+    return this->weightPtr;
 }
 
 size_t LazyAllocator::getAlignedSize(size_t size) {

@@ -166,8 +166,8 @@ size_t LazyAllocator::getAlignedSize(size_t size) {
 }
 
 void LazyAllocator::info() {
-    std::cout << "Used memory: " << this->used + this->persistentPeak
-              << ", peak memory: " << this->peak + this->persistentPeak
+    std::cout << "Used memory: " << this->used + this->weightPeak
+              << ", peak memory: " << this->peak + this->weightPeak
               << std::endl;
 }
 

@@ -23,7 +23,7 @@ string TensorObj::toString() const {
     string ret = "Tensor " + std::to_string(guid) + ", Fuid " +
                  std::to_string(fuid) + ", shape " + vecToString(shape) +
                  ", dtype " + dtype.toString() + ", " + runtime->toString() +
-                 ", " + ss.str() + "\n";
+                 ", " + ss.str() + ", " + tensorTypeToString() + "\n";
     vector<UidBaseType> targetGuids;
     for (const auto &op : targets)
         targetGuids.emplace_back(op.lock()->getGuid());

@@ -321,7 +321,9 @@ void init_graph_builder(py::module &m) {
                  py::buffer_protocol())
         .def("fuid", &TensorObj::getFuid, policy::automatic)
         .def("shape", &TensorObj::getDims, policy::move)
-        .def("set_persistent", &TensorObj::setPersistent, policy::move)
+        .def("set_weight", &TensorObj::setWeight, policy::move)
+        .def("set_input", &TensorObj::setInput, policy::move)
+        .def("set_output", &TensorObj::setOutput, policy::move)
         .def("copyin_float", &TensorObj::copyin<float>, policy::move)
         .def("copyin_int32", &TensorObj::copyin<int32_t>, policy::move)
         .def("copyin_int64", &TensorObj::copyin<int64_t>, policy::move)