diff --git a/Makefile b/Makefile
index d6dfadd2..01784937 100644
--- a/Makefile
+++ b/Makefile
@@ -54,6 +54,10 @@ test-onnx:
 	@echo
 	python3 pyinfinitensor/tests/test_onnx.py
 
+test-api:
+	@echo
+	python3 pyinfinitensor/tests/test_api.py
+
 docker-build:
 	docker build -f scripts/dockerfile/$(DOCKER_FILE) -t $(DOCKER_NAME) .
 
diff --git a/include/core/tensor_base.h b/include/core/tensor_base.h
index 54d65ffd..05a8a727 100644
--- a/include/core/tensor_base.h
+++ b/include/core/tensor_base.h
@@ -44,6 +44,7 @@ class TensorBaseObj : public Object {
     }
 
     DataType getDType() const { return dtype; }
+    int getDTypeIndex() const { return dtype.getIndex(); }
     Runtime getRuntime() const { return runtime; }
 
     // std::pair<Operator, int> getOutputOfWithIndex();
diff --git a/pyinfinitensor/tests/test_api.py b/pyinfinitensor/tests/test_api.py
new file mode 100644
index 00000000..d0d77b88
--- /dev/null
+++ b/pyinfinitensor/tests/test_api.py
@@ -0,0 +1,65 @@
+import os, unittest
+from onnx import TensorProto
+from pyinfinitensor import backend
+import numpy as np
+
+
+class TestPythonAPI(unittest.TestCase):
+    def test_copyin_numpy(self):
+        dims = [2, 3, 5, 4]
+        np_array = np.random.random(dims).astype(np.float32)
+        handler = backend.GraphHandler(backend.cpu_runtime())
+        tensor1 = handler.tensor(dims, TensorProto.FLOAT)
+        tensor2 = handler.tensor(dims, TensorProto.FLOAT)
+        handler.data_malloc()
+        tensor1.copyin_numpy(np_array)
+        tensor2.copyin_float(np_array.flatten().tolist())
+        array1 = tensor1.copyout_float()
+        array2 = tensor2.copyout_float()
+        self.assertEqual(array1, array2)
+        self.assertTrue(np.array_equal(np.array(array1).reshape(dims), np_array))
+
+        np_array = np.random.random(dims).astype(np.int64)
+        handler = backend.GraphHandler(backend.cpu_runtime())
+        tensor1 = handler.tensor(dims, TensorProto.INT64)
+        tensor2 = handler.tensor(dims, TensorProto.INT64)
+        handler.data_malloc()
+        tensor1.copyin_numpy(np_array)
+        tensor2.copyin_int64(np_array.flatten().tolist())
+        array1 = tensor1.copyout_int64()
+        array2 = tensor2.copyout_int64()
+        self.assertEqual(array1, array2)
+        self.assertTrue(np.array_equal(np.array(array1).reshape(dims), np_array))
+
+    def test_copyout_numpy(self):
+        dims = [2, 3, 5, 4]
+        np_array = np.random.random(dims).astype(np.float32)
+        handler = backend.GraphHandler(backend.cpu_runtime())
+        tensor1 = handler.tensor(dims, TensorProto.FLOAT)
+        tensor2 = handler.tensor(dims, TensorProto.FLOAT)
+        handler.data_malloc()
+        tensor1.copyin_float(np_array.flatten().tolist())
+        tensor2.copyin_float(np_array.flatten().tolist())
+        array1 = np.array(tensor1.copyout_float()).reshape(dims)
+        array2 = tensor2.copyout_numpy()
+        self.assertTrue(np.array_equal(array2, np_array))
+        self.assertTrue(np.array_equal(array1, array2))
+
+        np_array = np.random.random(dims).astype(np.float16)
+        np_array[0, 0, 0, 0] = .1
+        handler = backend.GraphHandler(backend.cpu_runtime())
+        tensor1 = handler.tensor(dims, TensorProto.FLOAT16)
+        handler.data_malloc()
+        tensor1.copyin_numpy(np_array)
+        array1 = tensor1.copyout_numpy()
+        # Copy should be the same as original array
+        self.assertTrue(np.array_equal(array1, np_array))
+        # Modify the value so that tensorObj value changes
+        np_array[0, 0, 0, 0] = 0.
+        tensor1.copyin_numpy(np_array)
+        # The copied-out array should not change
+        self.assertFalse(np.array_equal(array1, np_array))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/pyinfinitensor/tests/test_onnx.py b/pyinfinitensor/tests/test_onnx.py
index c5ff13ee..6d041ed2 100644
--- a/pyinfinitensor/tests/test_onnx.py
+++ b/pyinfinitensor/tests/test_onnx.py
@@ -458,54 +458,6 @@ class TestStringMethods(unittest.TestCase):
         where = make_node("Where", ["x", "y", "con"], ["output"], name="where")
         make_and_import_model(make_graph([where], "where", [x, y, con], [output]))
 
-    def test_copyin(self):
-        dims = [2, 3, 5, 4]
-        np_array = np.random.random(dims).astype(np.float32)
-        handler = backend.GraphHandler(backend.cpu_runtime())
-        tensor1 = handler.tensor(dims, TensorProto.FLOAT)
-        tensor2 = handler.tensor(dims, TensorProto.FLOAT)
-        handler.data_malloc()
-        tensor1.copyin_numpy(np_array)
-        tensor2.copyin_float(np_array.flatten().tolist())
-        array1 = tensor1.copyout_float()
-        array2 = tensor2.copyout_float()
-        self.assertEqual(array1, array2)
-        self.assertTrue(np.array_equal(np.array(array1).reshape(dims), np_array))
-
-        np_array = np.random.random(dims).astype(np.int64)
-        handler = backend.GraphHandler(backend.cpu_runtime())
-        tensor1 = handler.tensor(dims, TensorProto.INT64)
-        tensor2 = handler.tensor(dims, TensorProto.INT64)
-        handler.data_malloc()
-        tensor1.copyin_numpy(np_array)
-        tensor2.copyin_int64(np_array.flatten().tolist())
-        array1 = tensor1.copyout_int64()
-        array2 = tensor2.copyout_int64()
-        self.assertEqual(array1, array2)
-        self.assertTrue(np.array_equal(np.array(array1).reshape(dims), np_array))
-
-    def test_to_numpy(self):
-        dims = [2, 3, 5, 4]
-        np_array = np.random.random(dims).astype(np.float32)
-        handler = backend.GraphHandler(backend.cpu_runtime())
-        tensor1 = handler.tensor(dims, TensorProto.FLOAT)
-        tensor2 = handler.tensor(dims, TensorProto.FLOAT)
-        handler.data_malloc()
-        tensor1.copyin_float(np_array.flatten().tolist())
-        tensor2.copyin_float(np_array.flatten().tolist())
-        array1 = np.array(tensor1.copyout_float()).reshape(dims)
-        array2 = np.array(tensor2)
-        self.assertTrue(np.array_equal(array2, np_array))
-        self.assertTrue(np.array_equal(array1, array2))
-
-        np_array = np.random.random(dims).astype(np.float16)
-        handler = backend.GraphHandler(backend.cpu_runtime())
-        tensor1 = handler.tensor(dims, TensorProto.FLOAT16)
-        handler.data_malloc()
-        tensor1.copyin_numpy(np_array)
-        array1 = np.array(tensor1, copy=False)
-        self.assertTrue(np.array_equal(array1, np_array))
-
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/src/ffi/ffi_infinitensor.cc b/src/ffi/ffi_infinitensor.cc
index 5cc9717f..28b17bd4 100644
--- a/src/ffi/ffi_infinitensor.cc
+++ b/src/ffi/ffi_infinitensor.cc
@@ -306,6 +306,44 @@ void export_functions(py::module &m) {
 #undef FUNCTION
 }
 
+// A helper function that converts DataType to python format string
+static std::string getFormat(DataType type) {
+    std::string format;
+    if (type == DataType::Float32) {
+        format = py::format_descriptor<float>::format();
+    } else if (type == DataType::Double) {
+        format = py::format_descriptor<double>::format();
+    } else if (type == DataType::Int32) {
+        format = py::format_descriptor<int32_t>::format();
+    } else if (type == DataType::UInt32) {
+        format = py::format_descriptor<uint32_t>::format();
+    } else if (type == DataType::Int64) {
+        format = py::format_descriptor<int64_t>::format();
+    } else if (type == DataType::UInt64) {
+        format = py::format_descriptor<uint64_t>::format();
+    } else if (type == DataType::Int16) {
+        format = py::format_descriptor<int16_t>::format();
+    } else if (type == DataType::UInt16) {
+        format = py::format_descriptor<uint16_t>::format();
+    } else if (type == DataType::Int8) {
+        format = py::format_descriptor<int8_t>::format();
+    } else if (type == DataType::UInt8) {
+        format = py::format_descriptor<uint8_t>::format();
+    } else if (type == DataType::Bool) {
+        format = py::format_descriptor<bool>::format();
+    } else if (type == DataType::Float16 || type == DataType::BFloat16) {
+        // Python uses "e" for half precision float type code.
+        // Check the following link for more information.
+        // https://docs.python.org/3/library/struct.html#format-characters
+        format = "e";
+    } else {
+        throw std::runtime_error("Error converting TensorObj to "
+                                 "Numpy: unsupported datatype.\n");
+    }
+
+    return format;
+}
+
 void init_graph_builder(py::module &m) {
     using Handler = GraphHandlerObj;
 
@@ -326,6 +364,7 @@ void init_graph_builder(py::module &m) {
                          py::buffer_protocol())
         .def("fuid", &TensorObj::getFuid, policy::automatic)
         .def("shape", &TensorObj::getDims, policy::move)
+        .def("dtype", &TensorObj::getDTypeIndex, policy::automatic)
         .def("copyin_float", &TensorObj::copyin<float>, policy::move)
         .def("copyin_int32", &TensorObj::copyin<int32_t>, policy::move)
         .def("copyin_int64", &TensorObj::copyin<int64_t>, policy::move)
@@ -352,51 +391,24 @@ void init_graph_builder(py::module &m) {
                  }
                  self.copyin(data_np, self.getBytes());
              })
-        // A buffer can be used to convert a TensorObj directly to Numpy array
-        // without copy
-        .def_buffer([](TensorObj &self) -> py::buffer_info {
-            vector<ssize_t> stride_byte;
-            for (int s : self.getStride()) {
-                stride_byte.push_back(s * self.getDType().getSize());
-            }
+        // Return a Numpy array which copies the values of this tensor
+        .def("copyout_numpy",
+             [](TensorObj &self) -> py::array {
+                 vector<ssize_t> stride_byte;
+                 for (int s : self.getStride()) {
+                     stride_byte.push_back(s * self.getDType().getSize());
+                 }
+                 std::string format = getFormat(self.getDType());
 
-            std::string format;
-            if (self.getDType() == DataType::Float32) {
-                format = py::format_descriptor<float>::format();
-            } else if (self.getDType() == DataType::Double) {
-                format = py::format_descriptor<double>::format();
-            } else if (self.getDType() == DataType::Int32) {
-                format = py::format_descriptor<int32_t>::format();
-            } else if (self.getDType() == DataType::UInt32) {
-                format = py::format_descriptor<uint32_t>::format();
-            } else if (self.getDType() == DataType::Int64) {
-                format = py::format_descriptor<int64_t>::format();
-            } else if (self.getDType() == DataType::UInt64) {
-                format = py::format_descriptor<uint64_t>::format();
-            } else if (self.getDType() == DataType::Int16) {
-                format = py::format_descriptor<int16_t>::format();
-            } else if (self.getDType() == DataType::UInt16) {
-                format = py::format_descriptor<uint16_t>::format();
-            } else if (self.getDType() == DataType::Int8) {
-                format = py::format_descriptor<int8_t>::format();
-            } else if (self.getDType() == DataType::UInt8) {
-                format = py::format_descriptor<uint8_t>::format();
-            } else if (self.getDType() == DataType::Float16 ||
-                       self.getDType() == DataType::BFloat16) {
-                // Python uses "e" for half precision float type code.
-                // Check the following link for more information.
-                // https://docs.python.org/3/library/struct.html#format-characters
-                format = "e";
-            } else {
-                throw std::runtime_error("Error converting TensorObj to "
-                                         "Numpy: unsupported datatype.\n");
-            }
+                 py::array numpy_array(py::dtype(format), self.getDims(),
+                                       nullptr);
 
-            return py::buffer_info(self.getRawDataPtr<void *>(),
-                                   self.getDType().getSize(), format,
-                                   self.getRank(), self.getDims(), stride_byte,
-                                   true); // Read-only = true
-        })
+                 // Copy data to the numpy array
+                 auto ptr = numpy_array.mutable_data();
+                 self.copyout(ptr, self.getBytes());
+
+                 return numpy_array;
+             })
         .def("has_target", &TensorObj::hasTarget, policy::automatic)
         .def("src", &TensorObj::getSource, policy::move)
         .def("printData", &TensorObj::printData, policy::automatic);