diff --git a/Makefile b/Makefile
index d6dfadd2..01784937 100644
--- a/Makefile
+++ b/Makefile
@@ -54,6 +54,10 @@ test-onnx:
 	@echo
 	python3 pyinfinitensor/tests/test_onnx.py
 
+test-api:
+	@echo
+	python3 pyinfinitensor/tests/test_api.py
+
 docker-build:
 	docker build -f scripts/dockerfile/$(DOCKER_FILE) -t $(DOCKER_NAME) .
 
diff --git a/include/core/tensor_base.h b/include/core/tensor_base.h
index 54d65ffd..05a8a727 100644
--- a/include/core/tensor_base.h
+++ b/include/core/tensor_base.h
@@ -44,6 +44,7 @@ class TensorBaseObj : public Object {
     }
 
     DataType getDType() const { return dtype; }
+    int getDTypeIndex() const { return dtype.getIndex(); }
     Runtime getRuntime() const { return runtime; }
 
     // std::pair<Operator, int> getOutputOfWithIndex();
diff --git a/pyinfinitensor/tests/test_api.py b/pyinfinitensor/tests/test_api.py
new file mode 100644
index 00000000..d0d77b88
--- /dev/null
+++ b/pyinfinitensor/tests/test_api.py
@@ -0,0 +1,65 @@
+import os, unittest
+from onnx import TensorProto
+from pyinfinitensor import backend
+import numpy as np
+
+
+class TestPythonAPI(unittest.TestCase):
+    def test_copyin_numpy(self):
+        dims = [2, 3, 5, 4]
+        np_array = np.random.random(dims).astype(np.float32)
+        handler = backend.GraphHandler(backend.cpu_runtime())
+        tensor1 = handler.tensor(dims, TensorProto.FLOAT)
+        tensor2 = handler.tensor(dims, TensorProto.FLOAT)
+        handler.data_malloc()
+        tensor1.copyin_numpy(np_array)
+        tensor2.copyin_float(np_array.flatten().tolist())
+        array1 = tensor1.copyout_float()
+        array2 = tensor2.copyout_float()
+        self.assertEqual(array1, array2)
+        self.assertTrue(np.array_equal(np.array(array1).reshape(dims), np_array))
+
+        np_array = np.random.random(dims).astype(np.int64)
+        handler = backend.GraphHandler(backend.cpu_runtime())
+        tensor1 = handler.tensor(dims, TensorProto.INT64)
+        tensor2 = handler.tensor(dims, TensorProto.INT64)
+        handler.data_malloc()
+        tensor1.copyin_numpy(np_array)
+        tensor2.copyin_int64(np_array.flatten().tolist())
+        array1 = tensor1.copyout_int64()
+        array2 = tensor2.copyout_int64()
+        self.assertEqual(array1, array2)
+        self.assertTrue(np.array_equal(np.array(array1).reshape(dims), np_array))
+
+    def test_copyout_numpy(self):
+        dims = [2, 3, 5, 4]
+        np_array = np.random.random(dims).astype(np.float32)
+        handler = backend.GraphHandler(backend.cpu_runtime())
+        tensor1 = handler.tensor(dims, TensorProto.FLOAT)
+        tensor2 = handler.tensor(dims, TensorProto.FLOAT)
+        handler.data_malloc()
+        tensor1.copyin_float(np_array.flatten().tolist())
+        tensor2.copyin_float(np_array.flatten().tolist())
+        array1 = np.array(tensor1.copyout_float()).reshape(dims)
+        array2 = tensor2.copyout_numpy()
+        self.assertTrue(np.array_equal(array2, np_array))
+        self.assertTrue(np.array_equal(array1, array2))
+
+        np_array = np.random.random(dims).astype(np.float16)
+        np_array[0, 0, 0, 0] = .1
+        handler = backend.GraphHandler(backend.cpu_runtime())
+        tensor1 = handler.tensor(dims, TensorProto.FLOAT16)
+        handler.data_malloc()
+        tensor1.copyin_numpy(np_array)
+        array1 = tensor1.copyout_numpy()
+        # Copy should be the same as original array
+        self.assertTrue(np.array_equal(array1, np_array))
+        # Modify the value so that tensorObj value changes
+        np_array[0, 0, 0, 0] = 0.
+        tensor1.copyin_numpy(np_array)
+        # The copied-out array should not change
+        self.assertFalse(np.array_equal(array1, np_array))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/pyinfinitensor/tests/test_onnx.py b/pyinfinitensor/tests/test_onnx.py
index c5ff13ee..6d041ed2 100644
--- a/pyinfinitensor/tests/test_onnx.py
+++ b/pyinfinitensor/tests/test_onnx.py
@@ -458,54 +458,6 @@ class TestStringMethods(unittest.TestCase):
         where = make_node("Where", ["x", "y", "con"], ["output"], name="where")
         make_and_import_model(make_graph([where], "where", [x, y, con], [output]))
 
-    def test_copyin(self):
-        dims = [2, 3, 5, 4]
-        np_array = np.random.random(dims).astype(np.float32)
-        handler = backend.GraphHandler(backend.cpu_runtime())
-        tensor1 = handler.tensor(dims, TensorProto.FLOAT)
-        tensor2 = handler.tensor(dims, TensorProto.FLOAT)
-        handler.data_malloc()
-        tensor1.copyin_numpy(np_array)
-        tensor2.copyin_float(np_array.flatten().tolist())
-        array1 = tensor1.copyout_float()
-        array2 = tensor2.copyout_float()
-        self.assertEqual(array1, array2)
-        self.assertTrue(np.array_equal(np.array(array1).reshape(dims), np_array))
-
-        np_array = np.random.random(dims).astype(np.int64)
-        handler = backend.GraphHandler(backend.cpu_runtime())
-        tensor1 = handler.tensor(dims, TensorProto.INT64)
-        tensor2 = handler.tensor(dims, TensorProto.INT64)
-        handler.data_malloc()
-        tensor1.copyin_numpy(np_array)
-        tensor2.copyin_int64(np_array.flatten().tolist())
-        array1 = tensor1.copyout_int64()
-        array2 = tensor2.copyout_int64()
-        self.assertEqual(array1, array2)
-        self.assertTrue(np.array_equal(np.array(array1).reshape(dims), np_array))
-
-    def test_to_numpy(self):
-        dims = [2, 3, 5, 4]
-        np_array = np.random.random(dims).astype(np.float32)
-        handler = backend.GraphHandler(backend.cpu_runtime())
-        tensor1 = handler.tensor(dims, TensorProto.FLOAT)
-        tensor2 = handler.tensor(dims, TensorProto.FLOAT)
-        handler.data_malloc()
-        tensor1.copyin_float(np_array.flatten().tolist())
-        tensor2.copyin_float(np_array.flatten().tolist())
-        array1 = np.array(tensor1.copyout_float()).reshape(dims)
-        array2 = np.array(tensor2)
-        self.assertTrue(np.array_equal(array2, np_array))
-        self.assertTrue(np.array_equal(array1, array2))
-
-        np_array = np.random.random(dims).astype(np.float16)
-        handler = backend.GraphHandler(backend.cpu_runtime())
-        tensor1 = handler.tensor(dims, TensorProto.FLOAT16)
-        handler.data_malloc()
-        tensor1.copyin_numpy(np_array)
-        array1 = np.array(tensor1, copy=False)
-        self.assertTrue(np.array_equal(array1, np_array))
-
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/src/ffi/ffi_infinitensor.cc b/src/ffi/ffi_infinitensor.cc
index 5cc9717f..28b17bd4 100644
--- a/src/ffi/ffi_infinitensor.cc
+++ b/src/ffi/ffi_infinitensor.cc
@@ -306,6 +306,44 @@ void export_functions(py::module &m) {
 #undef FUNCTION
 }
 
+// A helper function that converts DataType to python format string
+static std::string getFormat(DataType type) {
+    std::string format;
+    if (type == DataType::Float32) {
+        format = py::format_descriptor<float>::format();
+    } else if (type == DataType::Double) {
+        format = py::format_descriptor<double>::format();
+    } else if (type == DataType::Int32) {
+        format = py::format_descriptor<int32_t>::format();
+    } else if (type == DataType::UInt32) {
+        format = py::format_descriptor<uint32_t>::format();
+    } else if (type == DataType::Int64) {
+        format = py::format_descriptor<int64_t>::format();
+    } else if (type == DataType::UInt64) {
+        format = py::format_descriptor<uint64_t>::format();
+    } else if (type == DataType::Int16) {
+        format = py::format_descriptor<int16_t>::format();
+    } else if (type == DataType::UInt16) {
+        format = py::format_descriptor<uint16_t>::format();
+    } else if (type == DataType::Int8) {
+        format = py::format_descriptor<int8_t>::format();
+    } else if (type == DataType::UInt8) {
+        format = py::format_descriptor<uint8_t>::format();
+    } else if (type == DataType::Bool) {
+        format = py::format_descriptor<bool>::format();
+    } else if (type == DataType::Float16 || type == DataType::BFloat16) {
+        // Python uses "e" for half precision float type code.
+        // Check the following link for more information.
+        // https://docs.python.org/3/library/struct.html#format-characters
+        format = "e";
+    } else {
+        throw std::runtime_error("Error converting TensorObj to "
+                                 "Numpy: unsupported datatype.\n");
+    }
+
+    return format;
+}
+
 void init_graph_builder(py::module &m) {
     using Handler = GraphHandlerObj;
 
@@ -326,6 +364,7 @@ void init_graph_builder(py::module &m) {
                          py::buffer_protocol())
         .def("fuid", &TensorObj::getFuid, policy::automatic)
         .def("shape", &TensorObj::getDims, policy::move)
+        .def("dtype", &TensorObj::getDTypeIndex, policy::automatic)
         .def("copyin_float", &TensorObj::copyin<float>, policy::move)
         .def("copyin_int32", &TensorObj::copyin<int32_t>, policy::move)
         .def("copyin_int64", &TensorObj::copyin<int64_t>, policy::move)
@@ -352,51 +391,24 @@ void init_graph_builder(py::module &m) {
                  }
                  self.copyin(data_np, self.getBytes());
              })
-        // A buffer can be used to convert a TensorObj directly to Numpy array
-        // without copy
-        .def_buffer([](TensorObj &self) -> py::buffer_info {
-            vector<ssize_t> stride_byte;
-            for (int s : self.getStride()) {
-                stride_byte.push_back(s * self.getDType().getSize());
-            }
+        // Return a Numpy array which copies the values of this tensor
+        .def("copyout_numpy",
+             [](TensorObj &self) -> py::array {
+                 vector<ssize_t> stride_byte;
+                 for (int s : self.getStride()) {
+                     stride_byte.push_back(s * self.getDType().getSize());
+                 }
+                 std::string format = getFormat(self.getDType());
 
-            std::string format;
-            if (self.getDType() == DataType::Float32) {
-                format = py::format_descriptor<float>::format();
-            } else if (self.getDType() == DataType::Double) {
-                format = py::format_descriptor<double>::format();
-            } else if (self.getDType() == DataType::Int32) {
-                format = py::format_descriptor<int32_t>::format();
-            } else if (self.getDType() == DataType::UInt32) {
-                format = py::format_descriptor<uint32_t>::format();
-            } else if (self.getDType() == DataType::Int64) {
-                format = py::format_descriptor<int64_t>::format();
-            } else if (self.getDType() == DataType::UInt64) {
-                format = py::format_descriptor<uint64_t>::format();
-            } else if (self.getDType() == DataType::Int16) {
-                format = py::format_descriptor<int16_t>::format();
-            } else if (self.getDType() == DataType::UInt16) {
-                format = py::format_descriptor<uint16_t>::format();
-            } else if (self.getDType() == DataType::Int8) {
-                format = py::format_descriptor<int8_t>::format();
-            } else if (self.getDType() == DataType::UInt8) {
-                format = py::format_descriptor<uint8_t>::format();
-            } else if (self.getDType() == DataType::Float16 ||
-                       self.getDType() == DataType::BFloat16) {
-                // Python uses "e" for half precision float type code.
-                // Check the following link for more information.
-                // https://docs.python.org/3/library/struct.html#format-characters
-                format = "e";
-            } else {
-                throw std::runtime_error("Error converting TensorObj to "
-                                         "Numpy: unsupported datatype.\n");
-            }
+                 py::array numpy_array(py::dtype(format), self.getDims(),
+                                       nullptr);
 
-            return py::buffer_info(self.getRawDataPtr<void *>(),
-                                   self.getDType().getSize(), format,
-                                   self.getRank(), self.getDims(), stride_byte,
-                                   true); // Read-only = true
-        })
+                 // Copy data to the numpy array
+                 auto ptr = numpy_array.mutable_data();
+                 self.copyout(ptr, self.getBytes());
+
+                 return numpy_array;
+             })
         .def("has_target", &TensorObj::hasTarget, policy::automatic)
         .def("src", &TensorObj::getSource, policy::move)
         .def("printData", &TensorObj::printData, policy::automatic);