From f0fcbe825f4562e6ba860cec480f640ef5508903 Mon Sep 17 00:00:00 2001
From: Liyan Zheng
Date: Fri, 21 Apr 2023 13:07:58 +0800
Subject: [PATCH] Add: python verification

---
 include/nnet/test_models.h                | 16 ++++++
 pyinfinitensor/src/pyinfinitensor/onnx.py | 10 ++++
 src/cuda/cuda_runtime.cc                  |  1 +
 src/ffi/ffi_infinitensor.cc               | 22 ++++-----
 src/nnet/App/test_models.cc               | 10 ++++
 test/nnet/run_models_nnet.py              | 60 +++++++++++++++++------
 6 files changed, 92 insertions(+), 27 deletions(-)
 create mode 100644 include/nnet/test_models.h

diff --git a/include/nnet/test_models.h b/include/nnet/test_models.h
new file mode 100644
index 00000000..d4d4d078
--- /dev/null
+++ b/include/nnet/test_models.h
@@ -0,0 +1,16 @@
+#ifdef USE_CUDA
+#include "core/graph.h"
+#include "core/runtime.h"
+#include "core/search_engine.h"
+
+namespace infini {
+
+Graph getInfoGAN(int batch, Runtime runtime, int nLayers);
+vector<Tensor> runInfoGAN(int nLayers);
+Graph getConvtransposedNHWC(Runtime runtime, Shape shape, int layerId);
+Graph optimizeGraph(Graph g, Runtime runtime, bool tuning);
+void initializeGraphTensors(Graph g, double l, double r, bool useInt);
+
+} // namespace infini
+
+#endif
diff --git a/pyinfinitensor/src/pyinfinitensor/onnx.py b/pyinfinitensor/src/pyinfinitensor/onnx.py
index 233a1b58..4b094e5f 100644
--- a/pyinfinitensor/src/pyinfinitensor/onnx.py
+++ b/pyinfinitensor/src/pyinfinitensor/onnx.py
@@ -878,6 +878,16 @@ class OnnxStub:
                     ctx.push_data_input(name, "max", TensorProto.FLOAT, [], [])
                 )
                 ctx.push_node(make_node(ty.name, inputs, outputs, name))
+            elif ty == backend.OpType.ConvTransNHWC:
+                ctx.push_node(
+                    make_node(
+                        ty.name,
+                        inputs,
+                        outputs,
+                        name,
+                        domain="nnet",
+                    )
+                )
             elif ty == backend.OpType.MemBound:
                 ctx.push_node(
                     make_node(
diff --git a/src/cuda/cuda_runtime.cc b/src/cuda/cuda_runtime.cc
index 1bcbf105..68a63186 100644
--- a/src/cuda/cuda_runtime.cc
+++ b/src/cuda/cuda_runtime.cc
@@ -156,6 +156,7 @@ double CudaRuntimeObj::timeWithCudaGraph(Graph graph) {
     IT_ASSERT(numCudaGraphNodes == kernels.size(),
               std::to_string(numCudaGraphNodes) +
                   " != " + std::to_string(kernels.size()));
+    printf("numCudaGraphNodes = %lu\n", numCudaGraphNodes);
     return timeit(
         [&, cudaGraphInstance = cudaGraphInstance, stream = getStream()]() {
             checkCudaError(cudaGraphLaunch(cudaGraphInstance, stream));
diff --git a/src/ffi/ffi_infinitensor.cc b/src/ffi/ffi_infinitensor.cc
index b36f7090..c8b3a3c1 100644
--- a/src/ffi/ffi_infinitensor.cc
+++ b/src/ffi/ffi_infinitensor.cc
@@ -2,6 +2,7 @@
 #include "core/mutator.h"
 #include "core/search_engine.h"
 #include "nnet/nmutator.h"
+#include "nnet/test_models.h"
 #include "operators/batch_norm.h"
 #include "operators/concat.h"
 #include "operators/conv.h"
@@ -378,20 +379,17 @@ void init_graph_builder(py::module &m) {
         .def("topo_sort", &GraphObj::topo_sort);
 }
 
-#ifdef USE_CUDA
-Graph getInfoGAN(int batch, Runtime runtime, int nLayers);
-vector<Tensor> runInfoGAN(int nLayers);
-Graph getConvtransposedNHWC(Runtime runtime, Shape shape, int layerId);
-Graph optimizeGraph(Graph g, Runtime runtime, bool tuning);
-
 void export_test_model(py::module &m) {
-    m.def("runInfoGAN", &runInfoGAN);
-    m.def("getInfoGAN", &getInfoGAN);
-    m.def("getConvtransposedNHWC", &getConvtransposedNHWC);
-    m.def("optimizeGraph", &optimizeGraph, "graph"_a, "runtime"_a,
-          "tuning"_a = false);
-}
+#ifdef USE_CUDA
+    m.def("runInfoGAN", &runInfoGAN)
+        .def("getInfoGAN", &getInfoGAN)
+        .def("getConvtransposedNHWC", &getConvtransposedNHWC)
+        .def("optimizeGraph", &optimizeGraph, "graph"_a, "runtime"_a,
+             "tuning"_a = false)
+        .def("initializeGraphTensors", &initializeGraphTensors, "g"_a,
+             "l"_a = -0.1, "r"_a = 0.1, "useInt"_a = false);
 #endif
+}
 
 } // namespace infini
 
diff --git a/src/nnet/App/test_models.cc b/src/nnet/App/test_models.cc
index 80d5ed93..4c3a1f96 100644
--- a/src/nnet/App/test_models.cc
+++ b/src/nnet/App/test_models.cc
@@ -83,6 +83,16 @@ void printGraph(Graph g) {
     }
 }
 
+void initializeGraphTensors(Graph g, double l, double r, bool useInt) {
+    auto gen = RandomGenerator(l, r, 0, useInt);
+    for (auto t : g->getInputs()) {
+        t->setData(gen);
+    }
+    for (auto t : g->getOutputs()) {
+        t->setData(ZeroGenerator());
+    }
+}
+
 Graph optimizeGraph(Graph g, Runtime _runtime, bool tuning) {
     auto runtime = as<CudaRuntimeObj>(_runtime);
     Runtime cpu = NativeCpuRuntimeObj::getInstance();
diff --git a/test/nnet/run_models_nnet.py b/test/nnet/run_models_nnet.py
index 76f28328..c24a9c5b 100644
--- a/test/nnet/run_models_nnet.py
+++ b/test/nnet/run_models_nnet.py
@@ -71,21 +71,14 @@ def run_e2e_InfoGAN():
     df.to_csv('a.csv')
 
 
-def runSingleConvT():
-    runtime = ft.cuda_runtime()
-    g = ft.getConvtransposedNHWC(runtime, [1, 2, 2, 448], 1)
-    opt_g = ft.optimizeGraph(g, runtime)
-    ft.if_onnx.export_onnx(opt_g, 'convtransposed.onnx')
+def getSingleConvT(runtime):
+    return ft.getConvtransposedNHWC(runtime, [1, 2, 2, 448], 1)
 
 
-def run_InfoGAN_without_tuning(runtime, tuning: bool):
-    g = ft.getInfoGAN(1, runtime, 5)
-    # g = ft.getInfoGAN(1, runtime, 1)
-    opt_g = ft.optimizeGraph(g, runtime, tuning)
+def save_onnx(opt_g: ft.Graph, filename: str):
     stub = OnnxStub.from_graph(opt_g)
-    with open("optimized.onnx", "wb") as f:
+    with open(filename, "wb") as f:
         f.write(stub.to_onnx("optimized").SerializeToString())
-    return opt_g
 
 
 def load_onnx(runtime) -> ft.Graph:
@@ -100,14 +93,51 @@ def run_and_evaluate(runtime, g):
     print(f'Cuda graph time = {runtime.timeWithCudaGraph(g)}')
 
 
+def run_graph_get_output(runtime, g):
+    ft.initializeGraphTensors(g)
+    runtime.run(g, True)
+    runtime.run(g, False)
+    tensors = [to_pytorch_tensor(t) for t in g.outputs()]
+    assert len(tensors) == 1
+    return tensors[0]
+
+
+def compare_tensors(ans, x):
+    assert ans.shape == x.shape
+    print(f'Allclose {torch.allclose(ans, x)}')
+    # Print error numbers
+    tot = np.product(ans.shape)
+    data = []
+    for i in range(0, 10):
+        tol = 10**(-i)
+        clo = torch.isclose(ans, x, atol=tol, rtol=tol).sum().item()
+        print(f'0.1^{i} close: {clo}/{tot} = {clo/tot}')
+        data.append(clo/tot)
+
+    # rel_err = torch.abs((ans-x)/ans)
+    # print(f'rel_err = {rel_err}')
+    # print(f'max rel err = {rel_err.max()}')
+    print(f'ans = {ans}')
+    print(f'x = {x}')
+
+
 if __name__ == "__main__":
+    runtime = ft.cuda_runtime()
     # run_e2e_InfoGAN()
     # runSingleConvT()
     # read_and_check()
-
-    runtime = ft.cuda_runtime()
     if True:
-        g = run_InfoGAN_without_tuning(runtime, False)
+        original_g = ft.getInfoGAN(16, runtime, 5)
+        # original_g = ft.getConvtransposedNHWC(runtime, [1, 1, 1, 228], 0)  # ConvTranspose 2x2
+        # original_g = ft.getConvtransposedNHWC(runtime, [16, 2, 2, 448], 1)  # ConvTranspose 4x4
+        g = ft.optimizeGraph(original_g, runtime, tuning=False)
     else:
        g = load_onnx(runtime)
-    run_and_evaluate(runtime, g)
+    save_onnx(g, "optimized.onnx")
+
+    ans = run_graph_get_output(runtime, original_g)
+    x = run_graph_get_output(runtime, g)
+    print('=== 138')
+    compare_tensors(ans, x)
+
+    # run_and_evaluate(runtime, g)
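
Note: the sketch below shows how the bindings added in this patch can be driven end to end for a quick numerical check; it is illustrative only and not part of the patch. Assumptions: the package is built with USE_CUDA; the import path "from pyinfinitensor import backend as ft" mirrors how test/nnet/run_models_nnet.py aliases the backend (its import lines are outside the hunks shown); the tensor-to-torch conversion helper is passed in by the caller (run_models_nnet.py uses to_pytorch_tensor); verify_optimization is a hypothetical name.

import torch
from pyinfinitensor import backend as ft  # assumed import path for the compiled bindings


def verify_optimization(to_torch, batch=16, n_layers=5, atol=1e-4) -> bool:
    # Build the InfoGAN test model, optimize it, run both graphs on the same
    # data, and compare their single output tensor.
    runtime = ft.cuda_runtime()
    original_g = ft.getInfoGAN(batch, runtime, n_layers)
    opt_g = ft.optimizeGraph(original_g, runtime, tuning=False)

    outputs = []
    for g in (original_g, opt_g):
        # initializeGraphTensors uses a fixed seed internally, so both graphs
        # receive identical pseudo-random inputs and zero-filled outputs.
        ft.initializeGraphTensors(g)
        runtime.run(g, True)   # first pass: kernel tuning/selection
        runtime.run(g, False)  # second pass: plain execution
        outputs.append(to_torch(g.outputs()[0]))

    return torch.allclose(outputs[0], outputs[1], atol=atol)

Called from the test script as verify_optimization(to_pytorch_tensor), this condenses the run_graph_get_output/compare_tensors flow above into a single boolean check.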