diff --git a/.clang-format b/.clang-format
new file mode 100644
index 00000000..d8a7dfe7
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,90 @@
+---
+Language:        Cpp
+# BasedOnStyle:  LLVM
+AccessModifierOffset: -2
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlinesLeft: false
+AlignOperands:   true
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: All
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: false
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:   
+  AfterClass:      false
+  AfterControlStatement: false
+  AfterEnum:       false
+  AfterFunction:   false
+  AfterNamespace:  false
+  AfterObjCDeclaration: false
+  AfterStruct:     false
+  AfterUnion:      false
+  BeforeCatch:     false
+  BeforeElse:      false
+  IndentBraces:    false
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Attach
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+ColumnLimit:     80
+CommentPragmas:  '^ IWYU pragma:'
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+DisableFormat:   false
+ExperimentalAutoDetectBinPacking: false
+ForEachMacros:   [ foreach, Q_FOREACH, BOOST_FOREACH ]
+IncludeCategories: 
+  - Regex:           '^"(llvm|llvm-c|clang|clang-c)/'
+    Priority:        2
+  - Regex:           '^(<|"(gtest|isl|json)/)'
+    Priority:        3
+  - Regex:           '.*'
+    Priority:        1
+IndentCaseLabels: false
+IndentWidth:     4
+IndentWrappedFunctionNames: false
+KeepEmptyLinesAtTheStartOfBlocks: true
+MacroBlockBegin: ''
+MacroBlockEnd:   ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PenaltyBreakBeforeFirstCallParameter: 19
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 60
+PointerAlignment: Right
+ReflowComments:  true
+SortIncludes:    true
+SpaceAfterCStyleCast: false
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles:  false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard:        Cpp11
+TabWidth:        8
+UseTab:          Never
+...
+
diff --git a/.cmake-format.json b/.cmake-format.json
new file mode 100644
index 00000000..ba3498b9
--- /dev/null
+++ b/.cmake-format.json
@@ -0,0 +1,311 @@
+{
+  "_help_parse": "Options affecting listfile parsing",
+  "parse": {
+    "_help_additional_commands": [
+      "Specify structure for custom cmake functions"
+    ],
+    "additional_commands": {
+      "foo": {
+        "flags": [
+          "BAR",
+          "BAZ"
+        ],
+        "kwargs": {
+          "HEADERS": "*",
+          "SOURCES": "*",
+          "DEPENDS": "*"
+        }
+      }
+    },
+    "_help_override_spec": [
+      "Override configurations per-command where available"
+    ],
+    "override_spec": {},
+    "_help_vartags": [
+      "Specify variable tags."
+    ],
+    "vartags": [],
+    "_help_proptags": [
+      "Specify property tags."
+    ],
+    "proptags": []
+  },
+  "_help_format": "Options affecting formatting.",
+  "format": {
+    "_help_disable": [
+      "Disable formatting entirely, making cmake-format a no-op"
+    ],
+    "disable": false,
+    "_help_line_width": [
+      "How wide to allow formatted cmake files"
+    ],
+    "line_width": 1500,
+    "_help_tab_size": [
+      "How many spaces to tab for indent"
+    ],
+    "tab_size": 2,
+    "_help_use_tabchars": [
+      "If true, lines are indented using tab characters (utf-8",
+      "0x09) instead of <tab_size> space characters (utf-8 0x20).",
+      "In cases where the layout would require a fractional tab",
+      "character, the behavior of the  fractional indentation is",
+      "governed by <fractional_tab_policy>"
+    ],
+    "use_tabchars": false,
+    "_help_fractional_tab_policy": [
+      "If <use_tabchars> is True, then the value of this variable",
+      "indicates how fractional indentions are handled during",
+      "whitespace replacement. If set to 'use-space', fractional",
+      "indentation is left as spaces (utf-8 0x20). If set to",
+      "`round-up` fractional indentation is replaced with a single",
+      "tab character (utf-8 0x09) effectively shifting the column",
+      "to the next tabstop"
+    ],
+    "fractional_tab_policy": "use-space",
+    "_help_max_subgroups_hwrap": [
+      "If an argument group contains more than this many sub-groups",
+      "(parg or kwarg groups) then force it to a vertical layout."
+    ],
+    "max_subgroups_hwrap": 2,
+    "_help_max_pargs_hwrap": [
+      "If a positional argument group contains more than this many",
+      "arguments, then force it to a vertical layout."
+    ],
+    "max_pargs_hwrap": 6,
+    "_help_max_rows_cmdline": [
+      "If a cmdline positional group consumes more than this many",
+      "lines without nesting, then invalidate the layout (and nest)"
+    ],
+    "max_rows_cmdline": 2,
+    "_help_separate_ctrl_name_with_space": [
+      "If true, separate flow control names from their parentheses",
+      "with a space"
+    ],
+    "separate_ctrl_name_with_space": false,
+    "_help_separate_fn_name_with_space": [
+      "If true, separate function names from parentheses with a",
+      "space"
+    ],
+    "separate_fn_name_with_space": false,
+    "_help_dangle_parens": [
+      "If a statement is wrapped to more than one line, than dangle",
+      "the closing parenthesis on its own line."
+    ],
+    "dangle_parens": false,
+    "_help_dangle_align": [
+      "If the trailing parenthesis must be 'dangled' on its on",
+      "line, then align it to this reference: `prefix`: the start",
+      "of the statement,  `prefix-indent`: the start of the",
+      "statement, plus one indentation  level, `child`: align to",
+      "the column of the arguments"
+    ],
+    "dangle_align": "prefix",
+    "_help_min_prefix_chars": [
+      "If the statement spelling length (including space and",
+      "parenthesis) is smaller than this amount, then force reject",
+      "nested layouts."
+    ],
+    "min_prefix_chars": 4,
+    "_help_max_prefix_chars": [
+      "If the statement spelling length (including space and",
+      "parenthesis) is larger than the tab width by more than this",
+      "amount, then force reject un-nested layouts."
+    ],
+    "max_prefix_chars": 10,
+    "_help_max_lines_hwrap": [
+      "If a candidate layout is wrapped horizontally but it exceeds",
+      "this many lines, then reject the layout."
+    ],
+    "max_lines_hwrap": 2,
+    "_help_line_ending": [
+      "What style line endings to use in the output."
+    ],
+    "line_ending": "unix",
+    "_help_command_case": [
+      "Format command names consistently as 'lower' or 'upper' case"
+    ],
+    "command_case": "canonical",
+    "_help_keyword_case": [
+      "Format keywords consistently as 'lower' or 'upper' case"
+    ],
+    "keyword_case": "unchanged",
+    "_help_always_wrap": [
+      "A list of command names which should always be wrapped"
+    ],
+    "always_wrap": [],
+    "_help_enable_sort": [
+      "If true, the argument lists which are known to be sortable",
+      "will be sorted lexicographicall"
+    ],
+    "enable_sort": true,
+    "_help_autosort": [
+      "If true, the parsers may infer whether or not an argument",
+      "list is sortable (without annotation)."
+    ],
+    "autosort": false,
+    "_help_require_valid_layout": [
+      "By default, if cmake-format cannot successfully fit",
+      "everything into the desired linewidth it will apply the",
+      "last, most aggressive attempt that it made. If this flag is",
+      "True, however, cmake-format will print error, exit with non-",
+      "zero status code, and write-out nothing"
+    ],
+    "require_valid_layout": false,
+    "_help_layout_passes": [
+      "A dictionary mapping layout nodes to a list of wrap",
+      "decisions. See the documentation for more information."
+    ],
+    "layout_passes": {}
+  },
+  "_help_markup": "Options affecting comment reflow and formatting.",
+  "markup": {
+    "_help_bullet_char": [
+      "What character to use for bulleted lists"
+    ],
+    "bullet_char": "*",
+    "_help_enum_char": [
+      "What character to use as punctuation after numerals in an",
+      "enumerated list"
+    ],
+    "enum_char": ".",
+    "_help_first_comment_is_literal": [
+      "If comment markup is enabled, don't reflow the first comment",
+      "block in each listfile. Use this to preserve formatting of",
+      "your copyright/license statements."
+    ],
+    "first_comment_is_literal": false,
+    "_help_literal_comment_pattern": [
+      "If comment markup is enabled, don't reflow any comment block",
+      "which matches this (regex) pattern. Default is `None`",
+      "(disabled)."
+    ],
+    "literal_comment_pattern": ".*",
+    "_help_fence_pattern": [
+      "Regular expression to match preformat fences in comments",
+      "default= ``r'^\\s*([`~]{3}[`~]*)(.*)$'``"
+    ],
+    "fence_pattern": "^\\s*([`~]{3}[`~]*)(.*)$",
+    "_help_ruler_pattern": [
+      "Regular expression to match rulers in comments default=",
+      "``r'^\\s*[^\\w\\s]{3}.*[^\\w\\s]{3}$'``"
+    ],
+    "ruler_pattern": "^\\s*[^\\w\\s]{3}.*[^\\w\\s]{3}$",
+    "_help_explicit_trailing_pattern": [
+      "If a comment line matches starts with this pattern then it",
+      "is explicitly a trailing comment for the preceding argument.",
+      "Default is '#<'"
+    ],
+    "explicit_trailing_pattern": "#<",
+    "_help_hashruler_min_length": [
+      "If a comment line starts with at least this many consecutive",
+      "hash characters, then don't lstrip() them off. This allows",
+      "for lazy hash rulers where the first hash char is not",
+      "separated by space"
+    ],
+    "hashruler_min_length": 10,
+    "_help_canonicalize_hashrulers": [
+      "If true, then insert a space between the first hash char and",
+      "remaining hash chars in a hash ruler, and normalize its",
+      "length to fill the column"
+    ],
+    "canonicalize_hashrulers": true,
+    "_help_enable_markup": [
+      "enable comment markup parsing and reflow"
+    ],
+    "enable_markup": true
+  },
+  "_help_lint": "Options affecting the linter",
+  "lint": {
+    "_help_disabled_codes": [
+      "a list of lint codes to disable"
+    ],
+    "disabled_codes": [],
+    "_help_function_pattern": [
+      "regular expression pattern describing valid function names"
+    ],
+    "function_pattern": "[0-9a-z_]+",
+    "_help_macro_pattern": [
+      "regular expression pattern describing valid macro names"
+    ],
+    "macro_pattern": "[0-9A-Z_]+",
+    "_help_global_var_pattern": [
+      "regular expression pattern describing valid names for",
+      "variables with global (cache) scope"
+    ],
+    "global_var_pattern": "[A-Z][0-9A-Z_]+",
+    "_help_internal_var_pattern": [
+      "regular expression pattern describing valid names for",
+      "variables with global scope (but internal semantic)"
+    ],
+    "internal_var_pattern": "_[A-Z][0-9A-Z_]+",
+    "_help_local_var_pattern": [
+      "regular expression pattern describing valid names for",
+      "variables with local scope"
+    ],
+    "local_var_pattern": "[a-z][a-z0-9_]+",
+    "_help_private_var_pattern": [
+      "regular expression pattern describing valid names for",
+      "privatedirectory variables"
+    ],
+    "private_var_pattern": "_[0-9a-z_]+",
+    "_help_public_var_pattern": [
+      "regular expression pattern describing valid names for public",
+      "directory variables"
+    ],
+    "public_var_pattern": "[A-Z][0-9A-Z_]+",
+    "_help_argument_var_pattern": [
+      "regular expression pattern describing valid names for",
+      "function/macro arguments and loop variables."
+    ],
+    "argument_var_pattern": "[a-z][a-z0-9_]+",
+    "_help_keyword_pattern": [
+      "regular expression pattern describing valid names for",
+      "keywords used in functions or macros"
+    ],
+    "keyword_pattern": "[A-Z][0-9A-Z_]+",
+    "_help_max_conditionals_custom_parser": [
+      "In the heuristic for C0201, how many conditionals to match",
+      "within a loop in before considering the loop a parser."
+    ],
+    "max_conditionals_custom_parser": 2,
+    "_help_min_statement_spacing": [
+      "Require at least this many newlines between statements"
+    ],
+    "min_statement_spacing": 1,
+    "_help_max_statement_spacing": [
+      "Require no more than this many newlines between statements"
+    ],
+    "max_statement_spacing": 2,
+    "max_returns": 6,
+    "max_branches": 12,
+    "max_arguments": 5,
+    "max_localvars": 15,
+    "max_statements": 50
+  },
+  "_help_encode": "Options affecting file encoding",
+  "encode": {
+    "_help_emit_byteorder_mark": [
+      "If true, emit the unicode byte-order mark (BOM) at the start",
+      "of the file"
+    ],
+    "emit_byteorder_mark": false,
+    "_help_input_encoding": [
+      "Specify the encoding of the input file. Defaults to utf-8"
+    ],
+    "input_encoding": "utf-8",
+    "_help_output_encoding": [
+      "Specify the encoding of the output file. Defaults to utf-8.",
+      "Note that cmake only claims to support utf-8 so be careful",
+      "when using anything else"
+    ],
+    "output_encoding": "utf-8"
+  },
+  "_help_misc": "Miscellaneous configurations options.",
+  "misc": {
+    "_help_per_command": [
+      "A dictionary containing any per-command configuration",
+      "overrides. Currently only `command_case` is supported."
+    ],
+    "per_command": {}
+  }
+}
diff --git a/.github/workflows/clang-format-check.yml b/.github/workflows/clang-format-check.yml
new file mode 100644
index 00000000..24ba1a27
--- /dev/null
+++ b/.github/workflows/clang-format-check.yml
@@ -0,0 +1,19 @@
+name: clang-format Check
+on: [pull_request]
+jobs:
+  formatting-check:
+    name: Formatting Check
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        path:
+          - 'include'
+          - 'src'
+          - 'test'
+    steps:
+    - uses: actions/checkout@v2
+    - name: Run clang-format style check for C/C++/Protobuf programs.
+      uses: jidicula/clang-format-action@v4.8.0
+      with:
+        clang-format-version: '14'
+        check-path: ${{ matrix.path }}
diff --git a/.gitignore b/.gitignore
index 259148fa..6d512811 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,3 +30,8 @@
 *.exe
 *.out
 *.app
+
+build/
+build_debug/
+
+.vscode/
\ No newline at end of file
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 00000000..2f905b6a
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,9 @@
+[submodule "3rd-party/pybind11"]
+	path = 3rd-party/pybind11
+	url = git@github.com:pybind/pybind11.git
+[submodule "3rd-party/nlohmann_json_cmake_fetchcontent"]
+	path = 3rd-party/nlohmann_json_cmake_fetchcontent
+	url = git@github.com:ArthurSonzogni/nlohmann_json_cmake_fetchcontent.git
+[submodule "3rd-party/googletest"]
+	path = 3rd-party/googletest
+	url = git@github.com:google/googletest.git
diff --git a/3rd-party/googletest b/3rd-party/googletest
new file mode 160000
index 00000000..e2239ee6
--- /dev/null
+++ b/3rd-party/googletest
@@ -0,0 +1 @@
+Subproject commit e2239ee6043f73722e7aa812a459f54a28552929
diff --git a/3rd-party/nlohmann_json_cmake_fetchcontent b/3rd-party/nlohmann_json_cmake_fetchcontent
new file mode 160000
index 00000000..6aebf092
--- /dev/null
+++ b/3rd-party/nlohmann_json_cmake_fetchcontent
@@ -0,0 +1 @@
+Subproject commit 6aebf09233951e4ce30a63919186a70b2b195756
diff --git a/3rd-party/pybind11 b/3rd-party/pybind11
new file mode 160000
index 00000000..1e3400b6
--- /dev/null
+++ b/3rd-party/pybind11
@@ -0,0 +1 @@
+Subproject commit 1e3400b6742288429f2069aaf5febf92d0662dae
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 00000000..f1079f65
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,91 @@
+# TODO: check the minimum cmake version
+cmake_minimum_required(VERSION 3.9) # Required by find_package(OpenMP)
+include(CMakeDependentOption)
+project(InfiniTensor C CXX)
+
+# Do not change these options in this file. Use cmake.config, cmake -DOPTION=VALUE, or ccmake to specify them.
+option(BUILD_TEST "Build tests" ON)
+cmake_dependent_option(BUILD_TEST_CORE "Build tests for core components" ON BUILD_TEST OFF)
+cmake_dependent_option(BUILD_TEST_PET "Build tests for PET" OFF BUILD_TEST OFF)
+cmake_dependent_option(BUILD_TEST_EINNET "Build tests for EINNET" OFF BUILD_TEST OFF)
+
+set(DEFAULT_BUILD_TYPE "RelWithDebInfo")
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_EXTENSIONS OFF) # -std=gnu++11 when on, -std=c++11 when off
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror -Wno-error=deprecated-declarations")
+set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -UNDEBUG") # Enable assertion
+set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -UNDEBUG") # Enable assertion
+
+find_package(
+  Python
+  COMPONENTS Interpreter Development
+  REQUIRED)
+find_package(CUDA REQUIRED)
+# OpenMP
+find_package(OpenMP)
+if(OpenMP_C_FOUND)
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+endif()
+if(OpenMP_CXX_FOUND)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+endif()
+
+include_directories(include)
+
+# # Pybind11
+# add_subdirectory(3rd-party/pybind11)
+# include_directories(3rd-party/pybind11/include)
+
+# nlohmann_json
+add_subdirectory(3rd-party/nlohmann_json_cmake_fetchcontent)
+include_directories(3rd-party/nlohmann_json_cmake_fetchcontent/single_include)
+
+if(BUILD_TEST)
+  set(BUILD_GMOCK
+      OFF
+      CACHE BOOL "Do not build gmock" FORCE)
+  set(INSTALL_GTEST
+      OFF
+      CACHE BOOL "Do not install gtest" FORCE)
+  add_subdirectory(3rd-party/googletest)
+  include_directories(3rd-party/googletest/googletest/include)
+endif()
+
+file(GLOB_RECURSE SRC src/*.cc src/*.cu)
+# file(GLOB_RECURSE FFI src/ffi/ffi_pet.cc)
+# list(REMOVE_ITEM SRC ${TEST} ${FFI})
+
+add_library(InfiniTensor SHARED ${SRC})
+# Target
+# cuda_add_library(it SHARED ${SRC})
+# cuda_add_cublas_to_target(it) # cublas
+# # target_link_libraries(infini_cpp cudnn curand nlohmann_json::nlohmann_json pybind11::embed)
+
+# # Python bindings
+# pybind11_add_module(infini MODULE ${FFI})
+# target_link_libraries(infini PRIVATE infini_cpp)
+
+function(build_test files)
+  # Non-recursive glob for skip failed tests
+  file(GLOB TEST_SOURCES ${files})
+  foreach(testsourcefile ${TEST_SOURCES})
+    get_filename_component(testname ${testsourcefile} NAME_WE)
+    add_executable(${testname} ${testsourcefile})
+    target_link_libraries(${testname} InfiniTensor GTest::gtest_main)
+    add_test(NAME ${testname} COMMAND ${testname})
+  endforeach(testsourcefile ${TEST_SOURCES})
+endfunction()
+
+if(BUILD_TEST)
+  enable_testing()
+  if(BUILD_TEST_CORE)
+    build_test(test/core/*.cc)
+  endif()
+  if(BUILD_TEST_PET)
+    build_test(test/pet/*.cc)
+  endif()
+  if(BUILD_TEST_EINNET)
+    build_test(test/nnet/*.cc)
+  endif()
+endif()
diff --git a/include/core/common.h b/include/core/common.h
new file mode 100644
index 00000000..0fe7344e
--- /dev/null
+++ b/include/core/common.h
@@ -0,0 +1,61 @@
+#pragma once
+#include <cassert>
+#include <functional>
+#include <iostream>
+#include <list>
+#include <map>
+#include <optional>
+#include <set>
+#include <sstream>
+#include <string>
+#include <tuple>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+namespace infini {
+using std::list;
+using std::map;
+using std::pair;
+using std::set;
+using std::string;
+using std::tie;
+using std::to_string;
+using std::tuple;
+using std::unordered_map;
+using std::vector;
+
+// Aliases
+using dtype = float;
+using HashType = size_t; // compatible with std::hash
+
+// Metaprogramming utilities
+#define _CAT(A, B) A##B
+#define _SELECT(NAME, NUM) _CAT(NAME##_, NUM)
+#define _GET_COUNT(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, COUNT, ...) COUNT
+#define _VA_SIZE(...) _GET_COUNT(__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)
+#define _VA_SELECT(NAME, ...) _SELECT(NAME, _VA_SIZE(__VA_ARGS__))(__VA_ARGS__)
+
+// Assert: conditions should have no side effect
+#define _IT_ASSERT_2(name, info)                                               \
+    (static_cast<bool>(name)                                                   \
+         ? void(0)                                                             \
+         : throw std::runtime_error(                                           \
+               std::string("[") + __FILE__ + ":" + std::to_string(__LINE__) +  \
+               "] Assertion failed (" + #name + "): " + #info))
+#define _IT_ASSERT_1(name) _IT_ASSERT_2(name, "");
+
+#define IT_ASSERT(...) _VA_SELECT(_IT_ASSERT, __VA_ARGS__)
+#define IT_TODO_HALT() IT_ASSERT(false, "Unimplemented")
+#define IT_TODO_SKIP() puts("Unimplemented " __FILE__ ":" __LINE__)
+
+// Other utilities
+
+// std::to_underlying is avaiable since C++23
+template <typename T> auto enum_to_underlying(T e) {
+    return static_cast<std::underlying_type_t<T>>(e);
+}
+
+double timeit(const std::function<void()> &func);
+
+} // namespace infini
diff --git a/include/core/graph.h b/include/core/graph.h
new file mode 100644
index 00000000..9c87310a
--- /dev/null
+++ b/include/core/graph.h
@@ -0,0 +1,47 @@
+#pragma once
+#include "core/operator.h"
+#include "core/tensor.h"
+
+namespace infini {
+
+// TODO: graph should be attached to a context
+class GraphNode : public Object {
+  protected:
+    TensorVec tensors;
+    TensorVec inputs;
+    TensorVec outputs;
+    OpVec ops;
+
+  public:
+    // Graph(OpVec oplist);
+    string toString() const override;
+
+    void addOp(Operator op) { ops.push_back(op); };
+    const TensorVec &getTensors() const { return tensors; }
+    const TensorVec &getInputs() const { return inputs; }
+    const TensorVec &getOutputs() const { return outputs; }
+    const OpVec &getOperators() const { return ops; }
+    // TensorVec &getInputs();
+    // TensorVec &getOutputs();
+
+    Tensor addTensor(Shape dim, DataType dtype = DataType::Int32) {
+        Tensor tensor = make_ref<TensorNode>(dim, dtype);
+        tensors.emplace_back(tensor);
+        return tensor;
+    }
+
+    void dataMalloc();
+
+  private:
+    // TODO: updateConnection
+    /**
+     * @brief Add reverse connections and Op relationship in ctor.
+     */
+    void updateConnection();
+
+    // TODO: move to another class
+    // bool exportOnnx(const char *path);
+    // bool importOnnx(const char *net);
+};
+
+} // namespace infini
diff --git a/include/core/kernel.h b/include/core/kernel.h
new file mode 100644
index 00000000..4ecb2065
--- /dev/null
+++ b/include/core/kernel.h
@@ -0,0 +1,76 @@
+#pragma once
+#include "core/common.h"
+#include "core/operator.h"
+#include "core/tensor.h"
+
+namespace infini {
+
+struct PerfRecord {
+    double time; // in milliseconds
+};
+
+class Kernel {
+  public:
+    Kernel() {}
+    virtual ~Kernel() {}
+
+    /**
+     * @param op The operator to be executed.
+     * @param record The parameters for kernel execution. If extra parameters
+     * are required, inherit from PerfRecord and add extra parameters.
+     * Otherwire, use PerfRecord directly.
+     */
+    virtual void compute(const Operator &op,
+                         const PerfRecord &record) const = 0;
+    /**
+     * @brief Executes an op with a default parameter.
+     */
+    virtual void compute(const Operator &op) const = 0;
+    // Premise: op is idempotent since it is called multiple times.
+    virtual PerfRecord tune(const Operator &op) const = 0;
+};
+
+class KernelRegistry {
+  public:
+    using KernelRecord =
+        tuple<Kernel *const, const string, const int>; // Kernel, name, ID
+
+  private:
+    std::map<KernelAttrs, KernelRecord> kernels;
+    int nKernels = 0;
+
+  public:
+    ~KernelRegistry() {
+        for (auto &[k, v] : kernels)
+            delete std::get<0>(v);
+    }
+    static KernelRegistry &getInstance() {
+        static KernelRegistry instance;
+        return instance;
+    }
+    bool registerKernel(const KernelAttrs &key, Kernel *kernel, string name) {
+        // TODO: mutliple kernels support: priority and check name
+        IT_ASSERT(kernels.find(key) == kernels.end(),
+                  "Kernel already registered");
+        kernels.emplace(key, KernelRecord{kernel, name, ++nKernels});
+        return true;
+    }
+    Kernel *getKernel(const KernelAttrs &kernelAttrs) const {
+        return std::get<0>(kernels.at(kernelAttrs));
+    }
+    const KernelRecord &getKernelItem(const KernelAttrs &kernelAttrs) const {
+        return kernels.at(kernelAttrs);
+    }
+};
+
+} // namespace infini
+
+#define _REGISTER_KERNEL_1(device, opType, dataType, kernel, name, cnt)        \
+    namespace infini {                                                         \
+    static const bool _CAT(_register_kernel_, cnt) =                           \
+        KernelRegistry::getInstance().registerKernel(                          \
+            KernelAttrs{device, opType, dataType}, new kernel(), name);        \
+    }
+
+#define REGISTER_KERNEL(device, opType, dataType, kernel, name)                \
+    _REGISTER_KERNEL_1(device, opType, dataType, kernel, name, __COUNTER__)
diff --git a/include/core/mutator.h b/include/core/mutator.h
new file mode 100644
index 00000000..42402151
--- /dev/null
+++ b/include/core/mutator.h
@@ -0,0 +1,19 @@
+#pragma once
+#include "core/graph.h"
+
+namespace infini {
+
+class Mutator {
+  private:
+    int candidatesLimit;
+    // // Statistical data
+    // int numTotalCandidates;
+
+  public:
+    Mutator(int candidatesLimit) : candidatesLimit(candidatesLimit){};
+    virtual ~Mutator(){};
+
+    virtual vector<Graph> run(const Graph &in_graph) = 0;
+};
+
+} // namespace infini
diff --git a/include/core/object.h b/include/core/object.h
new file mode 100644
index 00000000..0faec9f5
--- /dev/null
+++ b/include/core/object.h
@@ -0,0 +1,54 @@
+#pragma once
+#include "core/common.h"
+#include "ref.h"
+
+namespace infini {
+
+using GuidBaseType = int;
+
+class Guid {
+  private:
+    GuidBaseType guid;
+
+  private:
+    GuidBaseType generateGuid() {
+        static GuidBaseType guidCnt = 0;
+        return ++guidCnt;
+    }
+
+  public:
+    Guid() { guid = generateGuid(); }
+    Guid(const Guid &rhs) { guid = generateGuid(); }
+    Guid &operator=(const Guid &rhs) {
+        guid = generateGuid();
+        return *this;
+    }
+
+    operator GuidBaseType() const { return guid; }
+};
+
+class Object {
+  protected:
+    Guid guid;
+
+  public:
+    virtual ~Object(){};
+    virtual string toString() const = 0;
+    void print() { std::cout << toString() << std::endl; }
+    Guid getGuid() const { return guid; }
+};
+
+inline std::ostream &operator<<(std::ostream &os, const Object &obj) {
+    os << obj.toString();
+    return os;
+}
+
+// Overload for Ref-wrapped Object
+template <typename T,
+          typename std::enable_if_t<std::is_base_of_v<Object, T>> * = nullptr>
+inline std::ostream &operator<<(std::ostream &os, const Ref<T> &obj) {
+    os << obj->toString();
+    return os;
+}
+
+} // namespace infini
\ No newline at end of file
diff --git a/include/core/operator.h b/include/core/operator.h
new file mode 100644
index 00000000..a24f240d
--- /dev/null
+++ b/include/core/operator.h
@@ -0,0 +1,180 @@
+#pragma once
+#include "core/tensor.h"
+
+namespace infini {
+
+enum class OpType {
+    Unknown = 0,
+    // linear
+    Conv = 100,
+    Matmul,
+    ConvTrans,
+    G2BMM,
+    GBMML,
+    Pad,
+    Slice,
+    Concat,
+    Split,
+    Transpose,
+    Extend,
+    MaxPool,
+    AvgPool,
+    Add,
+    Sub,
+    Mul,
+    Div,
+    Pow,
+    Gather,
+    ReduceMean,
+    Reshape,
+    Identity,
+    // element wise
+    BatchNorm = 200,
+    Softmax,
+    Activation,
+    Resize,
+    //
+    MemBound = 300,
+};
+
+enum class Device { CPU = 1, CUDA };
+
+using KernelAttrs = std::tuple<Device, OpType, DataType>;
+
+class OpRegistry {
+  public:
+    static std::string getOpName(OpType opType) {
+#define FOP(op)                                                                \
+    case OpType::op:                                                           \
+        return #op
+
+        switch (opType) {
+            FOP(Unknown);
+            // linear
+            FOP(Conv);
+            FOP(Matmul);
+            FOP(ConvTrans);
+            FOP(G2BMM);
+            FOP(GBMML);
+            FOP(Pad);
+            FOP(Slice);
+            FOP(Concat);
+            FOP(Split);
+            FOP(Transpose);
+            FOP(Extend);
+            FOP(MaxPool);
+            FOP(AvgPool);
+            FOP(Add);
+            FOP(Sub);
+            FOP(Mul);
+            FOP(Div);
+            FOP(Pow);
+            FOP(Gather);
+            FOP(ReduceMean);
+            FOP(Reshape);
+            FOP(Identity);
+            // element wise
+            FOP(BatchNorm);
+            FOP(Softmax);
+            FOP(Activation);
+            //
+            FOP(MemBound);
+        default:
+            IT_ASSERT(false);
+            break;
+        }
+#undef FOP
+    }
+};
+
+enum class ActType {
+    None,
+    Relu,
+    Sigmoid,
+    Tanh,
+};
+
+struct OpPerfKey {
+    HashType hash;
+    OpType opType;
+    vector<int> attrs;
+
+  public:
+    OpPerfKey(HashType hash, OpType opType, vector<int> attrs = {})
+        : hash(hash), opType(opType), attrs(attrs) {}
+    bool operator==(const OpPerfKey &rhs) const {
+        if (hash != rhs.hash)
+            return false;
+        if (opType != rhs.opType)
+            return false;
+        if (attrs != rhs.attrs)
+            return false;
+        return true;
+    }
+
+    // TODO: remove this function after we use unordered_map in PerfEngine
+    bool operator<(const OpPerfKey &rhs) const {
+        if (hash != rhs.hash)
+            return hash < rhs.hash;
+        if (opType != rhs.opType)
+            return opType < rhs.opType;
+        if (attrs.size() != rhs.attrs.size())
+            return attrs.size() < rhs.attrs.size();
+        for (size_t i = 0; i < attrs.size(); ++i)
+            if (attrs[i] != rhs.attrs[i])
+                return attrs[i] < rhs.attrs[i];
+        return false;
+    }
+};
+
+class OperatorNode : public Object {
+    friend class Kernel;
+
+  protected:
+    OpType type;
+    TensorVec inputs;
+    TensorVec outputs;
+    // vector<WRef<Operator>> predecessors;
+    // vector<WRef<Operator>> successors;
+
+  public:
+    OperatorNode(OpType opType, TensorVec inputs, TensorVec outputs)
+        : type(opType), inputs(inputs), outputs(outputs) {}
+    virtual vector<Shape> computeShape() const = 0;
+    virtual OpPerfKey getOpPerfKey() const = 0;
+
+  public: // check Op type
+    bool isLinearOp() const;
+    bool isElementWiseOp() const;
+    bool isSplitOp() const;
+    bool isConcatOp() const;
+    bool isComputeOp() const;
+    bool isTransposeOp() const;
+    bool isReshapeOp() const;
+    bool isMemBoundOp() const;
+
+  public: // getter and setter
+    // TensorVec getInputs() { return inputs; }
+    const TensorVec &getInputs() const { return inputs; }
+    // TensorVec getOutputs() { return outputs; }
+    const TensorVec &getOutputs() const { return outputs; }
+    Tensor getInputs(size_t i) { return inputs.at(i); }
+    Tensor getOutput() const {
+        IT_ASSERT(outputs.size() == 1, "Unimplemented");
+        return outputs[0];
+    }
+    OpType getOpType() const { return type; }
+
+    virtual int numInputs() const = 0;
+    virtual int numOutputs() const = 0;
+    virtual HashType hash() const { IT_TODO_HALT(); }
+    virtual HashType hashWithShape() const { IT_TODO_HALT(); }
+};
+
+} // namespace infini
+
+namespace std {
+template <> struct hash<infini::OpPerfKey> {
+    size_t operator()(const infini::OpPerfKey &key) const { return key.hash; }
+};
+} // namespace std
\ No newline at end of file
diff --git a/include/core/perf_engine.h b/include/core/perf_engine.h
new file mode 100644
index 00000000..563ad704
--- /dev/null
+++ b/include/core/perf_engine.h
@@ -0,0 +1,36 @@
+#pragma once
+#include "core/graph.h"
+#include "core/kernel.h"
+
+namespace infini {
+
+class PerfEngine {
+  public:
+    // TODO: Key should be OpPerfKey + Context(maybe implicat) to support
+    // multiple candiate kernels.
+    using Key = std::pair<KernelAttrs, OpPerfKey>;
+
+  private:
+    map<Key, PerfRecord> data;
+
+  public:
+    static PerfEngine &getInstance() {
+        static PerfEngine instance;
+        return instance;
+    }
+
+    std::optional<PerfRecord> getPerfData(const Key &key) {
+        auto it = data.find(key);
+        if (it != data.end()) // find previous evaluating results
+            return data.at(key);
+        else
+            return std::nullopt;
+    }
+
+    void setPerfData(const Key &key, const PerfRecord &record) {
+        IT_ASSERT(data.find(key) == data.end(), "Perf data already exist");
+        data.emplace(key, record);
+    }
+};
+
+} // namespace infini
\ No newline at end of file
diff --git a/include/core/ref.h b/include/core/ref.h
new file mode 100644
index 00000000..f5ba4e89
--- /dev/null
+++ b/include/core/ref.h
@@ -0,0 +1,35 @@
+#pragma once
+#include <functional> // hash
+#include <memory>
+#include <type_traits>
+
+namespace infini {
+
+template <typename T> using Ref = std::shared_ptr<T>;
+template <typename T> using WRef = std::weak_ptr<T>;
+
+template <typename T> struct is_ref : std::false_type {};
+template <typename T> struct is_ref<Ref<T>> : std::true_type {};
+template <typename T> struct is_ref<WRef<T>> : std::true_type {};
+
+template <typename T, typename... Params> Ref<T> make_ref(Params &&...params) {
+    static_assert(is_ref<T>::value == false, "Ref should not be nested");
+    return std::make_shared<T>(std::forward<Params>(params)...);
+}
+
+template <class T, class U,
+          typename std::enable_if_t<std::is_base_of_v<U, T>> * = nullptr>
+Ref<T> as(const Ref<U> &ref) {
+    return std::dynamic_pointer_cast<T>(ref);
+}
+
+template <typename T>
+std::vector<WRef<T>> get_wref_vec(const std::vector<Ref<T>> &vec) {
+    std::vector<WRef<T>> wref_vec;
+    wref_vec.reserve(vec.size());
+    for (const auto &ref : vec)
+        wref_vec.emplace_back(ref);
+    return wref_vec;
+}
+
+} // namespace infini
\ No newline at end of file
diff --git a/include/core/run_enigne.h b/include/core/run_enigne.h
new file mode 100644
index 00000000..1b4877a1
--- /dev/null
+++ b/include/core/run_enigne.h
@@ -0,0 +1,26 @@
+#pragma once
+#include "core/graph.h"
+#include "core/kernel.h"
+#include "core/perf_engine.h"
+
+namespace infini {
+
+class RunEngine {
+  private:
+    Device device;
+
+  public:
+    RunEngine(Device device) : device(device) {}
+    ~RunEngine() {}
+
+    void run(const Graph &graph, bool tune = false,
+             bool profiling = false) const;
+    double getPerfTime(const Graph &graph, bool profiling = false) const;
+
+  private:
+    void printProfilingData(double totTime,
+                            const std::map<OpType, double> &opTime,
+                            const std::map<OpType, int> &opCnt) const;
+};
+
+} // namespace infini
\ No newline at end of file
diff --git a/include/core/tensor.h b/include/core/tensor.h
new file mode 100644
index 00000000..67544753
--- /dev/null
+++ b/include/core/tensor.h
@@ -0,0 +1,180 @@
+#pragma once
+#include "core/tensor_base.h"
+
+namespace infini {
+
+// TODO: how to deal with this
+using ShapeElem = int;
+using Shape = vector<ShapeElem>;
+class TensorNode : public TensorBaseNode {
+  private:
+    Shape shape;
+
+  public:
+    TensorNode(const Shape &shape, DataType dtype);
+    virtual ~TensorNode() {}
+    string toString() const override;
+
+    size_t size() const;
+    void dataMalloc();
+
+    Shape getDims() const { return shape; }
+
+    size_t getOffset(const Shape &ds) const;
+    using TensorBaseNode::getData;
+    VType getData(const Shape &pos) const;
+    void copyData(VType *dptr);
+    void printData() const;
+    bool equalData(const Tensor &rhs) const;
+    // void setDims(const Dim &dms) { dims = dms; }
+
+    //     bool dataRand(int seed = 0) {
+    //         if (data == nullptr)
+    //             data = new VType[size()];
+    //         if (!random_inited)
+    //             initFastrand();
+    //         // srand(seed);
+    //         // faster rand generator; parallel
+    //         size_t iEnd = size();
+    //         // std::cerr << "Init beginned " << std::endl;
+    // #pragma omp parallel for
+    //         for (size_t i = 0; i < iEnd; ++i)
+    //             data[i] = fastrand(random_seed[omp_get_thread_num() * 16]) %
+    //             10000;
+    //         // std::cerr << "Init finished" << std::endl;
+    //         computed = ComputedFull;
+    //         return true;
+    //     }
+
+    //     bool setScalar(VType val) {
+    //         if (data == nullptr || !dims.empty())
+    //             return false;
+    //         data[0] = val;
+    //         return true;
+    //     }
+
+    //     bool setData(const Dim &ds, VType val) {
+    //         if (data == nullptr || ds.size() != dims.size())
+    //             return false;
+    //         data[getOffset(ds)] = val;
+    //         return true;
+    //     }
+
+    //     bool setData(size_t pos, VType val) {
+    //         if (data == nullptr || pos >= size())
+    //             return false;
+    //         data[pos] = val;
+    //         return true;
+    //     }
+
+    //     VType getScalar() { return data == nullptr ? 0 : data[0]; }
+
+    //     VType getBroadcastData(const Dim &ds) {
+    //         assert(data != nullptr);
+    //         auto offset = getBroadcastOffset(ds);
+    //         return offset == (size_t)-1 ? 0 : data[getOffset(ds)];
+    //     }
+
+    //     VType getBroadcastData(size_t pos) {
+    //         assert(data != nullptr);
+    //         return data[pos % size()];
+    //     }
+
+    //     size_t getBroadcastOffset(const Dim &ds) {
+    //         assert(ds.size() >= dims.size());
+    //         auto nDim = dims.size();
+    //         auto nBroadcastDim = ds.size() - nDim;
+    //         for (size_t i = 0; i < nDim; ++i)
+    //             if (ds[nBroadcastDim + i] < 0 || ds[nBroadcastDim + i] >=
+    //             dims[i])
+    //                 return (size_t)-1;
+    //         size_t idx = 0;
+    //         for (size_t i = 0; i < nDim; ++i)
+    //             idx = idx * dims[i] + ds[nBroadcastDim + i];
+    //         return idx;
+    //     }
+
+    //     void itInit() { it = Dim(dims.size(), 0); }
+
+    //     void itReset() {
+    //         itInit();
+    //         for (size_t i = 0, iEnd = it.size(); i < iEnd; ++i)
+    //             it[i] = 0;
+    //     }
+
+    //     bool itValid() {
+    //         if (it.size() != dims.size())
+    //             return false;
+    //         for (size_t i = 0, iEnd = it.size(); i < iEnd; ++i)
+    //             if (it[i] >= dims[i])
+    //                 return false;
+    //         return true;
+    //     }
+
+    //     const Dim &itGet() { return it; }
+
+    //     void itNext() {
+    //         auto p = it.size() - 1;
+    //         it[p] += 1;
+    //         while (p >= 1) {
+    //             if (it[p] == dims[p]) {
+    //                 it[p] = 0;
+    //                 it[--p] += 1;
+    //             } else
+    //                 break;
+    //         }
+    //     }
+
+    //     TensorType getType() const { return type; }
+    //     void setType(TensorType ty) { type = ty; }
+
+    //     static inline void initFastrand() {
+    //         assert(omp_get_max_threads() <= 256);
+    //         // srand(0); // constant seed for test
+    //         // align random_seed to avoid false sharing
+    //         for (int i = 0; i < 256 * 16; ++i) {
+    //             // random_seed[i] = rand();
+    //             // constant random seed for test
+    //             random_seed[i] = i;
+    //         }
+    //         random_inited = true;
+    //     }
+
+    //     static inline int fastrand(int &g_seed) {
+    //         g_seed = (214013 * g_seed + 2531011);
+    //         return (g_seed >> 16) & 0x7FFF;
+    //     }
+
+    //     std::vector<std::vector<int>> const *getSplittingPoints() const {
+    //         assert(!splittingPoints.empty());
+    //         return &splittingPoints;
+    //     }
+
+    //     bool setSplittingPoints(std::vector<std::vector<int>> value) {
+    //         assert(!value.empty());
+    //         splittingPoints = value;
+    //         return true;
+    //     }
+
+    //     void printSplittingPoints() {
+    //         if (splittingPoints.empty())
+    //             printf("Empty SplittingPoints");
+    //         else {
+    //             printf("[");
+    //             for (auto &vs : splittingPoints) {
+    //                 printf("[");
+    //                 for (auto v : vs)
+    //                     printf("%2d,", v);
+    //                 printf("],");
+    //             }
+    //             printf("]");
+    //         }
+    //     }
+
+    //     void initSplittingPoints() {
+    //     splittingPoints.resize(getDims().size()); }
+
+    //     void printShape();
+};
+
+} // namespace infini
\ No newline at end of file
diff --git a/include/core/tensor_base.h b/include/core/tensor_base.h
new file mode 100644
index 00000000..cafea062
--- /dev/null
+++ b/include/core/tensor_base.h
@@ -0,0 +1,261 @@
+#pragma once
+#include "core/object.h"
+#include "core/ref.h"
+
+namespace infini {
+
+// class Tensor;
+class TensorBaseNode;
+class TensorNode;
+class OperatorNode;
+class GraphNode;
+
+using TensorBase = Ref<TensorBaseNode>;
+using Tensor = Ref<TensorNode>;
+using Operator = Ref<OperatorNode>;
+using Graph = Ref<GraphNode>;
+
+using TensorVec = vector<Tensor>;
+using OpVec = vector<Operator>;
+
+using VType = uint32_t;
+
+enum class DataType {
+    Float32,
+    Int32,
+};
+
+class TensorBaseNode : public Object {
+  public:
+    // enum TensorType {
+    //     Input,
+    //     Weight,
+    //     Invalid,
+    //     NotCounted,
+    // };
+
+  protected:
+    int dim;
+
+    DataType dtype;
+    vector<WRef<TensorBaseNode>> inputOf;
+    WRef<TensorBaseNode> outputOf;
+    // TODO: Ref<void> -> Ref<Blob>
+    Ref<VType[]> data;
+    // ComputeState computed;
+    // static int random_seed[256 * 16];
+    // static bool random_inited;
+
+  public:
+    TensorBaseNode(int dim, DataType dtype);
+    virtual ~TensorBaseNode() {}
+
+    Ref<VType[]> getDataPtr() const { return data; }
+    VType getData(size_t offset) const;
+
+    DataType getDType() const { return dtype; }
+
+    // uint64_t getHash() const { return hash; }
+
+    //     void setInputOf(const OpVec &ops) {
+    //         inputOf.clear();
+    //         for (const auto &op : ops)
+    //             inputOf.emplace_back(op);
+    //     }
+    //     void addInputOf(Operator op) { inputOf.emplace_back(op); }
+    //     void setOutputOf(Operator op) { outputOf = op; }
+
+    //     const OpVec &getInputOf() { return inputOf; }
+    //     Operator *getOutputOf() { return outputOf; }
+    //     std::pair<Operator *, int> getOutputOfWithIndex();
+
+    //     const Dim &getDims() const { return dims; }
+    //     void setDims(const Dim &dms) { dims = dms; }
+
+    //     bool dataRand(int seed = 0) {
+    //         if (data == nullptr)
+    //             data = new VType[size()];
+    //         if (!random_inited)
+    //             initFastrand();
+    //         // srand(seed);
+    //         // faster rand generator; parallel
+    //         size_t iEnd = size();
+    //         // std::cerr << "Init beginned " << std::endl;
+    // #pragma omp parallel for
+    //         for (size_t i = 0; i < iEnd; ++i)
+    //             data[i] = fastrand(random_seed[omp_get_thread_num() * 16]) %
+    //             10000;
+    //         // std::cerr << "Init finished" << std::endl;
+    //         computed = ComputedFull;
+    //         return true;
+    //     }
+
+    //     bool setScalar(VType val) {
+    //         if (data == nullptr || !dims.empty())
+    //             return false;
+    //         data[0] = val;
+    //         return true;
+    //     }
+
+    //     bool setData(const Dim &ds, VType val) {
+    //         if (data == nullptr || ds.size() != dims.size())
+    //             return false;
+    //         data[getOffset(ds)] = val;
+    //         return true;
+    //     }
+
+    //     bool setData(size_t pos, VType val) {
+    //         if (data == nullptr || pos >= size())
+    //             return false;
+    //         data[pos] = val;
+    //         return true;
+    //     }
+
+    //     VType getScalar() { return data == nullptr ? 0 : data[0]; }
+
+    //     VType getData(const Dim &ds) {
+    //         assert(data != nullptr);
+    //         auto offset = getOffset(ds);
+    //         return offset == (size_t)-1 ? 0 : data[getOffset(ds)];
+    //     }
+
+    //     VType getData(size_t pos) {
+    //         assert(data != nullptr);
+    //         assert(pos < size());
+    //         return data[pos];
+    //     }
+
+    //     VType *getDataPtr() const { return data; }
+
+    //     size_t getOffset(const Dim &ds) {
+    //         auto nDim = ds.size();
+    //         assert(dims.size() == nDim);
+    //         if (ds.empty())
+    //             return 0;
+    //         for (size_t i = 0; i < nDim; ++i)
+    //             if (ds[i] < 0 || ds[i] >= dims[i])
+    //                 return (size_t)-1;
+    //         size_t idx = ds[0];
+    //         size_t dm = 0;
+    //         while (++dm < nDim)
+    //             idx = idx * dims[dm] + ds[dm];
+    //         return idx;
+    //     }
+
+    //     VType getBroadcastData(const Dim &ds) {
+    //         assert(data != nullptr);
+    //         auto offset = getBroadcastOffset(ds);
+    //         return offset == (size_t)-1 ? 0 : data[getOffset(ds)];
+    //     }
+
+    //     VType getBroadcastData(size_t pos) {
+    //         assert(data != nullptr);
+    //         return data[pos % size()];
+    //     }
+
+    //     size_t getBroadcastOffset(const Dim &ds) {
+    //         assert(ds.size() >= dims.size());
+    //         auto nDim = dims.size();
+    //         auto nBroadcastDim = ds.size() - nDim;
+    //         for (size_t i = 0; i < nDim; ++i)
+    //             if (ds[nBroadcastDim + i] < 0 || ds[nBroadcastDim + i] >=
+    //             dims[i])
+    //                 return (size_t)-1;
+    //         size_t idx = 0;
+    //         for (size_t i = 0; i < nDim; ++i)
+    //             idx = idx * dims[i] + ds[nBroadcastDim + i];
+    //         return idx;
+    //     }
+
+    //     void itInit() { it = Dim(dims.size(), 0); }
+
+    //     void itReset() {
+    //         itInit();
+    //         for (size_t i = 0, iEnd = it.size(); i < iEnd; ++i)
+    //             it[i] = 0;
+    //     }
+
+    //     bool itValid() {
+    //         if (it.size() != dims.size())
+    //             return false;
+    //         for (size_t i = 0, iEnd = it.size(); i < iEnd; ++i)
+    //             if (it[i] >= dims[i])
+    //                 return false;
+    //         return true;
+    //     }
+
+    //     const Dim &itGet() { return it; }
+
+    //     void itNext() {
+    //         auto p = it.size() - 1;
+    //         it[p] += 1;
+    //         while (p >= 1) {
+    //             if (it[p] == dims[p]) {
+    //                 it[p] = 0;
+    //                 it[--p] += 1;
+    //             } else
+    //                 break;
+    //         }
+    //     }
+
+    //     size_t size() const {
+    //         size_t sz = 1;
+    //         auto dm = dims.size();
+    //         while (dm > 0)
+    //             sz *= dims[--dm];
+    //         return sz;
+    //     }
+
+    //     TensorType getType() const { return type; }
+    //     void setType(TensorType ty) { type = ty; }
+
+    //     static inline void initFastrand() {
+    //         assert(omp_get_max_threads() <= 256);
+    //         // srand(0); // constant seed for test
+    //         // align random_seed to avoid false sharing
+    //         for (int i = 0; i < 256 * 16; ++i) {
+    //             // random_seed[i] = rand();
+    //             // constant random seed for test
+    //             random_seed[i] = i;
+    //         }
+    //         random_inited = true;
+    //     }
+
+    //     static inline int fastrand(int &g_seed) {
+    //         g_seed = (214013 * g_seed + 2531011);
+    //         return (g_seed >> 16) & 0x7FFF;
+    //     }
+
+    //     std::vector<std::vector<int>> const *getSplittingPoints() const {
+    //         assert(!splittingPoints.empty());
+    //         return &splittingPoints;
+    //     }
+
+    //     bool setSplittingPoints(std::vector<std::vector<int>> value) {
+    //         assert(!value.empty());
+    //         splittingPoints = value;
+    //         return true;
+    //     }
+
+    //     void printSplittingPoints() {
+    //         if (splittingPoints.empty())
+    //             printf("Empty SplittingPoints");
+    //         else {
+    //             printf("[");
+    //             for (auto &vs : splittingPoints) {
+    //                 printf("[");
+    //                 for (auto v : vs)
+    //                     printf("%2d,", v);
+    //                 printf("],");
+    //             }
+    //             printf("]");
+    //         }
+    //     }
+
+    //     void initSplittingPoints() {
+    //     splittingPoints.resize(getDims().size()); }
+
+    //     void printShape();
+};
+
+} // namespace infini
diff --git a/include/nnet/Pass/MatchComputationKernel.h b/include/nnet/Pass/MatchComputationKernel.h
new file mode 100644
index 00000000..4b182a89
--- /dev/null
+++ b/include/nnet/Pass/MatchComputationKernel.h
@@ -0,0 +1,15 @@
+#pragma once
+#include "nnet/Pass/Pass.h"
+
+namespace nnet {
+
+class MatchComputationKernel : public Pass {
+  public:
+    MatchComputationKernel(Derivator &derivator)
+        : Pass(derivator, "MatchComputationKernel") {}
+
+  private:
+    virtual void transform(Formula &origin, int dfsDepth, Expr &rCur) override;
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Pass/MatchMemBoundKernel.h b/include/nnet/Pass/MatchMemBoundKernel.h
new file mode 100644
index 00000000..6b0a4bec
--- /dev/null
+++ b/include/nnet/Pass/MatchMemBoundKernel.h
@@ -0,0 +1,15 @@
+#pragma once
+#include "nnet/Pass/Pass.h"
+
+namespace nnet {
+
+class MatchMemBoundKernel : public Pass {
+  public:
+    MatchMemBoundKernel(Derivator &derivator)
+        : Pass(derivator, "MatchMemBoundKernel") {}
+
+  private:
+    virtual void transform(Formula &origin, int dfsDepth, Expr &rCur) override;
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Pass/Pass.h b/include/nnet/Pass/Pass.h
new file mode 100644
index 00000000..a8d517fa
--- /dev/null
+++ b/include/nnet/Pass/Pass.h
@@ -0,0 +1,41 @@
+#pragma once
+#include "nnet/derivator.h"
+
+namespace nnet {
+
+class Pass {
+  private:
+    VecExpr transformations;
+
+  protected:
+    Derivator &derivator;
+    string passName;
+    /**
+     * @brief // False if does not add log in Derivator. It should be false for
+     * single Pass test to avoid mismatch of passInfos and passMsgs  due to
+     * different number of "run" and "nextStep".
+     */
+    bool enableLogging, enableDebug;
+
+    virtual void transform(Formula &origin, int depth, Expr &rCur) = 0;
+    void nextStep(Formula &origin, int depth, Expr &rCur, Expr newCur,
+                  const string &ruleInfo = "");
+
+    Var getNewVar();
+    string newTensorName();
+
+  private:
+    void initialize(Formula &origin, const Expr &rCur);
+    void finalize();
+
+  public:
+    Pass(Derivator &derivator, const string &passName);
+    virtual ~Pass();
+
+    void run(Formula &origin, int dfsDepth, Expr &rCur);
+    void setEnableLogging(bool value);
+    void setEnableDebug(bool value);
+    const VecExpr &getTransformations();
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Pass/Rule1VariableSplit.h b/include/nnet/Pass/Rule1VariableSplit.h
new file mode 100644
index 00000000..06f2bb4e
--- /dev/null
+++ b/include/nnet/Pass/Rule1VariableSplit.h
@@ -0,0 +1,18 @@
+#pragma once
+#include "nnet/Pass/Pass.h"
+#include "nnet/ReplaceKit.h"
+
+namespace nnet {
+
+class Rule1VariableSplit : public Pass {
+  public:
+    Rule1VariableSplit(Derivator &derivator)
+        : Pass(derivator, "Rule1VariableSplit") {}
+
+  private:
+    virtual void transform(Formula &origin, int dfsDepth, Expr &rCur) override;
+    vector<Replace> getSplitableVar(const RangeOp &rangeOp);
+    Expr replaceIters(Expr cur, const Replace &replace);
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Pass/Rule2VariableMerging.h b/include/nnet/Pass/Rule2VariableMerging.h
new file mode 100644
index 00000000..1f277f46
--- /dev/null
+++ b/include/nnet/Pass/Rule2VariableMerging.h
@@ -0,0 +1,29 @@
+#pragma once
+#include "nnet/Pass/Pass.h"
+#include "nnet/ReplaceKit.h"
+
+namespace nnet {
+
+class Rule2VariableMerging : public Pass {
+  private:
+    map<int, vector<Var>> substituteRules;
+
+  public:
+    Rule2VariableMerging(Derivator &derivator)
+        : Pass(derivator, "Rule2VariableMerging") {}
+
+  private:
+    virtual void transform(Formula &origin, int dfsDepth, Expr &rCur) override;
+
+    vector<Replace> getMergableReplaces(RangeOp rangeOp, int depth);
+    optional<Replace> getReplaceMergingTwoLoopIters(const RangeOp &rangeOp,
+                                                    pair<Iterator, int> pairA,
+                                                    pair<Iterator, int> pairB,
+                                                    const IteratorTable &exprIT,
+                                                    int tensorID);
+    optional<Replace> getReplaceMappingTwoLoopIters(const RangeOp &rangeOp,
+                                                    pair<Iterator, int> pa,
+                                                    pair<Iterator, int> pb);
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Pass/Rule3StageSplit.h b/include/nnet/Pass/Rule3StageSplit.h
new file mode 100644
index 00000000..99e172cf
--- /dev/null
+++ b/include/nnet/Pass/Rule3StageSplit.h
@@ -0,0 +1,19 @@
+#pragma once
+#include "nnet/Pass/Pass.h"
+
+namespace nnet {
+
+class Rule3StageSplit : public Pass {
+  private:
+    map<int, vector<Var>> substituteRules;
+
+  public:
+    Rule3StageSplit(Derivator &derivator)
+        : Pass(derivator, "Rule3StageSplit") {}
+
+  private:
+    virtual void transform(Formula &origin, int dfsDepth, Expr &rCur) override;
+    vector<vector<Var>> getSplitSummationIters(RangeOp rangeOp);
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Pass/Rule4StageMerging.h b/include/nnet/Pass/Rule4StageMerging.h
new file mode 100644
index 00000000..13f11074
--- /dev/null
+++ b/include/nnet/Pass/Rule4StageMerging.h
@@ -0,0 +1,22 @@
+#pragma once
+#include "nnet/Pass/Pass.h"
+
+namespace nnet {
+
+class Rule4StageMerging : public Pass {
+    bool success, mergeStageWithCalc;
+
+  public:
+    Rule4StageMerging(Derivator &derivator)
+        : Pass(derivator, "Rule4StageMerging"), success(false),
+          mergeStageWithCalc(false) {}
+    bool rule4StageMerging(Formula &origin, int depth, Expr &rCur,
+                           bool mergeStageWithCalc = false);
+    bool isSuccessful();
+    void setMergeStageWithCalc(bool value);
+
+  private:
+    virtual void transform(Formula &origin, int depth, Expr &rCur) override;
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Pass/Rule5RangeRelaxation.h b/include/nnet/Pass/Rule5RangeRelaxation.h
new file mode 100644
index 00000000..c1719dfc
--- /dev/null
+++ b/include/nnet/Pass/Rule5RangeRelaxation.h
@@ -0,0 +1,16 @@
+#pragma once
+#include "nnet/Pass/Pass.h"
+
+namespace nnet {
+
+class Rule5RangeRelaxation : public Pass {
+  public:
+    Rule5RangeRelaxation(Derivator &derivator)
+        : Pass(derivator, "Rule5RangeRelaxation") {}
+    Expr rule5RangeRelaxation(Formula &origin, int depth, Expr &rCur);
+
+  private:
+    virtual void transform(Formula &origin, int depth, Expr &rCur) override;
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Pass/Rule6KenerlMatching.h b/include/nnet/Pass/Rule6KenerlMatching.h
new file mode 100644
index 00000000..19648eaf
--- /dev/null
+++ b/include/nnet/Pass/Rule6KenerlMatching.h
@@ -0,0 +1,17 @@
+#pragma once
+#include "nnet/Pass/Pass.h"
+
+namespace nnet {
+
+class Rule6KenerlMatching : public Pass {
+  public:
+    Rule6KenerlMatching(Derivator &derivator)
+        : Pass(derivator, "Rule6KenerlMatching") {}
+
+  private:
+    virtual void transform(Formula &origin, int dfsDepth, Expr &rCur) override;
+    // RE: seperating this func is a choice.
+    VecExpr matchElementWise(const RangeOp &rangeOp);
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Pass/Rule7DLT.h b/include/nnet/Pass/Rule7DLT.h
new file mode 100644
index 00000000..d2ce0ecc
--- /dev/null
+++ b/include/nnet/Pass/Rule7DLT.h
@@ -0,0 +1,16 @@
+#pragma once
+#include "nnet/Pass/Pass.h"
+
+namespace nnet {
+
+class Rule7DLT : public Pass {
+  public:
+    Rule7DLT(Derivator &derivator) : Pass(derivator, "Rule7DLT") {}
+
+  private:
+    virtual void transform(Formula &origin, int dfsDepth, Expr &rCur) override;
+    Expr buildDLTSingleRangeOp(const RangeOp &original, const Expr &newSummand);
+    vector<int> getFactors();
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Pass/Rule8GuidedDLT.h b/include/nnet/Pass/Rule8GuidedDLT.h
new file mode 100644
index 00000000..e6536df4
--- /dev/null
+++ b/include/nnet/Pass/Rule8GuidedDLT.h
@@ -0,0 +1,48 @@
+#pragma once
+#include "nnet/Pass/Pass.h"
+#include "nnet/ReplaceKit.h"
+
+namespace nnet {
+
+class Rule8GuidedDLT : public Pass {
+  public:
+    Rule8GuidedDLT(Derivator &derivator) : Pass(derivator, "Rule8GuidedDLT") {}
+    VecExpr guidedDLT(Formula &origin, int depth, Expr &rCur,
+                      bool debug = false);
+
+  private:
+    virtual void transform(Formula &origin, int dfsDepth, Expr &rCur) override;
+    /**
+     * @brief If only one row miss match (more iterators mismatch), directly do
+     * data layout construction according to the IT.
+     *
+     * @return Expr Return nullptr if failed.
+     */
+    Expr guidedDLTMoreVar2(const RangeOp &cur, const Mismatch &mismatch,
+                           const IteratorTable &exprIT, const Pattern &pattern);
+    /**
+     * @brief Check whether two iterators overlap each other. If overlapping, we
+     * cannot simply reconstruct the tensor into a new one by seperate all
+     * iterators into different dimensions.
+     */
+    bool checkElementsHaveOnlyOneAccessIteratorSet(const IteratorTable &exprIT,
+                                                   int tensorID);
+    /**
+     * @brief Only product of two tensors can be guided DLTed.
+     *
+     * @param cur
+     * @return true
+     * @return false
+     */
+    bool statisfyGuidedDLT(RangeOp cur) const;
+    /**
+     * @brief Deal with output DLT mismatch only.
+     */
+    Expr guidedDLTDLMismatch(const RangeOp &cur, const Mismatch &mismatch,
+                             const IteratorTable &exprIT,
+                             const Pattern &pattern);
+    Expr buildGuidedDLTSource(const Subscript &originalSub, Replace replace,
+                              vector<Var> tensorDimAxes, vector<int> newShape);
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Pass/Rule90TwoStageElementWise.h b/include/nnet/Pass/Rule90TwoStageElementWise.h
new file mode 100644
index 00000000..ab37cf99
--- /dev/null
+++ b/include/nnet/Pass/Rule90TwoStageElementWise.h
@@ -0,0 +1,16 @@
+#pragma once
+#include "nnet/Pass/Pass.h"
+
+namespace nnet {
+
+class Rule90TwoStageElementWise : public Pass {
+  public:
+    Rule90TwoStageElementWise(Derivator &derivator)
+        : Pass(derivator, "Rule90TwoStageElementWise") {}
+
+  private:
+    virtual void transform(Formula &origin, int dfsDepth, Expr &rCur) override;
+    VecExpr matchTwoStageElementWise(const RangeOp &rangeOp);
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Pass/Rule91MergeStagesWithSum.h b/include/nnet/Pass/Rule91MergeStagesWithSum.h
new file mode 100644
index 00000000..221e1772
--- /dev/null
+++ b/include/nnet/Pass/Rule91MergeStagesWithSum.h
@@ -0,0 +1,15 @@
+#pragma once
+#include "nnet/Pass/Pass.h"
+
+namespace nnet {
+
+class Rule91MergeStagesWithSum : public Pass {
+  public:
+    Rule91MergeStagesWithSum(Derivator &derivator)
+        : Pass(derivator, "Rule91MergeStagesWithSum") {}
+
+  private:
+    virtual void transform(Formula &origin, int dfsDepth, Expr &rCur) override;
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Pass/Rule9RangeMagnify.h b/include/nnet/Pass/Rule9RangeMagnify.h
new file mode 100644
index 00000000..613de158
--- /dev/null
+++ b/include/nnet/Pass/Rule9RangeMagnify.h
@@ -0,0 +1,15 @@
+#pragma once
+#include "nnet/Pass/Pass.h"
+
+namespace nnet {
+
+class Rule9RangeMagnify : public Pass {
+  public:
+    Rule9RangeMagnify(Derivator &derivator)
+        : Pass(derivator, "Rule9RangeMagnify") {}
+
+  private:
+    virtual void transform(Formula &origin, int dfsDepth, Expr &rCur) override;
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/ReplaceKit.h b/include/nnet/ReplaceKit.h
new file mode 100644
index 00000000..19f2ec0e
--- /dev/null
+++ b/include/nnet/ReplaceKit.h
@@ -0,0 +1,46 @@
+#pragma once
+#include "nnet/expr.h"
+
+namespace nnet {
+
+struct Replace {
+    int iteratorType;
+    vector<Var> oldIters; // i_1, ...
+    vector<Var> newIters; // j_1, ...
+    VecExpr phis;         // j_1=\phi_1(i_1, ...), not necessary for Sum iter
+    VecExpr psis;         // i_1=\psi_1(j_1, ...)
+    vector<VarRangePair> newVarRanges;
+
+    bool isReplaced(Var var) const {
+        for (const auto &iter : oldIters)
+            if (iter->equal(var))
+                return true;
+        return false;
+    }
+
+    string toReadable() const {
+        string ret = "Old iters: " + serializeVec(oldIters) +
+                     ", new iters: " + serializeVec(newIters);
+        ret += " phis: " + serializeVec(phis) + " psis: " + serializeVec(psis);
+        return ret;
+    }
+};
+
+class ReplaceKit {
+  public:
+    static RangeOp replaceRangeOpIterator(const RangeOp &rangeOp,
+                                          const Replace &replace,
+                                          const Expr &replacedSummand);
+    static Subscript buildSubscirptForLoopVarReplace(const RangeOp &inner,
+                                                     const Replace &replace);
+    static RangeOp buildDLTOuterRangeOp(const RangeOp &original,
+                                        const Subscript &subscriptedNewRangeOp);
+    static Expr replaceMultipleExprs(const Expr &cur,
+                                     const vector<Var> &patterns,
+                                     const VecExpr &replacements,
+                                     bool simplify = true);
+    static Expr replaceExpr(const Expr &cur, const Expr &pattern,
+                            const Expr &replacement);
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Visitor/AsTVMVisitor.h b/include/nnet/Visitor/AsTVMVisitor.h
new file mode 100644
index 00000000..f193231a
--- /dev/null
+++ b/include/nnet/Visitor/AsTVMVisitor.h
@@ -0,0 +1,38 @@
+#pragma once
+#include "nnet/Visitor/StrideVisitor.h"
+#include "nnet/visitor.h"
+
+namespace nnet {
+
+class AsTVMVisitor : public Functor<std::string(void)> {
+  private:
+    int nStage = 0, curStage = -1;
+    std::unordered_map<std::string, int> offset;
+    std::vector<std::string> inputs;
+    std::string output;
+    std::vector<std::string> pythonVars;
+    std::vector<std::vector<int>> inputShapes;
+    std::vector<int> outputShape;
+    std::string stmts;
+
+  public:
+    std::string getStmts() const;
+
+    const std::vector<std::string> &getInputs() const { return inputs; }
+    const std::string &getOutput() const { return output; }
+
+    const std::vector<std::vector<int>> &getInputShapes() const {
+        return inputShapes;
+    }
+    const std::vector<int> &getOutputShape() const { return outputShape; }
+
+    std::string visit_(const Constant &c) override;
+    std::string visit_(const BinaryOp &c) override;
+    std::string visit_(const Func &c) override;
+    std::string visit_(const RangeOp &c) override;
+    std::string visit_(const Subscript &c) override;
+    std::string visit_(const Var &c) override;
+    std::string visit_(const Tensor &c) override;
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Visitor/CheckOOBVisitor.h b/include/nnet/Visitor/CheckOOBVisitor.h
new file mode 100644
index 00000000..017a7ae9
--- /dev/null
+++ b/include/nnet/Visitor/CheckOOBVisitor.h
@@ -0,0 +1,22 @@
+#pragma once
+#include "nnet/visitor.h"
+
+namespace nnet {
+
+class CheckOOBVisitor : public ExprTreeVisitor {
+    RangeOp rangeOp;
+    bool detect = false;
+
+  public:
+    CheckOOBVisitor(int _verobse = 0) : ExprTreeVisitor(1, 1, 0, 0, _verobse) {}
+    void visit_(const Subscript &c) override;
+
+    /**
+     * @brief
+     * @return true If there is OOB
+     * @return false If there is no OOB
+     */
+    bool checkRangeOp(const RangeOp &_rangeOp);
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Visitor/CloneMutator.h b/include/nnet/Visitor/CloneMutator.h
new file mode 100644
index 00000000..6812cb80
--- /dev/null
+++ b/include/nnet/Visitor/CloneMutator.h
@@ -0,0 +1,16 @@
+#pragma once
+#include "nnet/visitor.h"
+
+namespace nnet {
+
+// Clone ExprNodes in a stage except Tensor, Var, and Constant nodes.
+class CloneMutator : public Mutator {
+  public:
+    CloneMutator() : Mutator(false) {}
+    Expr visit_(const Constant &c) override;
+    Expr visit_(const Var &c) override;
+    Expr visit_(const Tensor &c) override;
+    Expr clone(const Expr &c) { return dispatch(c); }
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Visitor/CompareMultiFormulasVisitor.h b/include/nnet/Visitor/CompareMultiFormulasVisitor.h
new file mode 100644
index 00000000..a120d48e
--- /dev/null
+++ b/include/nnet/Visitor/CompareMultiFormulasVisitor.h
@@ -0,0 +1,15 @@
+#pragma once
+#include "nnet/visitor.h"
+
+namespace nnet {
+
+class CompareMultiFormulasVisitor : public ExprTreeVisitor {
+    vector<VarRangePair> newSumVarRanges;
+    RangeOp newRangeOp;
+
+  public:
+    CompareMultiFormulasVisitor() : ExprTreeVisitor() {}
+    bool compare(const VecExpr &roots);
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Visitor/CountRoutineVisitor.h b/include/nnet/Visitor/CountRoutineVisitor.h
new file mode 100644
index 00000000..d9536495
--- /dev/null
+++ b/include/nnet/Visitor/CountRoutineVisitor.h
@@ -0,0 +1,18 @@
+#pragma once
+#include "nnet/visitor.h"
+
+namespace nnet {
+
+class CountRoutineVisitor : public ExprTreeVisitor {
+  private:
+    vector<int> cnts;
+
+  public:
+    CountRoutineVisitor(int _verobse = 0)
+        : ExprTreeVisitor(1, 1, 1, 1, _verobse) {}
+    void visit_(const Tensor &c) override;
+    vector<int> count(const Expr &root);
+    bool match(const Expr &root, int nMatmul = 0, int nConv = 0,
+               int nElement = 0, int nSg2bmm = 0, int nLongformerGBMM = 0);
+};
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Visitor/FullPrinterVisitor.h b/include/nnet/Visitor/FullPrinterVisitor.h
new file mode 100644
index 00000000..a9caeccf
--- /dev/null
+++ b/include/nnet/Visitor/FullPrinterVisitor.h
@@ -0,0 +1,25 @@
+#pragma once
+#include "nnet/visitor.h"
+
+namespace nnet {
+
+class FullPrinterVisitor : public ExprTreeVisitor {
+  private:
+    vector<tuple<string, Routine, Tensor>> q;
+
+  public:
+    FullPrinterVisitor(int _verobse = 0)
+        : ExprTreeVisitor(1, 1, 1, 0, _verobse) {}
+    void visit_(const Tensor &c) override;
+
+    string print(const Expr &root);
+    /**
+     * @brief Get all tensors & OPs in a reversed order
+     *
+     * @param root
+     * @return vector<<Output TensorName, RoutineNode, output tensor in NNet>>
+     */
+    const vector<tuple<string, Routine, Tensor>> &traverse(const Expr &root);
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Visitor/GetTensorsVisitor.h b/include/nnet/Visitor/GetTensorsVisitor.h
new file mode 100644
index 00000000..66a8d5dc
--- /dev/null
+++ b/include/nnet/Visitor/GetTensorsVisitor.h
@@ -0,0 +1,22 @@
+#pragma once
+#include "nnet/visitor.h"
+
+namespace nnet {
+
+// Get all tensors in the stage
+class GetTensorsVisitor : public ExprTreeVisitor {
+  private:
+    unordered_map<string, Tensor> tensors;
+
+    void visit_(const Tensor &c) override;
+
+  public:
+    GetTensorsVisitor(int _verobse = 0)
+        : ExprTreeVisitor(1, 1, 1, 0, _verobse) {}
+    auto get(const Expr &c) {
+        dispatch(c);
+        return tensors;
+    }
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Visitor/HashVisitor.h b/include/nnet/Visitor/HashVisitor.h
new file mode 100644
index 00000000..0d20f49d
--- /dev/null
+++ b/include/nnet/Visitor/HashVisitor.h
@@ -0,0 +1,31 @@
+#pragma once
+#include "nnet/visitor.h"
+
+namespace nnet {
+
+// Calculate hash for a normal form, starting at a RangeOp
+class HashVisitor : public Functor<HashType(void)> {
+    inline const static HashType BKDR_SEED[] = {131, 313, 10007, 65599};
+
+    PtrUmap<Iterator, int> varHash;
+    int nLoopVars = 0;
+    PtrUmap<Iterator, int> name2id;
+    vector<int> rootId;
+    vector<bool> haveAlias;
+    int nVars = 0;
+    vector<HashType> power;
+
+  private:
+    HashType visit_(const Constant &c) override;
+    HashType visit_(const BinaryOp &c) override;
+    HashType visit_(const RangeOp &c) override;
+    HashType visit_(const Subscript &c) override;
+    HashType visit_(const Tensor &c) override;
+    HashType visit_(const Var &c) override;
+
+  public:
+    HashVisitor(int _verobse = 0) : Functor(_verobse) {}
+    HashType getHash(const Expr &c);
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Visitor/InputVisitor.h b/include/nnet/Visitor/InputVisitor.h
new file mode 100644
index 00000000..1bc3c89a
--- /dev/null
+++ b/include/nnet/Visitor/InputVisitor.h
@@ -0,0 +1,23 @@
+#pragma once
+#include "nnet/visitor.h"
+
+namespace nnet {
+
+class InputVisitor : public ExprTreeVisitor {
+    vector<Tensor> inputs;
+
+  public:
+    int nInputs = 0;
+    InputVisitor(int _verobse = 0) : ExprTreeVisitor(1, 1, 1, 0, _verobse) {}
+    void visit_(const Tensor &c) override;
+
+    /**
+     * @brief Get the all inputs in the netsed stages
+     */
+    vector<Tensor> getInputs(const RangeOp &_rangeOp) {
+        dispatch(_rangeOp);
+        return inputs;
+    }
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Visitor/Interpreter.h b/include/nnet/Visitor/Interpreter.h
new file mode 100644
index 00000000..b4479b79
--- /dev/null
+++ b/include/nnet/Visitor/Interpreter.h
@@ -0,0 +1,55 @@
+#pragma once
+#include "nnet/visitor.h"
+
+namespace nnet {
+
+class Interpreter : public Functor<int()> {
+  public:
+    using ttype = int; // Test data type
+    using rtype = int; // Return data type
+    using Position = vector<int>;
+    using Inputs = unordered_map<string, Ref<vector<ttype>>>;
+    using Iteration = PtrUmap<Var, int>;
+
+  private:
+    // cache the input value
+    Inputs inputs;
+    vector<Iteration> iterations;
+    vector<Position> positions;
+
+    rtype visit_(const Constant &c) override;
+    rtype visit_(const BinaryOp &c) override;
+    rtype visit_(const RangeOp &c) override;
+    rtype visit_(const Subscript &c) override;
+    rtype visit_(const Var &c) override;
+    rtype visit_(const Tensor &c) override;
+    // int visit_(const Func &c); // Future work
+
+    static Inputs genInputStartingFromZero(const RangeOp &range);
+
+  public:
+    Interpreter(Inputs _inputs, int _verbose = 0)
+        : Functor(_verbose), inputs(_inputs) {}
+    Interpreter(RangeOp range, int _verbose = 0);
+
+    /**
+     * @brief Calculate the output at specified poistions
+     *
+     * @param expr The expression to be calculated.
+     * @param poses Positions of output.
+     * @return vector<int> Value of output.
+     */
+    vector<rtype> interpret(const Expr &expr, const vector<Position> &poses);
+    /**
+     * @brief Calculate the output at equally spaced positions
+     *
+     * @param expr The expression to be calculated.
+     * @param nPoses The number of calculated output positions.
+     * @return vector<int> Value of output.
+     */
+    vector<rtype> interpretUniformSample(const RangeOp &range,
+                                         int nPoses = 100);
+    vector<rtype> interpretAllOutput(const RangeOp &range);
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Visitor/MatchReshapeVisitor.h b/include/nnet/Visitor/MatchReshapeVisitor.h
new file mode 100644
index 00000000..caf0130e
--- /dev/null
+++ b/include/nnet/Visitor/MatchReshapeVisitor.h
@@ -0,0 +1,14 @@
+#pragma once
+#include "nnet/visitor.h"
+
+namespace nnet {
+
+class MatchReshapeVisitor : public Functor<bool(void)> {
+  private:
+    PtrMap<Iterator, int> _coefficient;
+
+  public:
+    bool visit_(const RangeOp &c) override;
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Visitor/MatchTableVisitor.h b/include/nnet/Visitor/MatchTableVisitor.h
new file mode 100644
index 00000000..53836883
--- /dev/null
+++ b/include/nnet/Visitor/MatchTableVisitor.h
@@ -0,0 +1,60 @@
+#pragma once
+#include "nnet/Visitor/StrideVisitor.h"
+#include "nnet/visitor.h"
+
+namespace nnet {
+
+class MatchTableVisitor
+    : public Functor<void(const Tensor &, int dim, optional<int> stride)> {
+  private:
+    // Var -> {(tensor, dim)}
+    Appearance appearance;
+    vector<Tensor> tensors;
+    vector<Subscript> subscripts;
+    StrideTable strideTable;
+    PtrMap<Iterator, vector<vector<int>>>
+        strideInDim; // [Iterator][tensorID][dim]=stride
+
+    // Intermediate variable
+    // product of a sub-exprtree: Stride has to be done in two DFS
+    SubexprSride subexprStride;
+    bool hasUnsupportedOp = false;
+
+  public:
+    MatchTableVisitor(int _verobse = 0) : Functor(_verobse) {}
+    void visit_(const BinaryOp &c, const Tensor &tensor, int dim,
+                optional<int> stride) override;
+    void visit_(const Subscript &c, const Tensor &tensor, int dim,
+                optional<int> stride) override;
+    void visit_(const Var &c, const Tensor &tensor, int dim,
+                optional<int> stride) override;
+    void visit_(const Constant &c, const Tensor &tensor, int dim,
+                optional<int> stride) override;
+    // void visit_(const Tensor &c, const Tensor &tensor) override;
+
+    [[nodiscard]] bool operator()(const RangeOp &e) {
+        hasUnsupportedOp = false;
+        // get the location and stride of each iterator
+        auto mulOp = as<BinaryOpNode>(e->getSummand());
+        // TODO [feature]: support complex index exprs
+        if (!mulOp || mulOp->getOpType() != OpType::Mul) {
+            nnet_unimplemented_continue();
+            return false;
+        }
+        StrideVisitor strideVisitor(0);
+        subexprStride = strideVisitor.getFormulaStride(e);
+        dispatch(mulOp->getLhs(), nullptr, 0, 0);
+        dispatch(mulOp->getRhs(), nullptr, 0, 0);
+        subscripts.emplace_back(as<SubscriptNode>(mulOp->getLhs()));
+        subscripts.emplace_back(as<SubscriptNode>(mulOp->getRhs()));
+        assert(tensors.size() == subscripts.size());
+        assert(tensors.size() < 5);
+        return !hasUnsupportedOp;
+    }
+
+    auto getResult() const {
+        return tuple(appearance, tensors, strideTable, subscripts);
+    }
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Visitor/MatmulTransposeMutator.h b/include/nnet/Visitor/MatmulTransposeMutator.h
new file mode 100644
index 00000000..000467d2
--- /dev/null
+++ b/include/nnet/Visitor/MatmulTransposeMutator.h
@@ -0,0 +1,18 @@
+#pragma once
+#include "nnet/visitor.h"
+
+namespace nnet {
+
+class MatmulTransposeMutator : public Mutator {
+    Derivator &derivator;
+
+  public:
+    MatmulTransposeMutator(Derivator &derivator)
+        : Mutator(1), derivator(derivator) {}
+    VecExpr transpose(const Tensor &tensor);
+
+  private:
+    Tensor transposeInput(const Tensor &tensor);
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Visitor/MergeMemboundMutator.h b/include/nnet/Visitor/MergeMemboundMutator.h
new file mode 100644
index 00000000..786103e1
--- /dev/null
+++ b/include/nnet/Visitor/MergeMemboundMutator.h
@@ -0,0 +1,20 @@
+#pragma once
+#include "nnet/visitor.h"
+
+namespace nnet {
+
+class MergeMemboundMutator : public Mutator {
+    VecExpr kernels;
+    int curDepth; // from the last one to the first one
+    Expr visit_(const Tensor &c) override;
+    // FIXME: duplicate code
+    Expr rule4StageMerging(Expr &rCur, bool mergeStageWithCalc);
+    bool checkEmpty();
+
+  public:
+    MergeMemboundMutator(const VecExpr &kernels)
+        : Mutator(), kernels(kernels), curDepth(kernels.size() - 1) {}
+    Expr merge(bool allowEmptyMembound = false);
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Visitor/PatternMatcher.h b/include/nnet/Visitor/PatternMatcher.h
new file mode 100644
index 00000000..6765a4e0
--- /dev/null
+++ b/include/nnet/Visitor/PatternMatcher.h
@@ -0,0 +1,43 @@
+#pragma once
+#include "nnet/visitor.h"
+
+namespace nnet {
+
+/**
+ * @brief Since the output positions of operators always start from 0, we have
+ * to offset them if the the boundary expression of is not 0.
+ */
+class PatternMatcher : public Functor<void(void)> {
+  private:
+    Derivator &derivator;
+    bool hasNonZeroRange;
+    const RangeOp originalCur;
+
+  public:
+    PatternMatcher(Derivator &derivator, const RangeOp &cur);
+    /**
+     * @brief Get the Cur whose loop vars are all offset to [0, x). Since
+     * operator outputs start from 0, RangeOp has to be aligned.
+     */
+    RangeOp getOffsetCur();
+    /**
+     * @brief Add outer RangeOp to map the original positions to the new
+     * positions staring from 0.
+     *
+     * @param exprs Tensors from matched exprs
+     */
+    VecExpr applyWrapper(const VecExpr &exprs);
+
+    VecExpr matchWithPattern(const RangeOp &rangeOp, const Pattern &pattern);
+
+  private:
+    VecExpr matchKernel(const Pattern &pattern, const RangeOp &rangeOp,
+                        IteratorTable &exprIT);
+    // get reverse tensor and iterator map ([pattern tensor/iter ID] ->
+    // real)
+    Expr matchKernelWithTensorMap(const Pattern &pattern,
+                                  const RangeOp &rangeOp,
+                                  IteratorTable &exprIT);
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Visitor/RangeMagnifyVisitor.h b/include/nnet/Visitor/RangeMagnifyVisitor.h
new file mode 100644
index 00000000..f6fa03c1
--- /dev/null
+++ b/include/nnet/Visitor/RangeMagnifyVisitor.h
@@ -0,0 +1,25 @@
+#pragma once
+#include "nnet/visitor.h"
+
+namespace nnet {
+
+class RangeMagnifyVisitor : public Mutator {
+    vector<VarRangePair> newSumVarRanges;
+    RangeOp newRangeOp;
+
+  public:
+    RangeMagnifyVisitor() : Mutator(0) {}
+    Expr visit_(const RangeOp &c) override;
+    Expr visit_(const Subscript &c) override;
+    /**
+     * @brief
+     *
+     * @param root
+     * @param _newSumVarRanges
+     * @return RangeOp nullptr if failed to magnify
+     */
+    RangeOp magnify(const RangeOp &root,
+                    const vector<VarRangePair> &_newSumVarRanges);
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Visitor/RangeRelaxFunctor.h b/include/nnet/Visitor/RangeRelaxFunctor.h
new file mode 100644
index 00000000..2dd80c2f
--- /dev/null
+++ b/include/nnet/Visitor/RangeRelaxFunctor.h
@@ -0,0 +1,18 @@
+#pragma once
+#include "nnet/visitor.h"
+
+namespace nnet {
+
+using RangeMap = PtrMap<Iterator, Range>;
+class RangeRelaxFunctor : public Functor<RangeMap()> {
+    RangeOp rangeOp;
+
+  public:
+    RangeRelaxFunctor(RangeOp _rangeOp) : Functor(false), rangeOp(_rangeOp) {}
+    RangeMap visit_(const BinaryOp &c) override;
+    RangeMap visit_(const RangeOp &c) override;
+    RangeMap visit_(const Subscript &c) override;
+    RangeMap intersectRangeMaps(const RangeMap &a, const RangeMap &b);
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Visitor/ReplaceNodeMutator.h b/include/nnet/Visitor/ReplaceNodeMutator.h
new file mode 100644
index 00000000..bdb7fb63
--- /dev/null
+++ b/include/nnet/Visitor/ReplaceNodeMutator.h
@@ -0,0 +1,20 @@
+#pragma once
+#include "nnet/visitor.h"
+
+namespace nnet {
+
+// Replace node according to its address the summand
+// Only subscript and tensor are supported now.
+class ReplaceNodeMutator : public Mutator {
+    int nSubscripts = 0;
+    ExprNode *target;
+    Expr replacement;
+
+  public:
+    ReplaceNodeMutator() : Mutator(0) {}
+    Expr visit_(const Subscript &c) override;
+    Expr visit_(const Tensor &c) override;
+    Expr replace(const Expr &root, ExprNode *_target, const Expr &_replace);
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Visitor/ReplaceVariable.h b/include/nnet/Visitor/ReplaceVariable.h
new file mode 100644
index 00000000..20efbc4f
--- /dev/null
+++ b/include/nnet/Visitor/ReplaceVariable.h
@@ -0,0 +1,33 @@
+#pragma once
+#include "nnet/visitor.h"
+
+namespace nnet {
+
+class ReplaceVariable : public Mutator {
+    VecExpr patterns, replacements;
+    map<HashType, int> patternHash;
+
+  public:
+    ReplaceVariable(Expr _pattern, Expr _replacement) : Mutator(false) {
+        set({_pattern}, {_replacement});
+    }
+    ReplaceVariable(const map<string, pair<Expr, Expr>> &mapping)
+        : Mutator(false) {
+        VecExpr _patterns, _replacements;
+        for (const auto &[_, v] : mapping) {
+            _patterns.emplace_back(v.first);
+            _replacements.emplace_back(v.second);
+        }
+        set(_patterns, _replacements);
+    }
+    Expr visit_(const BinaryOp &c) override;
+    // NOT recur to the next stage
+    Expr visit_(const RangeOp &c) override;
+    Expr visit_(const Var &c) override;
+
+  private:
+    void set(VecExpr _pattern, VecExpr _replacement);
+    Expr match(const Expr &c);
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Visitor/Serializer.h b/include/nnet/Visitor/Serializer.h
new file mode 100644
index 00000000..3568b152
--- /dev/null
+++ b/include/nnet/Visitor/Serializer.h
@@ -0,0 +1,52 @@
+#pragma once
+#include "nlohmann/json_fwd.hpp"
+#include "nnet/visitor.h"
+#include <memory>
+
+namespace nnet {
+
+class Serializer : public Functor<string()> {
+    using json = nlohmann::ordered_json;
+
+  private:
+    static constexpr int VERSION{1};
+    std::unique_ptr<json> jPtr;
+    json &j;
+    static int id;
+
+    string visit_(const Constant &c) override;
+    string visit_(const BinaryOp &c) override;
+    string visit_(const RangeOp &c) override;
+    string visit_(const Subscript &c) override;
+    string visit_(const Var &c) override;
+    string visit_(const Tensor &c) override;
+    string dispatchRoutine(const Routine &c);
+
+    Expr buildExprTree(string key);
+    Routine buildRoutine(string key);
+
+  public:
+    Serializer(int _verobse = 0);
+    virtual ~Serializer();
+
+    /**
+     * @brief Serialize the given expression to json file
+     *
+     * @param expr The expression to be serialized
+     * @param filePath The path of json file to be output
+     * @param msg Message of derivation
+     * @return bool Whether the serialization succeed
+     */
+    bool serialize(const Expr &expr, const string &filePath,
+                   const string &msg = "");
+
+    /**
+     * @brief Deserialize the given json file to expression
+     *
+     * @param filePath The path to file to be deserialized
+     * @return Expression deserialized from the given json file
+     */
+    Expr deserialize(const string &filePath);
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Visitor/SimplifyExprVisitor.h b/include/nnet/Visitor/SimplifyExprVisitor.h
new file mode 100644
index 00000000..e8ed459c
--- /dev/null
+++ b/include/nnet/Visitor/SimplifyExprVisitor.h
@@ -0,0 +1,38 @@
+#pragma once
+#include "nnet/Visitor/StrideVisitor.h"
+#include "nnet/visitor.h"
+
+namespace nnet {
+
+// Simplify a index expression tree
+class SimplifyExprVisitor : public Functor<void(optional<int> stride)> {
+  private:
+    SubexprSride subexprStride;
+    int constant;
+    PtrMap<Iterator, int> strides; // [var]=strides
+
+    map<pair<Iterator, int>, int, RefValueLess<pair<Iterator, int>>> divStrides,
+        modStrides; // 3*(i%8): [<i,8>]=3
+
+    // For divde and modulo with expr as dividend: 3*((i+1)%8): [<i+1,8>]=3
+    map<pair<Expr, int>, int, RefAddrLess<pair<Expr, int>>> divExprStrides,
+        modExprStrides;
+
+  public:
+    SimplifyExprVisitor() : Functor(0) {}
+    void visit_(const BinaryOp &c, optional<int> stride) override;
+    void visit_(const Var &c, optional<int> stride) override;
+    void visit_(const Constant &c, optional<int> stride) override;
+    PtrMap<Iterator, int> getStrides(const Expr &expr);
+    // TODO [refactor]: move this to SimplifyFormulaMutator as a member func
+    // this class should be get coefficients in a expr
+    Expr simplify(const Expr &expr);
+    int getConstant(const Expr &expr);
+    pair<PtrMap<Iterator, int>, int> getStridesConstant(const Expr &expr);
+    optional<Range> getExprRange(const Expr &expr, const RangeOp &rangeOp);
+    PtrMap<Iterator, int> getStrides() { return strides; }
+    const auto &getDivStrides() { return divStrides; }
+    const auto &getModStrides() { return modStrides; }
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Visitor/SimplifyFormulaMutator.h b/include/nnet/Visitor/SimplifyFormulaMutator.h
new file mode 100644
index 00000000..48f92870
--- /dev/null
+++ b/include/nnet/Visitor/SimplifyFormulaMutator.h
@@ -0,0 +1,18 @@
+#pragma once
+#include "nnet/Visitor/StrideVisitor.h"
+#include "nnet/visitor.h"
+
+namespace nnet {
+
+// Simplify all indexes in subscripts in an expression tree
+class SimplifyFormulaMutator : public Mutator {
+    int nSubscripts = 0;
+
+  public:
+    SimplifyFormulaMutator() : Mutator(0) {}
+    Expr visit_(const Subscript &c) override;
+    // Expr visit_(const BinaryOp &c) override;
+    Expr simplify(const Expr &expr);
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/Visitor/StrideVisitor.h b/include/nnet/Visitor/StrideVisitor.h
new file mode 100644
index 00000000..dca17569
--- /dev/null
+++ b/include/nnet/Visitor/StrideVisitor.h
@@ -0,0 +1,38 @@
+#pragma once
+#include "nnet/visitor.h"
+
+namespace nnet {
+
+using SubexprSride = map<const ExprNode *, optional<int>>;
+class StrideVisitor : public Functor<optional<int>(void)> {
+  private:
+    SubexprSride subexprStride;
+
+  public:
+    StrideVisitor(int _verobse = 0) : Functor(_verobse) {}
+    optional<int> visit_(const BinaryOp &c) override;
+    optional<int> visit_(const Subscript &c) override;
+    optional<int> visit_(const Var &c) override;
+    optional<int> visit_(const Constant &c) override;
+    // void visit_(const Tensor &c, const Tensor &tensor) override;
+
+    auto getFormulaStride(const RangeOp &e) {
+        subexprStride.clear();
+        // get the location and stride of each iterator
+        auto mulOp = as<BinaryOpNode>(e->getSummand());
+        // TODO [feature]: support complex index exprs
+        if (!mulOp || mulOp->getOpType() != OpType::Mul)
+            nnet_unimplemented_continue();
+        dispatch(mulOp->getLhs());
+        dispatch(mulOp->getRhs());
+        return subexprStride;
+    }
+
+    [[nodiscard]] auto getExprStride(const Expr &e) {
+        subexprStride.clear();
+        dispatch(e);
+        return subexprStride;
+    }
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/common.h b/include/nnet/common.h
new file mode 100644
index 00000000..567c6a27
--- /dev/null
+++ b/include/nnet/common.h
@@ -0,0 +1,77 @@
+#pragma once
+#include "dbg.h"
+#include <cassert>
+#include <list>
+#include <map>
+#include <optional>
+#include <set>
+#include <string>
+#include <tuple>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+namespace nnet {
+using std::dynamic_pointer_cast;
+using std::endl;
+using std::list;
+using std::make_pair;
+using std::make_shared;
+using std::make_tuple;
+using std::map;
+using std::max;
+using std::min;
+using std::nullopt;
+using std::optional;
+using std::pair;
+using std::set;
+using std::shared_ptr;
+using std::string;
+using std::tie;
+using std::to_string;
+using std::tuple;
+using std::unique_ptr;
+using std::unordered_map;
+template <typename T> using uset = std::unordered_set<T>;
+using std::vector;
+using std::weak_ptr;
+
+// Aliases
+using dtype = float;
+using HashType = int;
+
+template <typename T> struct ptr_less {
+    bool operator()(const T &lhs, const T &rhs) const { return *lhs < *rhs; }
+};
+
+template <typename T> struct ptr_hash {
+    size_t operator()(const T &lhs) const {
+        return std::hash<decltype(*lhs)>()(*lhs);
+    }
+};
+
+template <typename T> struct ptr_equal {
+    bool operator()(const T &lhs, const T &rhs) const { return *lhs == *rhs; }
+};
+
+static inline HashType genhash(HashType a, HashType b) {
+    return (a * 10007 + b + 12345) % 1000000007;
+}
+
+static inline HashType genhash(string s) {
+    HashType ret = 0;
+    for (auto c : s)
+        ret = genhash(ret, c);
+    return ret;
+}
+
+#define nnet_unimplemented_halt()                                              \
+    { assert(!"Unimplemented"); }
+
+#define nnet_unimplemented_continue()                                          \
+    { dbg("Unimplemented"); }
+
+#define nnet_assert(expr, msg) assert(((void)(msg), (expr)))
+
+std::string pointer_to_hex(void *i);
+} // namespace nnet
diff --git a/include/nnet/dbg.h b/include/nnet/dbg.h
new file mode 100644
index 00000000..f5894e4b
--- /dev/null
+++ b/include/nnet/dbg.h
@@ -0,0 +1,858 @@
+/*****************************************************************************
+
+                                dbg(...) macro
+
+License (MIT):
+
+  Copyright (c) 2019 David Peter <mail@david-peter.de>
+
+  Permission is hereby granted, free of charge, to any person obtaining a copy
+  of this software and associated documentation files (the "Software"), to
+  deal in the Software without restriction, including without limitation the
+  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+  sell copies of the Software, and to permit persons to whom the Software is
+  furnished to do so, subject to the following conditions:
+
+  The above copyright notice and this permission notice shall be included in
+  all copies or substantial portions of the Software.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+  SOFTWARE.
+
+*****************************************************************************/
+
+#ifndef DBG_MACRO_DBG_H
+#define DBG_MACRO_DBG_H
+
+#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
+#define DBG_MACRO_UNIX
+#elif defined(_MSC_VER)
+#define DBG_MACRO_WINDOWS
+#endif
+
+// #ifndef DBG_MACRO_NO_WARNING
+// #pragma message("WARNING: the 'dbg.h' header is included in your code base")
+// #endif  // DBG_MACRO_NO_WARNING
+
+#include <algorithm>
+#include <chrono>
+#include <ctime>
+#include <iomanip>
+#include <ios>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <vector>
+
+#ifdef DBG_MACRO_UNIX
+#include <unistd.h>
+#endif
+
+#if __cplusplus >= 201703L
+#define DBG_MACRO_CXX_STANDARD 17
+#elif __cplusplus >= 201402L
+#define DBG_MACRO_CXX_STANDARD 14
+#else
+#define DBG_MACRO_CXX_STANDARD 11
+#endif
+
+#if DBG_MACRO_CXX_STANDARD >= 17
+#include <optional>
+#include <variant>
+#endif
+
+namespace dbg {
+
+#ifdef DBG_MACRO_UNIX
+inline bool isColorizedOutputEnabled() { return isatty(fileno(stderr)); }
+#else
+inline bool isColorizedOutputEnabled() { return true; }
+#endif
+
+struct time {};
+
+namespace pretty_function {
+
+// Compiler-agnostic version of __PRETTY_FUNCTION__ and constants to
+// extract the template argument in `type_name_impl`
+
+#if defined(__clang__)
+#define DBG_MACRO_PRETTY_FUNCTION __PRETTY_FUNCTION__
+static constexpr size_t PREFIX_LENGTH =
+    sizeof("const char *dbg::type_name_impl() [T = ") - 1;
+static constexpr size_t SUFFIX_LENGTH = sizeof("]") - 1;
+#elif defined(__GNUC__) && !defined(__clang__)
+#define DBG_MACRO_PRETTY_FUNCTION __PRETTY_FUNCTION__
+static constexpr size_t PREFIX_LENGTH =
+    sizeof("const char* dbg::type_name_impl() [with T = ") - 1;
+static constexpr size_t SUFFIX_LENGTH = sizeof("]") - 1;
+#elif defined(_MSC_VER)
+#define DBG_MACRO_PRETTY_FUNCTION __FUNCSIG__
+static constexpr size_t PREFIX_LENGTH =
+    sizeof("const char *__cdecl dbg::type_name_impl<") - 1;
+static constexpr size_t SUFFIX_LENGTH = sizeof(">(void)") - 1;
+#else
+#error "This compiler is currently not supported by dbg_macro."
+#endif
+
+} // namespace pretty_function
+
+// Formatting helpers
+
+template <typename T> struct print_formatted {
+    static_assert(std::is_integral<T>::value,
+                  "Only integral types are supported.");
+
+    print_formatted(T value, int numeric_base)
+        : inner(value), base(numeric_base) {}
+
+    operator T() const { return inner; }
+
+    const char *prefix() const {
+        switch (base) {
+        case 8:
+            return "0o";
+        case 16:
+            return "0x";
+        case 2:
+            return "0b";
+        default:
+            return "";
+        }
+    }
+
+    T inner;
+    int base;
+};
+
+template <typename T> print_formatted<T> hex(T value) {
+    return print_formatted<T>{value, 16};
+}
+
+template <typename T> print_formatted<T> oct(T value) {
+    return print_formatted<T>{value, 8};
+}
+
+template <typename T> print_formatted<T> bin(T value) {
+    return print_formatted<T>{value, 2};
+}
+
+// Implementation of 'type_name<T>()'
+
+template <typename T> const char *type_name_impl() {
+    return DBG_MACRO_PRETTY_FUNCTION;
+}
+
+template <typename T> struct type_tag {};
+
+template <int &...ExplicitArgumentBarrier, typename T>
+std::string get_type_name(type_tag<T>) {
+    namespace pf = pretty_function;
+
+    std::string type = type_name_impl<T>();
+    return type.substr(pf::PREFIX_LENGTH,
+                       type.size() - pf::PREFIX_LENGTH - pf::SUFFIX_LENGTH);
+}
+
+template <typename T> std::string type_name() {
+    if (std::is_volatile<T>::value) {
+        if (std::is_pointer<T>::value) {
+            return type_name<typename std::remove_volatile<T>::type>() +
+                   " volatile";
+        } else {
+            return "volatile " +
+                   type_name<typename std::remove_volatile<T>::type>();
+        }
+    }
+    if (std::is_const<T>::value) {
+        if (std::is_pointer<T>::value) {
+            return type_name<typename std::remove_const<T>::type>() + " const";
+        } else {
+            return "const " + type_name<typename std::remove_const<T>::type>();
+        }
+    }
+    if (std::is_pointer<T>::value) {
+        return type_name<typename std::remove_pointer<T>::type>() + "*";
+    }
+    if (std::is_lvalue_reference<T>::value) {
+        return type_name<typename std::remove_reference<T>::type>() + "&";
+    }
+    if (std::is_rvalue_reference<T>::value) {
+        return type_name<typename std::remove_reference<T>::type>() + "&&";
+    }
+    return get_type_name(type_tag<T>{});
+}
+
+inline std::string get_type_name(type_tag<short>) { return "short"; }
+
+inline std::string get_type_name(type_tag<unsigned short>) {
+    return "unsigned short";
+}
+
+inline std::string get_type_name(type_tag<long>) { return "long"; }
+
+inline std::string get_type_name(type_tag<unsigned long>) {
+    return "unsigned long";
+}
+
+inline std::string get_type_name(type_tag<std::string>) {
+    return "std::string";
+}
+
+template <typename T>
+std::string get_type_name(type_tag<std::vector<T, std::allocator<T>>>) {
+    return "std::vector<" + type_name<T>() + ">";
+}
+
+template <typename T1, typename T2>
+std::string get_type_name(type_tag<std::pair<T1, T2>>) {
+    return "std::pair<" + type_name<T1>() + ", " + type_name<T2>() + ">";
+}
+
+template <typename... T> std::string type_list_to_string() {
+    std::string result;
+    auto unused = {(result += type_name<T>() + ", ", 0)..., 0};
+    static_cast<void>(unused);
+
+#if DBG_MACRO_CXX_STANDARD >= 17
+    if constexpr (sizeof...(T) > 0) {
+#else
+    if (sizeof...(T) > 0) {
+#endif
+        result.pop_back();
+        result.pop_back();
+    }
+    return result;
+}
+
+template <typename... T> std::string get_type_name(type_tag<std::tuple<T...>>) {
+    return "std::tuple<" + type_list_to_string<T...>() + ">";
+}
+
+template <typename T>
+inline std::string get_type_name(type_tag<print_formatted<T>>) {
+    return type_name<T>();
+}
+
+// Implementation of 'is_detected' to specialize for container-like types
+
+namespace detail_detector {
+
+struct nonesuch {
+    nonesuch() = delete;
+    ~nonesuch() = delete;
+    nonesuch(nonesuch const &) = delete;
+    void operator=(nonesuch const &) = delete;
+};
+
+template <typename...> using void_t = void;
+
+template <class Default, class AlwaysVoid, template <class...> class Op,
+          class... Args>
+struct detector {
+    using value_t = std::false_type;
+    using type = Default;
+};
+
+template <class Default, template <class...> class Op, class... Args>
+struct detector<Default, void_t<Op<Args...>>, Op, Args...> {
+    using value_t = std::true_type;
+    using type = Op<Args...>;
+};
+
+} // namespace detail_detector
+
+template <template <class...> class Op, class... Args>
+using is_detected =
+    typename detail_detector::detector<detail_detector::nonesuch, void, Op,
+                                       Args...>::value_t;
+
+namespace detail {
+
+namespace {
+using std::begin;
+using std::end;
+#if DBG_MACRO_CXX_STANDARD < 17
+template <typename T> constexpr auto size(const T &c) -> decltype(c.size()) {
+    return c.size();
+}
+template <typename T, std::size_t N>
+constexpr std::size_t size(const T (&)[N]) {
+    return N;
+}
+#else
+using std::size;
+#endif
+} // namespace
+
+template <typename T>
+using detect_begin_t = decltype(detail::begin(std::declval<T>()));
+
+template <typename T>
+using detect_end_t = decltype(detail::end(std::declval<T>()));
+
+template <typename T>
+using detect_size_t = decltype(detail::size(std::declval<T>()));
+
+template <typename T> struct is_container {
+    static constexpr bool value =
+        is_detected<detect_begin_t, T>::value &&
+        is_detected<detect_end_t, T>::value &&
+        is_detected<detect_size_t, T>::value &&
+        !std::is_same<std::string,
+                      typename std::remove_cv<typename std::remove_reference<
+                          T>::type>::type>::value;
+};
+
+template <typename T>
+using ostream_operator_t =
+    decltype(std::declval<std::ostream &>() << std::declval<T>());
+
+template <typename T>
+struct has_ostream_operator : is_detected<ostream_operator_t, T> {};
+
+} // namespace detail
+
+// Helper to dbg(…)-print types
+template <typename T> struct print_type {};
+
+template <typename T> print_type<T> type() { return print_type<T>{}; }
+
+// Forward declarations of "pretty_print"
+
+template <typename T>
+inline void pretty_print(std::ostream &stream, const T &value, std::true_type);
+
+template <typename T>
+inline void pretty_print(std::ostream &, const T &, std::false_type);
+
+template <typename T>
+inline typename std::enable_if<!detail::is_container<const T &>::value &&
+                                   !std::is_enum<T>::value,
+                               bool>::type
+pretty_print(std::ostream &stream, const T &value);
+
+inline bool pretty_print(std::ostream &stream, const bool &value);
+
+inline bool pretty_print(std::ostream &stream, const char &value);
+
+template <typename P>
+inline bool pretty_print(std::ostream &stream, P *const &value);
+
+template <typename T, typename Deleter>
+inline bool pretty_print(std::ostream &stream,
+                         std::unique_ptr<T, Deleter> &value);
+
+// template <typename T>
+// inline bool pretty_print(std::ostream& stream, std::shared_ptr<T>& value);
+
+template <size_t N>
+inline bool pretty_print(std::ostream &stream, const char (&value)[N]);
+
+template <>
+inline bool pretty_print(std::ostream &stream, const char *const &value);
+
+template <typename... Ts>
+inline bool pretty_print(std::ostream &stream, const std::tuple<Ts...> &value);
+
+template <>
+inline bool pretty_print(std::ostream &stream, const std::tuple<> &);
+
+template <> inline bool pretty_print(std::ostream &stream, const time &);
+
+template <typename T>
+inline bool pretty_print(std::ostream &stream, const print_formatted<T> &value);
+
+template <typename T>
+inline bool pretty_print(std::ostream &stream, const print_type<T> &);
+
+template <typename Enum>
+inline typename std::enable_if<std::is_enum<Enum>::value, bool>::type
+pretty_print(std::ostream &stream, Enum const &value);
+
+inline bool pretty_print(std::ostream &stream, const std::string &value);
+
+#if DBG_MACRO_CXX_STANDARD >= 17
+
+inline bool pretty_print(std::ostream &stream, const std::string_view &value);
+
+#endif
+
+template <typename T1, typename T2>
+inline bool pretty_print(std::ostream &stream, const std::pair<T1, T2> &value);
+
+#if DBG_MACRO_CXX_STANDARD >= 17
+
+template <typename T>
+inline bool pretty_print(std::ostream &stream, const std::optional<T> &value);
+
+template <typename... Ts>
+inline bool pretty_print(std::ostream &stream,
+                         const std::variant<Ts...> &value);
+
+#endif
+
+template <typename Container>
+inline typename std::enable_if<detail::is_container<const Container &>::value,
+                               bool>::type
+pretty_print(std::ostream &stream, const Container &value);
+
+// Specializations of "pretty_print"
+
+template <typename T>
+inline void pretty_print(std::ostream &stream, const T &value, std::true_type) {
+    stream << value;
+}
+
+template <typename T>
+inline void pretty_print(std::ostream &, const T &, std::false_type) {
+    static_assert(detail::has_ostream_operator<const T &>::value,
+                  "Type does not support the << ostream operator");
+}
+
+template <typename T>
+inline typename std::enable_if<!detail::is_container<const T &>::value &&
+                                   !std::is_enum<T>::value,
+                               bool>::type
+pretty_print(std::ostream &stream, const T &value) {
+    pretty_print(stream, value,
+                 typename detail::has_ostream_operator<const T &>::type{});
+    return true;
+}
+
+inline bool pretty_print(std::ostream &stream, const bool &value) {
+    stream << std::boolalpha << value;
+    return true;
+}
+
+inline bool pretty_print(std::ostream &stream, const char &value) {
+    const bool printable = value >= 0x20 && value <= 0x7E;
+
+    if (printable) {
+        stream << "'" << value << "'";
+    } else {
+        stream << "'\\x" << std::setw(2) << std::setfill('0') << std::hex
+               << std::uppercase << (0xFF & value) << "'";
+    }
+    return true;
+}
+
+template <typename P>
+inline bool pretty_print(std::ostream &stream, P *const &value) {
+    if (value == nullptr) {
+        stream << "nullptr";
+    } else {
+        stream << value;
+    }
+    return true;
+}
+
+template <typename T, typename Deleter>
+inline bool pretty_print(std::ostream &stream,
+                         std::unique_ptr<T, Deleter> &value) {
+    pretty_print(stream, value.get());
+    return true;
+}
+
+// template <typename T>
+// inline bool pretty_print(std::ostream& stream, std::shared_ptr<T>& value) {
+//   pretty_print(stream, value.get());
+//   stream << " (use_count = " << value.use_count() << ")";
+//   return true;
+// }
+
+template <size_t N>
+inline bool pretty_print(std::ostream &stream, const char (&value)[N]) {
+    stream << value;
+    return false;
+}
+
+template <>
+inline bool pretty_print(std::ostream &stream, const char *const &value) {
+    stream << '"' << value << '"';
+    return true;
+}
+
+template <size_t Idx> struct pretty_print_tuple {
+    template <typename... Ts>
+    static void print(std::ostream &stream, const std::tuple<Ts...> &tuple) {
+        pretty_print_tuple<Idx - 1>::print(stream, tuple);
+        stream << ", ";
+        pretty_print(stream, std::get<Idx>(tuple));
+    }
+};
+
+template <> struct pretty_print_tuple<0> {
+    template <typename... Ts>
+    static void print(std::ostream &stream, const std::tuple<Ts...> &tuple) {
+        pretty_print(stream, std::get<0>(tuple));
+    }
+};
+
+template <typename... Ts>
+inline bool pretty_print(std::ostream &stream, const std::tuple<Ts...> &value) {
+    stream << "{";
+    pretty_print_tuple<sizeof...(Ts) - 1>::print(stream, value);
+    stream << "}";
+
+    return true;
+}
+
+template <>
+inline bool pretty_print(std::ostream &stream, const std::tuple<> &) {
+    stream << "{}";
+
+    return true;
+}
+
+template <> inline bool pretty_print(std::ostream &stream, const time &) {
+    using namespace std::chrono;
+
+    const auto now = system_clock::now();
+    const auto us =
+        duration_cast<microseconds>(now.time_since_epoch()).count() % 1000000;
+    const auto hms = system_clock::to_time_t(now);
+    const std::tm *tm = std::localtime(&hms);
+    stream << "current time = " << std::put_time(tm, "%H:%M:%S") << '.'
+           << std::setw(6) << std::setfill('0') << us;
+
+    return false;
+}
+
+// Converts decimal integer to binary string
+template <typename T> std::string decimalToBinary(T n) {
+    const size_t length = 8 * sizeof(T);
+    std::string toRet;
+    toRet.resize(length);
+
+    for (size_t i = 0; i < length; ++i) {
+        const auto bit_at_index_i = static_cast<char>((n >> i) & 1);
+        toRet[length - 1 - i] = bit_at_index_i + '0';
+    }
+
+    return toRet;
+}
+
+template <typename T>
+inline bool pretty_print(std::ostream &stream,
+                         const print_formatted<T> &value) {
+    if (value.inner < 0) {
+        stream << "-";
+    }
+    stream << value.prefix();
+
+    // Print using setbase
+    if (value.base != 2) {
+        stream << std::setw(sizeof(T)) << std::setfill('0')
+               << std::setbase(value.base) << std::uppercase;
+
+        if (value.inner >= 0) {
+            // The '+' sign makes sure that a uint_8 is printed as a number
+            stream << +value.inner;
+        } else {
+            using unsigned_type = typename std::make_unsigned<T>::type;
+            stream << +(static_cast<unsigned_type>(-(value.inner + 1)) + 1);
+        }
+    } else {
+        // Print for binary
+        if (value.inner >= 0) {
+            stream << decimalToBinary(value.inner);
+        } else {
+            using unsigned_type = typename std::make_unsigned<T>::type;
+            stream << decimalToBinary<unsigned_type>(
+                static_cast<unsigned_type>(-(value.inner + 1)) + 1);
+        }
+    }
+
+    return true;
+}
+
+template <typename T>
+inline bool pretty_print(std::ostream &stream, const print_type<T> &) {
+    stream << type_name<T>();
+
+    stream << " [sizeof: " << sizeof(T) << " byte, ";
+
+    stream << "trivial: ";
+    if (std::is_trivial<T>::value) {
+        stream << "yes";
+    } else {
+        stream << "no";
+    }
+
+    stream << ", standard layout: ";
+    if (std::is_standard_layout<T>::value) {
+        stream << "yes";
+    } else {
+        stream << "no";
+    }
+    stream << "]";
+
+    return false;
+}
+
+template <typename Enum>
+inline typename std::enable_if<std::is_enum<Enum>::value, bool>::type
+pretty_print(std::ostream &stream, Enum const &value) {
+    using UnderlyingType = typename std::underlying_type<Enum>::type;
+    stream << static_cast<UnderlyingType>(value);
+
+    return true;
+}
+
+inline bool pretty_print(std::ostream &stream, const std::string &value) {
+    stream << '"' << value << '"';
+    return true;
+}
+
+#if DBG_MACRO_CXX_STANDARD >= 17
+
+inline bool pretty_print(std::ostream &stream, const std::string_view &value) {
+    stream << '"' << std::string(value) << '"';
+    return true;
+}
+
+#endif
+
+template <typename T1, typename T2>
+inline bool pretty_print(std::ostream &stream, const std::pair<T1, T2> &value) {
+    stream << "{";
+    pretty_print(stream, value.first);
+    stream << ", ";
+    pretty_print(stream, value.second);
+    stream << "}";
+    return true;
+}
+
+#if DBG_MACRO_CXX_STANDARD >= 17
+
+template <typename T>
+inline bool pretty_print(std::ostream &stream, const std::optional<T> &value) {
+    if (value) {
+        stream << '{';
+        pretty_print(stream, *value);
+        stream << '}';
+    } else {
+        stream << "nullopt";
+    }
+
+    return true;
+}
+
+template <typename... Ts>
+inline bool pretty_print(std::ostream &stream,
+                         const std::variant<Ts...> &value) {
+    stream << "{";
+    std::visit([&stream](auto &&arg) { pretty_print(stream, arg); }, value);
+    stream << "}";
+
+    return true;
+}
+
+#endif
+
+template <typename Container>
+inline typename std::enable_if<detail::is_container<const Container &>::value,
+                               bool>::type
+pretty_print(std::ostream &stream, const Container &value) {
+    stream << "{";
+    const size_t size = detail::size(value);
+    const size_t n = std::min(size_t{10}, size);
+    size_t i = 0;
+    using std::begin;
+    using std::end;
+    for (auto it = begin(value); it != end(value) && i < n; ++it, ++i) {
+        pretty_print(stream, *it);
+        if (i != n - 1) {
+            stream << ", ";
+        }
+    }
+
+    if (size > n) {
+        stream << ", ...";
+        stream << " size:" << size;
+    }
+
+    stream << "}";
+    return true;
+}
+
+template <typename T, typename... U> struct last {
+    using type = typename last<U...>::type;
+};
+
+template <typename T> struct last<T> { using type = T; };
+
+template <typename... T> using last_t = typename last<T...>::type;
+
+class DebugOutput {
+  public:
+    // Helper alias to avoid obscure type `const char* const*` in signature.
+    using expr_t = const char *;
+
+    DebugOutput(const char *filepath, int line, const char *function_name)
+        : m_use_colorized_output(isColorizedOutputEnabled()) {
+        std::string path = filepath;
+        const std::size_t path_length = path.length();
+        if (path_length > MAX_PATH_LENGTH) {
+            path = ".." +
+                   path.substr(path_length - MAX_PATH_LENGTH, MAX_PATH_LENGTH);
+        }
+        std::stringstream ss;
+        ss << ansi(ANSI_DEBUG) << "[" << path << ":" << line << " ("
+           << function_name << ")] " << ansi(ANSI_RESET);
+        m_location = ss.str();
+    }
+
+    template <typename... T>
+    auto print(std::initializer_list<expr_t> exprs,
+               std::initializer_list<std::string> types, T &&...values)
+        -> last_t<T...> {
+        if (exprs.size() != sizeof...(values)) {
+            std::cerr << m_location << ansi(ANSI_WARN)
+                      << "The number of arguments mismatch, please check "
+                         "unprotected comma"
+                      << ansi(ANSI_RESET) << std::endl;
+        }
+        return print_impl(exprs.begin(), types.begin(),
+                          std::forward<T>(values)...);
+    }
+
+  private:
+    template <typename T>
+    T &&print_impl(const expr_t *expr, const std::string *type, T &&value) {
+        const T &ref = value;
+        std::stringstream stream_value;
+        const bool print_expr_and_type = pretty_print(stream_value, ref);
+
+        std::stringstream output;
+        output << m_location;
+        if (print_expr_and_type) {
+            output << ansi(ANSI_EXPRESSION) << *expr << ansi(ANSI_RESET)
+                   << " = ";
+        }
+        output << ansi(ANSI_VALUE) << stream_value.str() << ansi(ANSI_RESET);
+        if (print_expr_and_type) {
+            output << " (" << ansi(ANSI_TYPE) << *type << ansi(ANSI_RESET)
+                   << ")";
+        }
+        output << std::endl;
+        std::cerr << output.str();
+
+        return std::forward<T>(value);
+    }
+
+    template <typename T, typename... U>
+    auto print_impl(const expr_t *exprs, const std::string *types, T &&value,
+                    U &&...rest) -> last_t<T, U...> {
+        print_impl(exprs, types, std::forward<T>(value));
+        return print_impl(exprs + 1, types + 1, std::forward<U>(rest)...);
+    }
+
+    const char *ansi(const char *code) const {
+        if (m_use_colorized_output) {
+            return code;
+        } else {
+            return ANSI_EMPTY;
+        }
+    }
+
+    const bool m_use_colorized_output;
+
+    std::string m_location;
+
+    static constexpr std::size_t MAX_PATH_LENGTH = 20;
+
+    static constexpr const char *const ANSI_EMPTY = "";
+    static constexpr const char *const ANSI_DEBUG = "\x1b[02m";
+    static constexpr const char *const ANSI_WARN = "\x1b[33m";
+    static constexpr const char *const ANSI_EXPRESSION = "\x1b[36m";
+    static constexpr const char *const ANSI_VALUE = "\x1b[01m";
+    static constexpr const char *const ANSI_TYPE = "\x1b[32m";
+    static constexpr const char *const ANSI_RESET = "\x1b[0m";
+};
+
+// Identity function to suppress "-Wunused-value" warnings in DBG_MACRO_DISABLE
+// mode
+template <typename T> T &&identity(T &&t) { return std::forward<T>(t); }
+
+template <typename T, typename... U>
+auto identity(T &&, U &&...u) -> last_t<U...> {
+    return identity(std::forward<U>(u)...);
+}
+
+} // namespace dbg
+
+#ifndef DBG_MACRO_DISABLE
+
+// Force expanding argument with commas for MSVC, ref:
+// https://stackoverflow.com/questions/35210637/macro-expansion-argument-with-commas
+// Note that "args" should be a tuple with parentheses, such as "(e1, e2, ...)".
+#define DBG_IDENTITY(x) x
+#define DBG_CALL(fn, args) DBG_IDENTITY(fn args)
+
+#define DBG_CAT_IMPL(_1, _2) _1##_2
+#define DBG_CAT(_1, _2) DBG_CAT_IMPL(_1, _2)
+
+#define DBG_16TH_IMPL(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13,  \
+                      _14, _15, _16, ...)                                      \
+    _16
+#define DBG_16TH(args) DBG_CALL(DBG_16TH_IMPL, args)
+#define DBG_NARG(...)                                                          \
+    DBG_16TH(                                                                  \
+        (__VA_ARGS__, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0))
+
+// DBG_VARIADIC_CALL(fn, data, e1, e2, ...) => fn_N(data, (e1, e2, ...))
+#define DBG_VARIADIC_CALL(fn, data, ...)                                       \
+    DBG_CAT(fn##_, DBG_NARG(__VA_ARGS__))(data, (__VA_ARGS__))
+
+// (e1, e2, e3, ...) => e1
+#define DBG_HEAD_IMPL(_1, ...) _1
+#define DBG_HEAD(args) DBG_CALL(DBG_HEAD_IMPL, args)
+
+// (e1, e2, e3, ...) => (e2, e3, ...)
+#define DBG_TAIL_IMPL(_1, ...) (__VA_ARGS__)
+#define DBG_TAIL(args) DBG_CALL(DBG_TAIL_IMPL, args)
+
+#define DBG_MAP_1(fn, args) DBG_CALL(fn, args)
+#define DBG_MAP_2(fn, args) fn(DBG_HEAD(args)), DBG_MAP_1(fn, DBG_TAIL(args))
+#define DBG_MAP_3(fn, args) fn(DBG_HEAD(args)), DBG_MAP_2(fn, DBG_TAIL(args))
+#define DBG_MAP_4(fn, args) fn(DBG_HEAD(args)), DBG_MAP_3(fn, DBG_TAIL(args))
+#define DBG_MAP_5(fn, args) fn(DBG_HEAD(args)), DBG_MAP_4(fn, DBG_TAIL(args))
+#define DBG_MAP_6(fn, args) fn(DBG_HEAD(args)), DBG_MAP_5(fn, DBG_TAIL(args))
+#define DBG_MAP_7(fn, args) fn(DBG_HEAD(args)), DBG_MAP_6(fn, DBG_TAIL(args))
+#define DBG_MAP_8(fn, args) fn(DBG_HEAD(args)), DBG_MAP_7(fn, DBG_TAIL(args))
+#define DBG_MAP_9(fn, args) fn(DBG_HEAD(args)), DBG_MAP_8(fn, DBG_TAIL(args))
+#define DBG_MAP_10(fn, args) fn(DBG_HEAD(args)), DBG_MAP_9(fn, DBG_TAIL(args))
+#define DBG_MAP_11(fn, args) fn(DBG_HEAD(args)), DBG_MAP_10(fn, DBG_TAIL(args))
+#define DBG_MAP_12(fn, args) fn(DBG_HEAD(args)), DBG_MAP_11(fn, DBG_TAIL(args))
+#define DBG_MAP_13(fn, args) fn(DBG_HEAD(args)), DBG_MAP_12(fn, DBG_TAIL(args))
+#define DBG_MAP_14(fn, args) fn(DBG_HEAD(args)), DBG_MAP_13(fn, DBG_TAIL(args))
+#define DBG_MAP_15(fn, args) fn(DBG_HEAD(args)), DBG_MAP_14(fn, DBG_TAIL(args))
+#define DBG_MAP_16(fn, args) fn(DBG_HEAD(args)), DBG_MAP_15(fn, DBG_TAIL(args))
+
+// DBG_MAP(fn, e1, e2, e3, ...) => fn(e1), fn(e2), fn(e3), ...
+#define DBG_MAP(fn, ...) DBG_VARIADIC_CALL(DBG_MAP, fn, __VA_ARGS__)
+
+#define DBG_STRINGIFY_IMPL(x) #x
+#define DBG_STRINGIFY(x) DBG_STRINGIFY_IMPL(x)
+
+#define DBG_TYPE_NAME(x) dbg::type_name<decltype(x)>()
+
+#define dbg(...)                                                               \
+    dbg::DebugOutput(__FILE__, __LINE__, __func__)                             \
+        .print({DBG_MAP(DBG_STRINGIFY, __VA_ARGS__)},                          \
+               {DBG_MAP(DBG_TYPE_NAME, __VA_ARGS__)}, __VA_ARGS__)
+#else
+#define dbg(...) dbg::identity(__VA_ARGS__)
+#endif // DBG_MACRO_DISABLE
+
+#endif // DBG_MACRO_DBG_H
\ No newline at end of file
diff --git a/include/nnet/derivator.h b/include/nnet/derivator.h
new file mode 100644
index 00000000..2ef3622a
--- /dev/null
+++ b/include/nnet/derivator.h
@@ -0,0 +1,156 @@
+#pragma once
+#include "common.h"
+#include "expr.h"
+#include "iterator_table.h"
+#include "routine.h"
+#include <iostream>
+#include <sstream>
+#include <unordered_set>
+
+namespace nnet {
+
+class Formula {
+  public:
+    Expr root;
+    const int bfsDepth;
+
+  public:
+    Formula(Expr _root, int _bfsDepth) : root(_root), bfsDepth(_bfsDepth) {}
+    string toReadable() const;
+    friend std::ostream &operator<<(std::ostream &ios, const Formula &expr) {
+        ios << expr.toReadable();
+        return ios;
+    }
+    bool isVariable() const { return as<VarNode>(root) != nullptr; }
+};
+
+class MultiFormulas {
+  public:
+    VecExpr roots;
+    const int bfsDepth;
+
+  public:
+    MultiFormulas(VecExpr roots, int _bfsDepth)
+        : roots(roots), bfsDepth(_bfsDepth) {}
+    // string toReadable() const;
+    // friend std::ostream &operator<<(std::ostream &ios, const Formula &expr) {
+    //     ios << expr.toReadable();
+    //     return ios;
+    // }
+};
+
+class Derivator {
+  public:
+    enum class LogMode { Normal, DumpFristCandiate, NoLog };
+    enum class PassMode { Debug, Full };
+
+  private:
+    list<Formula> candidates;
+    const int maxDepth;
+    int nIteratorNames = 0;
+    int nTensorNames = 0;
+    vector<vector<int>> rulesOverall;
+    enum class Strategy { DFS, Rule, RuleAndDFS } searchStrategy;
+    LogMode logMode;
+    PassMode passMode;
+    bool enableEquivalenceCheck = false;
+    string logFnPrefix;
+    const bool enableHashPruning;
+    int searchedMaxDepth = 0;
+    RoutineType targetOp = RoutineType::NoneType;
+    map<int, vector<Var>> substituteRules;
+
+    vector<int> cntAppliedRules;
+    int cntRule3 = 0;
+    std::unordered_set<HashType> visited;
+    VecExpr intermediateStates;
+    vector<string> ruleStates, ruleMsgs;
+    int cntStates = 0;   // the number of intermediate states
+    int searchState = 0; // search state in guided search
+
+  public:
+    Derivator(int maxDepth = 8, bool enableHashPruning = true,
+              LogMode mode = LogMode::NoLog,
+              PassMode passMode = PassMode::Debug);
+    void search(Formula &origin, int depth);
+    void ruleBasedDFS(Formula &origin, int depth, vector<int> _rules,
+                      map<int, vector<Var>> _substituteRules = {},
+                      bool searchAfterRules = false);
+    void guidedSearch(Formula &origin, int depth);
+    void print();
+    int getNumCandidates() const { return candidates.size(); }
+    const auto &getCandidates() const { return candidates; }
+    void appendCanddiate(const Tensor &tensor, int depth);
+    int getSearchedMaxDepth() const { return searchedMaxDepth; };
+    bool stageCombination(MultiFormulas &origin, int depth);
+    bool checkOOB(const RangeOp &rangeOp, bool halt = true);
+
+    string newTensorName();
+    Var getNewVar();
+
+    Expr mergeMemboundStages(VecExpr stages);
+
+  private:
+    void dfs(Formula &origin, int depth);
+    void ruleBasedDerivate(Formula &origin, int depth);
+
+    void rule1VariableSplit(Formula &origin, int depth, Expr &rCur);
+    void rule2VariableMerging(Formula &origin, int depth, Expr &rCur);
+    void rule3StageSplit(Formula &origin, int dfsDepth, Expr &rCur);
+    void rule5RangeRelaxation(Formula &origin, int depth, Expr &rCur);
+    bool rule4StageMerging(Formula &origin, int depth, Expr &rCur,
+                           bool mergeStageWithCalc = false);
+    void rule6KenerlMatching(Formula &origin, int depth, Expr &rCur);
+    void rule7DLT(Formula &origin, int depth, Expr &rCur);
+    // Rule 8: guidedDLT
+    void rule8GuidedDLT(Formula &origin, int depth, Expr &rCur);
+    void rule9RangeMagnify(Formula &origin, int depth, Expr &rCur);
+    void rule90TwoStageElementWise(Formula &origin, int depth, Expr &rCur);
+    void rule91MergeStagesWithSum(Formula &origin, int depth, Expr &rCur);
+    /**
+     * @brief For searchState=2, wrap the RangeOp to add offset, if the boundary
+     * does not start from 0. Then match the inner offset RangeOp.
+     */
+    void matchComputationKernel(Formula &origin, int depth, Expr &rcur);
+    /**
+     * @brief For searchState=3, the Formula must be a MemBound kernel?
+     */
+    void matchMemBoundKernel(Formula &origin, int depth, Expr &rcur);
+
+    /**
+     * @brief Check the equivalence for exprs in intermediateStates.
+     */
+    void checkDerivationEquivalence();
+
+  public:
+    void pushIntermediateState(const Expr &expr);
+    void pushRuleState(const string &state);
+    void pushRuleMsg(const string &state);
+    void popIntermediateState();
+    void popRuleState();
+    void popRuleMsg();
+    // void pushTransformInfo(const Expr &expr, const string &state,
+    //                        const string &msg);
+    void nextStep(Formula &origin, int depth, Expr &rCur, Expr newCur);
+
+    RoutineType getTargetOp();
+    void setTargetOp(RoutineType _targetOp);
+
+    int getSearchState();
+    void setSearchState(int _searchState);
+    int getNumIntermediateStates();
+    void printStatistics();
+    void printIntermediateStates();
+    /**
+     * @brief Enable dumping the first results. Verification is enabled so it is
+     * slow.
+     *
+     * @param _logFnPrefix Prefix of output filename
+     */
+    void setDumpFirstSuccess(const string &_logFnPrefix);
+    void setEquivalenceCheck();
+    PassMode getPassMode();
+    LogMode getLogMode();
+};
+
+} // namespace nnet
diff --git a/include/nnet/dlt.h b/include/nnet/dlt.h
new file mode 100644
index 00000000..4e5e56ce
--- /dev/null
+++ b/include/nnet/dlt.h
@@ -0,0 +1,53 @@
+#pragma once
+#include "common.h"
+#include "expr.h"
+#include <iostream>
+
+namespace nnet {
+
+// enum class DLTType { Split, Merge, Reorder };
+
+struct DLTOperation {
+    // DLTType type;
+    virtual ~DLTOperation() {}
+};
+struct DLTSplit : DLTOperation {
+    int dim, factor;
+    DLTSplit(int _dim, int _factor) : dim(_dim), factor(_factor) {}
+};
+struct DLTMerge : DLTOperation {
+    int dim0, dim1;
+    DLTMerge(int _dim0, int _dim1) : dim0(_dim0), dim1(_dim1) {}
+};
+struct DLTReorder : DLTOperation {
+    vector<int> dims;
+    DLTReorder(vector<int> _dims) : dims(_dims) {}
+};
+
+class DLT {
+    vector<Ref<DLTOperation>> ops;
+
+  public:
+    /**
+     * @brief dim -> (dim/factor, factor)
+     */
+    void split(int dim, int factor);
+    /**
+     * @brief Merge dim1 into dim0 -> (dim0, dim1)
+     */
+    void merge(int dim0, int dim1);
+    /**
+     * @brief
+     *
+     * @param dims dims[new_dim]=old_dim
+     */
+    void reorder(vector<int> dims);
+    optional<Expr> apply(const RangeOp &rangeOp, const Subscript &subscript,
+                         string newTensorName);
+
+  private:
+    optional<pair<Expr, Expr>> splitIndex(Expr expr, int factor,
+                                          RangeOp rangeOp);
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/expr.h b/include/nnet/expr.h
new file mode 100644
index 00000000..5e9a56fc
--- /dev/null
+++ b/include/nnet/expr.h
@@ -0,0 +1,416 @@
+#pragma once
+#include "common.h"
+#include "ref.h"
+#include <iostream>
+#include <numeric>
+#include <type_traits>
+
+namespace nnet {
+
+class ExprNode;
+class VarNode;
+class TensorNode;
+class OperatorNode;
+class RangeOpNode;
+class SubscriptNode;
+class BinaryOpNode;
+class ConstantNode;
+class FuncNode;
+using Expr = Ref<ExprNode>;
+using Var = Ref<VarNode>;
+using Tensor = Ref<TensorNode>;
+using Operator = Ref<OperatorNode>;
+using RangeOp = Ref<RangeOpNode>;
+using Subscript = Ref<SubscriptNode>;
+using BinaryOp = Ref<BinaryOpNode>;
+using Constant = Ref<ConstantNode>;
+using Func = Ref<FuncNode>;
+
+class RoutineNode;
+using Routine = Ref<RoutineNode>;
+enum class RoutineType {
+    NoneType = 100,
+    MatmulNodeType,
+    ConvNodeType,
+    G2bmmNodeType,
+    GbmmNodeType,
+    ElementWiseNodeType // unmatchable
+};
+constexpr inline int MatchableRoutineTypeCnt = 4;
+constexpr inline int RoutineTypeCnt = MatchableRoutineTypeCnt + 1;
+inline RoutineType idToRoutineType(int i) {
+    return static_cast<RoutineType>(i + 1 +
+                                    static_cast<int>(RoutineType::NoneType));
+}
+inline int routineTypeToId(const RoutineType &routineType) {
+    return static_cast<int>(routineType) -
+           static_cast<int>(RoutineType::NoneType) - 1;
+}
+
+using VecExpr = vector<Expr>;
+
+// common data structure
+using Iterator = Var; // RE: remove this alias
+template <typename T, typename U> using PtrMap = std::map<T, U, ptr_less<T>>;
+template <typename T, typename U>
+// When keys are pointers, compare keys according to its value instead of
+// address Specially, the name of Var are compared due to the overload of op=
+// and hash.
+using PtrUmap = std::unordered_map<T, U, ptr_hash<T>, ptr_equal<T>>;
+template <typename T>
+using PtrUset = std::unordered_set<T, ptr_hash<T>, ptr_equal<T>>;
+using Appearance = PtrMap<Var, vector<pair<Tensor, int>>>;
+using StrideTable =
+    PtrMap<Var, vector<tuple<TensorNode *, int, int>>>; // Tensor, dim, stride
+
+// AST node opeartor
+bool operator==(const Var &lhs, const string &rhs);
+bool operator==(const string &lhs, const Var &rhs);
+Expr operator+(const Expr &lhs, const Expr &rhs);
+BinaryOp operator-(const Expr &lhs, const Expr &rhs);
+BinaryOp operator*(const Expr &lhs, const Expr &rhs);
+BinaryOp operator/(const Expr &lhs, const Expr &rhs);
+BinaryOp operator%(const Expr &lhs, const Expr &rhs);
+
+Expr operator+(const Expr &lhs, const int &rhs);
+Expr operator+(const int &lhs, const Expr &rhs);
+Expr operator-(const Expr &lhs, const int &rhs);
+Expr operator-(const int &lhs, const Expr &rhs);
+Expr operator*(const Expr &lhs, const int &rhs);
+Expr operator*(const int &lhs, const Expr &rhs);
+Expr operator%(const Expr &lhs, const int rhs);
+Expr operator/(const Expr &lhs, const int rhs);
+
+string serializeVec(vector<Expr> v);
+string serializeVec(vector<Var> v);
+template <typename T> inline string serializeVec(vector<T> v) {
+    if (v.empty())
+        return "[]";
+    return "[" +
+           std::accumulate(
+               v.begin() + 1, v.end(), to_string(v[0]),
+               [](const string &a, int b) { return a + ',' + to_string(b); }) +
+           "]";
+}
+
+// For RTTI and visitor pattern
+enum class NodeType {
+    ConstantNodeType,
+    BinaryOpNodeType,
+    RangeOpNodeType,
+    SubscriptNodeType,
+    TensorNodeType,
+    VarNodeType,
+    FuncNodeType
+};
+
+enum class FuncType { Relu, Tanh };
+
+#define DEFINE_GETTYPE(CLASS)                                                  \
+    NodeType getType() const override { return NodeType::CLASS##Type; }
+
+class ExprNode {
+  public:
+    virtual ~ExprNode() {}
+    ExprNode &operator=(const ExprNode &rhs) = delete;
+
+    virtual HashType hash() const = 0; // RE: remove?
+    virtual string toReadable() const = 0;
+    friend std::ostream &operator<<(std::ostream &ios, const ExprNode &expr);
+
+    virtual NodeType getType() const = 0;
+};
+
+class VarNode : public ExprNode {
+    std::string name;
+
+  public:
+    VarNode(std::string _name) : name(_name){};
+    virtual ~VarNode() {}
+    DEFINE_GETTYPE(VarNode);
+
+    const std::string &getName() const { return name; }
+    HashType hash() const override { return genhash(name); };
+    string toReadable() const override { return name; };
+    bool equal(const Var &rhs) const { return name == rhs->getName(); }
+    bool neq(const Var &rhs) const { return !equal(rhs); }
+    bool less(const Var &rhs) const { return name < rhs->getName(); }
+    bool equal(const string &rhs) const { return name == rhs; }
+    bool operator==(const VarNode &rhs) const { return name == rhs.getName(); }
+    bool operator<(const VarNode &rhs) const { return name < rhs.getName(); }
+};
+
+enum class TensorType { Input, Weight, Intermediate };
+
+class TensorNode : public ExprNode {
+    string name;
+    vector<int> shape, paddings;
+    TensorType type;
+    Routine source; // if NO source, then this is a input/weight tensor
+
+  public:
+    TensorNode(string _name, vector<int> _shape, vector<int> _paddings = {},
+               Routine _source = nullptr);
+    virtual ~TensorNode() {}
+    DEFINE_GETTYPE(TensorNode);
+
+    bool operator==(const string &rhs) { return name == rhs; }
+    friend bool operator==(const string &lhs, const TensorNode &rhs) {
+        return lhs == rhs.name;
+    }
+
+    HashType hash() const override { return genhash(name); }
+    string toReadable() const override;
+    string toOutputShape() const;
+    const std::string &getName() const { return name; }
+    std::vector<int> &getPadding() { return paddings; }
+    int getPadding(int i) const { return paddings[i]; }
+    const vector<int> &getPaddings() const { return paddings; }
+    void setPadding(int i, int p) { paddings[i] = p; }
+    const vector<int> &getShape() const { return shape; }
+    int getShape(int i) const { return shape[i]; }
+    int64_t getSize() const;
+    int getDims() const { return shape.size(); }
+    const Routine &getSource() const { return source; }
+    int getData(const Ref<vector<int>> &data, const vector<int> &idx);
+    size_t getOffset(const vector<int> &idx);
+};
+
+enum class OpType { Range, Add, Mul, Div, Mod, Sub };
+const char opSymbols[] = "#+*/%-";
+
+class OperatorNode : public ExprNode {
+  protected:
+    const OpType opType;
+    VecExpr subExprs;
+
+  public:
+    OperatorNode(OpType _opType) : opType(_opType){};
+    OperatorNode(OpType _opType, VecExpr _subExprs)
+        : opType(_opType), subExprs(_subExprs){};
+
+    int getSubExprsNum() { return subExprs.size(); };
+    const VecExpr &getSubExprs() { return subExprs; }
+    const Expr &getSubExprs(int i) const { return subExprs[i]; }
+    OpType getOpType() const { return opType; };
+    void setOperands(int i, Expr e) { subExprs[i] = e; }
+};
+
+using Range = pair<int, int>;
+using VarRangePair = pair<Var, Range>;
+inline int getLength(const Range &range) { return range.second - range.first; }
+struct IterationType {
+    enum { Loop, Sum };
+    constexpr static int NumIterationType = 2;
+};
+class RangeOpNode : public OperatorNode {
+  public:
+    enum { Summand, END_POS };
+    constexpr static int Loop = IterationType::Loop;
+    constexpr static int Sum = IterationType::Sum;
+
+  private:
+    vector<VarRangePair> vars[IterationType::NumIterationType];
+    vector<int> paddings;
+
+  public:
+    RangeOpNode(Expr _summand) : OperatorNode(OpType::Range, {_summand}){};
+    RangeOpNode(const vector<VarRangePair> &_loopIters,
+                const vector<VarRangePair> &_sumIters, Expr _summand,
+                const vector<int> &paddings)
+        : OperatorNode(OpType::Range, {_summand}), vars{_loopIters, _sumIters},
+          paddings(paddings){};
+    DEFINE_GETTYPE(RangeOpNode);
+
+    virtual HashType hash() const override {
+        nnet_unimplemented_halt();
+        return 0;
+    };
+    string toReadable() const override;
+    const Expr &getSummand() const { return subExprs[Summand]; }
+    const vector<VarRangePair> &getVarRanges(int _index) const {
+        return vars[_index];
+    }
+    const vector<VarRangePair> &getLoopVarRanges() const {
+        return vars[IterationType::Loop];
+    }
+    const vector<VarRangePair> &getSumVarRanges() const {
+        return vars[IterationType::Sum];
+    }
+    int getNumOutputDims() const;
+    bool hasVar(int index, Var name) const;
+    bool hasLoopVar(Var name) const { return hasVar(Loop, name); }
+    bool hasSumVar(Var name) const { return hasVar(Sum, name); }
+    bool hasLoopVar(string name) const {
+        return hasVar(Loop, make_ref<VarNode>(name));
+    }
+    bool hasSumVar(string name) const {
+        return hasVar(Sum, make_ref<VarNode>(name));
+    }
+    int getVarIndex(int type, string name);
+    void setSummand(Expr e) { subExprs[Summand] = e; }
+    void setLoopIterator(const vector<VarRangePair> &vecExpr) {
+        vars[Loop] = vecExpr;
+    }
+    void setSumIterator(const vector<VarRangePair> &vecExpr) {
+        vars[Sum] = vecExpr;
+    }
+    void setIterator(const vector<VarRangePair> &loop,
+                     const vector<VarRangePair> &sum) {
+        setLoopIterator(loop);
+        setSumIterator(sum);
+    }
+
+    const VarRangePair &getVarRange(int _index, int i) const {
+        return vars[_index][i];
+    }
+    const Var &getLoopVar(int i) const { return vars[Loop][i].first; }
+    Range getRange(const Var &var) const;
+    VarRangePair getVarRange(const Var &var) const;
+    bool hasPaddings() const;
+    int getPaddings(int dim) const;
+    vector<int> getPaddings() const;
+    void setPaddings(vector<int> _paddings);
+    void setVarRange(int _index, int i, VarRangePair pair) {
+        vars[_index][i] = pair;
+    }
+    int64_t getFlops() const;
+    int64_t getInputSize(const RangeOp &self) const;
+    int64_t getOutputSize() const;
+    vector<int> getOutputShape() const;
+    // Including paddings
+    vector<Range> getOutputRanges() const;
+};
+
+class BinaryOpNode : public OperatorNode {
+    enum { LHS, RHS, END_POS };
+
+  public:
+    BinaryOpNode(OpType _opType, Expr _lhs, Expr _rhs)
+        : OperatorNode(_opType, {_lhs, _rhs}){};
+    virtual ~BinaryOpNode() {}
+    DEFINE_GETTYPE(BinaryOpNode);
+
+    virtual HashType hash() const override {
+        return genhash((HashType)opType,
+                       genhash(subExprs[LHS]->hash(), subExprs[RHS]->hash()));
+    };
+    virtual string toReadable() const override;
+    const Expr &getLhs() const { return getSubExprs(LHS); };
+    const Expr &getRhs() const { return getSubExprs(RHS); };
+    void setLhs(Expr e) { setOperands(LHS, e); };
+    void setRhs(Expr e) { setOperands(RHS, e); };
+    // If Var/constant, use this one
+    optional<pair<Var, int>> getModDivParameter() const;
+    // If (Var+constant)/constant, use this one
+    pair<Expr, int> getModDivExpr() const;
+    bool isSwapable() const;
+};
+
+class ConstantNode : public ExprNode {
+    int val;
+
+  public:
+    ConstantNode(int _val) : val(_val){};
+    ConstantNode(const ConstantNode &rhs) : ExprNode(rhs), val(rhs.val){};
+    virtual ~ConstantNode() {}
+    DEFINE_GETTYPE(ConstantNode);
+
+    int getValue() const { return val; }
+    virtual HashType hash() const override { return genhash(val, 6214587); };
+    virtual string toReadable() const override {
+        string ret;
+        ret += std::to_string(val);
+        return ret;
+    };
+};
+
+class SubscriptNode : public ExprNode {
+  protected:
+    Expr indexed;
+    VecExpr subExprs;
+
+  public:
+    SubscriptNode(Expr _indexed, vector<Expr> _subExprs) : subExprs(_subExprs) {
+        setObject(_indexed);
+    };
+    DEFINE_GETTYPE(SubscriptNode);
+
+    virtual HashType hash() const override {
+        nnet_unimplemented_continue();
+        return -1;
+    };
+    virtual string toReadable() const override;
+
+    size_t getDims() const { return subExprs.size(); }
+    const VecExpr &getIndex() const { return subExprs; }
+    const Expr &getIndex(size_t i) const { return subExprs[i]; }
+    void setIndex(size_t i, Expr e) { subExprs[i] = e; }
+    Expr *getObjectPtr() { return &indexed; }
+    Expr getObject() const { return indexed; }
+    void setObject(Expr e);
+    bool isRangeOpSubscripted() const;
+    bool isTensorSubscripted() const { return !isRangeOpSubscripted(); }
+    // Get the ranges of objects including paddings
+    vector<Range> getObjectRangesWithPaddings() const;
+    vector<Range> getObjectRangesWithoutPaddings() const;
+};
+
+class FuncNode : public ExprNode {
+  protected:
+    Subscript object;
+    FuncType funcType;
+
+  public:
+    FuncNode(Expr object, FuncType funcType) : funcType(funcType) {
+        setObject(object);
+    }
+    DEFINE_GETTYPE(FuncNode);
+
+    virtual HashType hash() const override {
+        nnet_unimplemented_continue();
+        return -1;
+    };
+    virtual string toReadable() const override;
+
+    const Subscript &getObject() const { return object; }
+    void setObject(Expr e);
+
+    FuncType getFuncType() const { return funcType; }
+};
+
+// Wrappers for type deduction
+Subscript makeSubscript(const Expr &tensor, const VecExpr &subscripts);
+RangeOp makeRangeOperator(const vector<VarRangePair> &_loopIters,
+                          const vector<VarRangePair> &_sumIters, Expr _summand,
+                          const vector<int> &paddings = {});
+Tensor makeTensor(const string &name, const vector<int> &shape,
+                  const vector<int> &paddings = {},
+                  const Routine &source = nullptr);
+
+// Pretty output for dbg with shared_ptr
+template <typename T, typename std::enable_if_t<std::is_base_of_v<ExprNode, T>>
+                          *_ = nullptr>
+std::ostream &operator<<(std::ostream &os, const shared_ptr<T> &a) {
+    os << ((!a) ? string("nullptr") : a->toReadable());
+    return os;
+}
+
+// Pretty output for dbg with shared_ptr
+template <typename T, typename std::enable_if_t<std::is_base_of_v<ExprNode, T>>
+                          *_ = nullptr>
+std::ostream &operator<<(std::ostream &os, const Ref<T> &a) {
+    os << ((!a) ? string("nullptr") : a->toReadable());
+    return os;
+}
+#undef DEFINE_GETTYPE
+
+} // namespace nnet
+
+namespace std {
+template <> struct hash<nnet::VarNode &> {
+    size_t operator()(const nnet::VarNode &t) const {
+        return std::hash<string>()(t.getName());
+    }
+};
+} // namespace std
diff --git a/include/nnet/iterator_table.h b/include/nnet/iterator_table.h
new file mode 100644
index 00000000..c79224fb
--- /dev/null
+++ b/include/nnet/iterator_table.h
@@ -0,0 +1,234 @@
+#pragma once
+#include "common.h"
+#include "expr.h"
+#include <iostream>
+
+namespace nnet {
+
+using PatternTensorMap = vector<Tensor>;
+using PatternIterRangeMap = PtrMap<Iterator, VarRangePair>;
+
+enum class MismatchType {
+    // Search required (undetermined)
+    MoreVar,
+    LessVar,
+    StrideMismatch,
+    // guided DLT (determined)
+    DLMismatch,
+    OutputDLMismatch,
+    OutputDimismatch
+};
+struct Mismatch {
+    MismatchType type;
+    int bitmap; // Row ID of IT
+    PtrMap<Iterator, Iterator>
+        mappingIter_r; // For DLT mismatch, iters are mapped
+    Mismatch(MismatchType _type, int _bitmap) : type(_type), bitmap(_bitmap) {}
+    Mismatch(MismatchType _type, int _bitmap,
+             PtrMap<Iterator, Iterator> _mappingIter_r)
+        : type(_type), bitmap(_bitmap), mappingIter_r(_mappingIter_r) {}
+};
+class Pattern;
+class IteratorTable {
+  protected:
+    //     using Appearance = map<string, vector<pair<Tensor, int>>>;
+    // using StrideTable = map<TensorNode *, vector<tuple<string, int, int>>>;
+    // // Var, dim, stride
+    RangeOp rangeOp;
+    // To real tensor
+    // FIXME: redundent
+    Appearance appearance;
+    vector<Tensor> tensors;       // original tensor sequence
+    vector<Subscript> subscripts; // original subscripts sequence
+    StrideTable strideTable;      // TODO [Refactor]: rename strideTable
+    PatternIterRangeMap iterToRange;
+
+    // mapping
+    vector<int> tensorMap; // [index for tensors] -> tensorID in pattern
+    PtrMap<Iterator, Iterator> iterMap; // [expr iter] -> pattern iter
+
+    // final data
+    vector<vector<Iterator>> posTable; // [Tensor bitmap]=[Iterator]
+    vector<vector<vector<Iterator>>>
+        iterInTensorDim; // [tensorID][dimOfTensor]=[Iterator],
+                         // stride in each dim may be add
+
+    vector<vector<PtrMap<Iterator, int>>>
+        strideInDim; // [tensorID][dimOfTensor][Iterator]=stride,
+                     // stride in each dim may be add
+
+    PtrMap<Iterator, vector<int>> strideInTensor; // [Iterator][tensorID]=stride
+
+    // final data: auxiliary data
+    vector<int> tensorIDMap_r;
+    PatternTensorMap tensorMap_r;
+    PatternIterRangeMap iterToRange_r;
+
+  public:
+    virtual ~IteratorTable() {}
+    IteratorTable() {}
+    IteratorTable(const IteratorTable &) = delete;
+    [[nodiscard]] bool analyzeExpr(const RangeOp &rangeOp);
+    // mapTensors
+    void buildTable(const vector<int> &_tensorMap);
+    void buildTableWithDefaultMap();
+    /**
+     * @brief Check whether the expression match a pattern. If not, return the
+     * detailed reason for guided search.
+     *
+     * @param patternIT
+     * @return vector<int> mismatched IT rows/tensors for guided DLT.
+     */
+    vector<Mismatch> matchPatternIT(const Pattern &patternIT);
+    void matchIterators();
+    int getNumInputs() const { return tensors.size(); }
+    int getNumTensors() const { return tensors.size() + 1; }
+    int getNumRows() const { return 1 << getNumTensors(); }
+    int getNumIterators() const { return strideTable.size(); }
+    // vector<Tensor> tensorMap_r(
+    //     pattern.nInputs); // [pattern tensor ID] -> real tensor
+    // map<string, VarRangePair> iterToRange_r; // [pattern iter] -> iter &
+    // range
+    auto getTables() const {
+        return tuple(posTable, iterInTensorDim, strideInTensor);
+    }
+    const auto &getStrideInDim() const { return strideInDim; }
+    vector<vector<Iterator>> getIterInTensorDim(int tensorID) const {
+        return iterInTensorDim[tensorID];
+    }
+    const vector<Iterator> &getPosTable(int bitmap) const {
+        return posTable[bitmap];
+    }
+    pair<PatternTensorMap, PatternIterRangeMap> getReverseMap() const;
+
+    int getStridesInTensor(Iterator iter, int tensorID) const;
+    vector<int> getIterDimInTensor(int tensorID, const Iterator &iter) const;
+    Tensor getTensor(int tensorID) const { return tensorMap_r[tensorID]; }
+    Subscript getSubscript(int tensorID) const {
+        return subscripts[tensorIDMap_r[tensorID]];
+    }
+    Range getIterRange(const Iterator &iter) const {
+        return rangeOp->getRange(iter);
+    }
+
+    /**
+     * @brief Check strides of each iterators and there position in tensors.
+     * Since many-to-many iterators matching exist, we take this procudure as a
+     * seperate function to deal with different iterator mapping solution.
+     *
+     * @param patternIT
+     * @param mappingIter_r
+     * @return vector<Mismatch>
+     */
+    vector<Mismatch>
+    matchPatternITCheckStrides(const Pattern &patternIT,
+                               PtrMap<Iterator, Iterator> mappingIter_r);
+    RangeOp getRangeOp() const;
+};
+
+struct StrideConstraint {
+    int tensorID;
+    Var v0, v1;
+    enum class Constraint { SAME, PROPOTIONAL } type;
+};
+
+class Pattern : public IteratorTable {
+    vector<StrideConstraint> strideConstraints;
+
+  public:
+    virtual Expr
+    buildExpr(const Expr &expr, const vector<Tensor> &tensors,
+              [[maybe_unused]] const PatternIterRangeMap &varRanges,
+              string outputName,
+              [[maybe_unused]] const IteratorTable &exprIT) const = 0;
+    /**
+     * @brief Check whether all indexes only are a iterator
+     *
+     * @param tensorID
+     */
+    bool isAllUniqueAccess(int tensorID) const;
+    const auto &getStrideConstraints() const { return strideConstraints; };
+    int calcPadding(const Tensor &tensor, int dim, Range rangeH, Range rangeR,
+                    int offset) const;
+};
+
+class MatmulPattern : public Pattern {
+  public:
+    static const Pattern &getMatmulPattern();
+    static pair<Expr, pair<Tensor, Tensor>> getExpr(bool transA, bool transB,
+                                                    int b, int m, int n, int k);
+
+    Expr buildExpr(const Expr &expr, const vector<Tensor> &tensors,
+                   [[maybe_unused]] const PatternIterRangeMap &varRanges,
+                   string outputName,
+                   [[maybe_unused]] const IteratorTable &exprIT) const override;
+};
+
+class ConvPattern : public Pattern {
+  private:
+    static const Var n, c, h, w, f, r, s;
+
+  public:
+    static const Pattern &getPattern();
+    static Expr getExpr(Tensor A, Tensor K, int n, int c, int h, int w, int f,
+                        int r, int s);
+
+    Expr buildExpr(const Expr &expr, const vector<Tensor> &tensors,
+                   [[maybe_unused]] const PatternIterRangeMap &varRanges,
+                   string outputName,
+                   [[maybe_unused]] const IteratorTable &exprIT) const override;
+};
+
+class ConvTransPattern : public Pattern {
+  private:
+    static const Var n, c, h, w, f, r, s;
+
+  public:
+    static const Pattern &getPattern() = delete;
+    static Expr getExpr(Tensor A, Tensor K, int N, int C, int H, int W, int F,
+                        int R, int S);
+
+    Expr
+    buildExpr(const Expr &expr, const vector<Tensor> &tensors,
+              [[maybe_unused]] const PatternIterRangeMap &varRanges,
+              string outputName,
+              [[maybe_unused]] const IteratorTable &exprIT) const override {
+        nnet_unimplemented_halt();
+        return nullptr;
+    };
+};
+
+class Sg2bmmPattern : public Pattern {
+  private:
+    static const Var b, m, w, k;
+
+  public:
+    static const Pattern &getPattern();
+    static pair<Expr, pair<Tensor, Tensor>> getExpr(int Batch, int M, int K,
+                                                    int W, int D);
+
+    Expr buildExpr(const Expr &expr, const vector<Tensor> &tensors,
+                   [[maybe_unused]] const PatternIterRangeMap &varRanges,
+                   string outputName,
+                   [[maybe_unused]] const IteratorTable &exprIT) const override;
+};
+
+class LongformerGBMMPattern : public Pattern {
+  private:
+    static const Var b, m, w, n;
+
+  public:
+    static const Pattern &getPattern();
+    static pair<Expr, pair<Tensor, Tensor>> getExpr(int Batch, int M, int W,
+                                                    int K, int dilation);
+
+    Expr buildExpr(const Expr &expr, const vector<Tensor> &tensors,
+                   [[maybe_unused]] const PatternIterRangeMap &varRanges,
+                   string outputName,
+                   [[maybe_unused]] const IteratorTable &exprIT) const override;
+};
+
+const Pattern &getPattern(RoutineType targetOp);
+string getPatternName(RoutineType targetOp);
+
+} // namespace nnet
diff --git a/include/nnet/nmutator.h b/include/nnet/nmutator.h
new file mode 100644
index 00000000..23cdfb42
--- /dev/null
+++ b/include/nnet/nmutator.h
@@ -0,0 +1,57 @@
+#pragma once
+#include "core/mutator.h"
+#include "nnet/expr.h"
+
+#ifdef ABC
+
+namespace infini {
+
+class NMutator : public Mutator {
+  private:
+    // Suffix -N: NNet objects.
+    // Suffix -T: tpm objects.
+    // Map: NNet tensors -> tpm tensor.
+    std::map<std::string, Tensor> inputsNameNToTensorT;
+    enum class Mode { Normal, ToNaiveMembound, RuleBased } mode = Mode::Normal;
+    const double bandwidth = double(200) * 1024 * 1024 * 1024;
+    // If in RuleBased mode, use derivationRules in derivator
+    const std::vector<int> derivationRules;
+
+  public:
+    NMutator();
+    NMutator(const std::vector<int> &derivationRules);
+    ~NMutator();
+
+    vector<Graph> run(const Graph &in_graph) override;
+    void setToNaiveMembound();
+
+    void setMaxDepth(int _maxDepth) { maxDepth = _maxDepth; }
+    long long cntStates = 0;
+    long long cntCandidates = 0;
+
+  private:
+    int maxDepth = 8;
+    nnet::Expr opToExpression(Operator op);
+    void runSingleOp(Graph in_graph, std::vector<Graph> &out_graphs);
+
+    /**
+     * @brief Test helper. Converting a single OP to Membound Op for
+     * corretness check.
+     */
+    void runSingleOpToNaiveMembound(Graph in_graph,
+                                    std::vector<Graph> &out_graphs);
+    void runMultipleOps(Graph in_graph, std::vector<Graph> &out_graphs);
+    Graph expressionToGraph(nnet::Expr expr, Graph in_graph);
+    Graph fuseHetConv(nnet::Expr expr, Graph in_graph);
+    double memboundTime(ssize_t cnt);
+    double memboundTime(const Shape &dims);
+
+    Graph transformTConv1x1(Operator op);
+    Graph transformTConv3x3(Operator op);
+    Graph transformDialtedConv(Operator op);
+    Graph transformConv1x1(Operator op);
+    Graph transformConv1xk(Operator op);
+};
+
+} // namespace infini
+#endif
\ No newline at end of file
diff --git a/include/nnet/permutation.h b/include/nnet/permutation.h
new file mode 100644
index 00000000..300a5d6b
--- /dev/null
+++ b/include/nnet/permutation.h
@@ -0,0 +1,38 @@
+#pragma once
+#include "common.h"
+#include "expr.h"
+#include <iostream>
+
+namespace nnet {
+
+class PermutationGenerator {
+    vector<vector<Iterator>> from, to;
+    vector<vector<size_t>> mapping;
+
+  public:
+    PermutationGenerator(vector<vector<Iterator>> _from,
+                         vector<vector<Iterator>> _to);
+    bool next();
+    PtrMap<Iterator, Iterator> get() const;
+};
+
+template <typename T> class SubsetGenerator {
+    vector<T> elements;
+    int n, bitmap;
+
+  public:
+    SubsetGenerator(vector<T> elements, bool nonEmpty = 1)
+        : elements(elements), n(elements.size()), bitmap((nonEmpty > 0)) {
+        assert(n < 10);
+    };
+    bool next() { return ((++bitmap) < (1 << n) - 1); }
+    vector<T> get() const {
+        vector<T> ret;
+        for (int i = 0; i < n; ++i)
+            if (bitmap & (1 << i))
+                ret.emplace_back(elements[i]);
+        return ret;
+    }
+};
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/ref.h b/include/nnet/ref.h
new file mode 100644
index 00000000..c9f26b02
--- /dev/null
+++ b/include/nnet/ref.h
@@ -0,0 +1,200 @@
+#pragma once
+#include "common.h"
+#include <functional> // hash
+#include <memory>
+#include <type_traits>
+
+namespace nnet {
+
+template <typename T> struct is_ref;
+
+/**
+ * Ref-counting pointer
+ *
+ * This class is thread-safe (For developers: concurrent accesses through
+ * different `std::shared_ptr`s to the same object is already thread-safe, while
+ * modifying the same `std::shared_ptr` is not. We never modify a `Ref`, so no
+ * locks are needed. See https://en.cppreference.com/w/cpp/memory/shared_ptr)
+ */
+template <class T> class Ref {
+    static_assert(is_ref<T>::value == false, "Ref should not be nested");
+
+    template <class U> friend class Ref;
+
+    std::shared_ptr<T> ptr_;
+
+  private:
+  public:
+    typedef T Object;
+
+    Ref() = default;
+    // Ref(std::nullptr_t) : Ref() {}
+    constexpr Ref(nullptr_t) noexcept : Ref() {}
+    Ref(const Ref &) = default;
+    Ref(Ref &&) = default;
+    Ref(std::shared_ptr<T> &&ptr) : ptr_(std::move(ptr)) {}
+    // Ref(const std::shared_ptr<T> &ptr) : ptr_(ptr) {}
+
+    // /// NO NOT USE THIS CONSTRUCTOR IN PUBLIC
+    // /// It is public because Pybind11 needs it
+    // Ref(T *ptr) : ptr_(ptr) {}
+
+    /**
+     * Shared with any compatible references
+     */
+    template <class U,
+              typename std::enable_if_t<std::is_base_of_v<T, U>> * = nullptr>
+    Ref(const Ref<U> &other) : ptr_(std::static_pointer_cast<T>(other.ptr_)) {}
+
+    template <class U,
+              typename std::enable_if_t<std::is_base_of_v<T, U>> * = nullptr>
+    Ref &operator=(const Ref<U> &other) {
+        ptr_ = std::static_pointer_cast<T>(other.ptr_);
+        return *this;
+    }
+
+    Ref &operator=(const Ref &) = default;
+    Ref &operator=(Ref &&) = default;
+
+    template <class U> Ref<U> as() const {
+        Ref<U> ret;
+        ret.ptr_ = std::dynamic_pointer_cast<U>(ptr_);
+        return ret;
+    }
+
+    bool isValid() const { return ptr_ != nullptr; }
+
+    T &operator*() const {
+        nnet_assert(isValid(), "Empty pointer.");
+        return *ptr_;
+    }
+
+    T *operator->() const {
+        nnet_assert(isValid(), "Empty pointer.");
+        return ptr_.get();
+    }
+
+    T *get() const {
+        nnet_assert(isValid(), "Empty pointer.");
+        return ptr_.get();
+    }
+
+    friend inline bool operator==(const Ref &lhs, nullptr_t) {
+        return !lhs.isValid();
+    }
+    friend inline bool operator!=(const Ref &lhs, nullptr_t) {
+        return !(lhs == nullptr);
+    }
+    explicit operator bool() const { return ptr_ != nullptr; }
+    bool operator!() { return ptr_ == nullptr; }
+
+    void swap(Ref &__b) noexcept { ptr_.swap(__b.ptr_); }
+};
+
+template <class T, class U,
+          typename std::enable_if_t<std::is_base_of_v<U, T>> * = nullptr>
+Ref<T> as(const Ref<U> &ref) {
+    return ref.template as<T>();
+}
+
+template <typename T, typename... Params> Ref<T> make_ref(Params &&...params) {
+    return Ref(make_shared<T>(std::forward<Params>(params)...));
+}
+
+// Comparator for Ref
+template <typename T> struct is_ref : std::false_type {};
+template <typename T> struct is_ref<Ref<T>> : std::true_type {};
+
+template <class Tuple, std::size_t index = 0, bool address_based>
+typename std::enable_if_t<not is_ref<std::tuple_element_t<index, Tuple>>::value,
+                          bool>
+__ref_less(const Tuple &lhs, const Tuple &rhs) {
+    if constexpr (index >=
+                  std::tuple_size<std::remove_reference_t<Tuple>>::value - 1)
+        return std::get<index>(lhs) < std::get<index>(rhs);
+    else {
+        if (std::get<index>(lhs) != std::get<index>(rhs))
+            return std::get<index>(lhs) < std::get<index>(rhs);
+        else
+            return __ref_less<Tuple, index + 1, address_based>(lhs, rhs);
+    }
+}
+
+template <class Tuple, std::size_t index = 0, bool address_based>
+typename std::enable_if_t<is_ref<std::tuple_element_t<index, Tuple>>::value and
+                              not address_based,
+                          bool>
+__ref_less(const Tuple &lhs, const Tuple &rhs) {
+    if constexpr (index >=
+                  std::tuple_size<std::remove_reference_t<Tuple>>::value - 1)
+        return std::get<index>(lhs)->less(std::get<index>(rhs));
+    else {
+        if (std::get<index>(lhs)->neq(std::get<index>(rhs)))
+            return std::get<index>(lhs)->less(std::get<index>(rhs));
+        else
+            return __ref_less<Tuple, index + 1, address_based>(lhs, rhs);
+    }
+}
+
+template <class Tuple, std::size_t index = 0, bool address_based>
+typename std::enable_if_t<
+    is_ref<std::tuple_element_t<index, Tuple>>::value and address_based, bool>
+__ref_less(const Tuple &lhs, const Tuple &rhs) {
+    if constexpr (index >=
+                  std::tuple_size<std::remove_reference_t<Tuple>>::value - 1)
+        return std::get<index>(lhs).get() < std::get<index>(rhs).get();
+    else {
+        if (std::get<index>(lhs).get() != std::get<index>(rhs).get())
+            return std::get<index>(lhs).get() < std::get<index>(rhs).get();
+        else
+            return __ref_less<Tuple, index + 1, address_based>(lhs, rhs);
+    }
+}
+
+template <class Tuple> bool ref_addr_less(const Tuple &lhs, const Tuple &rhs) {
+    return __ref_less<Tuple, 0, true>(lhs, rhs);
+}
+
+template <class Tuple> bool ref_value_less(const Tuple &lhs, const Tuple &rhs) {
+    return __ref_less<Tuple, 0, false>(lhs, rhs);
+}
+
+template <class Tuple> class RefAddrLess {
+  public:
+    bool operator()(const Tuple &a, const Tuple &b) const {
+        return ref_addr_less(a, b);
+    }
+};
+
+template <class Tuple> class RefValueLess {
+  public:
+    bool operator()(const Tuple &a, const Tuple &b) const {
+        return ref_value_less(a, b);
+    }
+};
+
+// make_ref_from_tuple
+template <typename _Tp, typename _Tuple, size_t... _Idx>
+constexpr Ref<_Tp> make_ref_from_tuple_impl(_Tuple &&__t,
+                                            std::index_sequence<_Idx...>) {
+    return make_ref<_Tp>(std::get<_Idx>(std::forward<_Tuple>(__t))...);
+}
+
+template <typename _Tp, typename _Tuple>
+constexpr Ref<_Tp> make_ref_from_tuple(_Tuple &&__t) {
+    return make_ref_from_tuple_impl<_Tp>(
+        std::forward<_Tuple>(__t),
+        std::make_index_sequence<std::tuple_size_v<std::decay_t<_Tuple>>>{});
+}
+
+} // namespace nnet
+
+// namespace std {
+
+// template <class T> struct hash<ir::Ref<T>> {
+//     hash<T *> hash_;
+//     size_t operator()(const ir::Ref<T> &ref) const { return hash_(ref.get());
+//     }
+// };
+
+// } // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/routine.h b/include/nnet/routine.h
new file mode 100644
index 00000000..48d065d9
--- /dev/null
+++ b/include/nnet/routine.h
@@ -0,0 +1,158 @@
+#pragma once
+#include "common.h"
+#include "expr.h"
+#include <iostream>
+#include <sstream>
+namespace nnet {
+
+class RoutineNode;
+class MatmulNode;
+class ElementWiseNode;
+using Routine = Ref<RoutineNode>;
+using Matmul = Ref<MatmulNode>;
+using ElementWise = Ref<ElementWiseNode>;
+
+#define DEFINE_GETTYPE(CLASS)                                                  \
+    RoutineType getType() const override { return RoutineType::CLASS##Type; }
+
+class RoutineNode {
+  protected:
+    Expr expr;
+    vector<Tensor> inputs;
+
+  public:
+    RoutineNode(Expr _expr, const vector<Tensor> &_inputs);
+    virtual string toReadable() const = 0;
+    const Expr &getExpr() const { return expr; }
+    const vector<Tensor> &getInputs() const { return inputs; }
+    virtual RoutineType getType() const = 0;
+};
+
+using MatmulArgs = tuple<int,   // b
+                         int,   // m
+                         int,   // n
+                         int,   // k
+                         bool,  // transa
+                         bool>; // transb
+
+class MatmulNode : public RoutineNode {
+    int b, m, n, k;
+    bool transa, transb;
+
+  public:
+    MatmulNode(Expr _source, Tensor A, Tensor B, int _b, int _m, int _n, int _k,
+               bool _transa, bool _transb)
+        : RoutineNode(_source, {A, B}), b(_b), m(_m), n(_n), k(_k),
+          transa(_transa), transb(_transb) {}
+    DEFINE_GETTYPE(MatmulNode);
+
+    string toReadable() const override;
+
+    friend bool operator==(const MatmulNode &lhs, const MatmulNode &rhs);
+    MatmulArgs getArgs() { return tuple(b, m, n, k, transa, transb); }
+};
+
+using ConvArgs = tuple<int,  // ph
+                       int,  // pw
+                       int,  // sh
+                       int,  // sw
+                       int,  // dh
+                       int>; // dw
+
+class ConvNode : public RoutineNode {
+    int ph, pw;
+    int sh, sw;
+    int dh, dw;
+
+  public:
+    ConvNode(Expr _source, Tensor A, Tensor K, int _ph, int _pw, int _sh = 1,
+             int _sw = 1, int _dh = 1, int _dw = 1)
+        : RoutineNode(_source, {A, K}), ph(_ph), pw(_pw), sh(_sh), sw(_sw),
+          dh(_dh), dw(_dw) {}
+    DEFINE_GETTYPE(ConvNode);
+
+    string toReadable() const override;
+    vector<int> getShape() const;
+    friend bool operator==(const ConvNode &lhs, const ConvNode &rhs);
+    ConvArgs getArgs() const;
+};
+
+class ElementWiseNode : public RoutineNode {
+    vector<int> outputShape;
+
+  public:
+    // _outputShape is redundent, but expr is still missing for DLT.
+    ElementWiseNode(Expr _source, vector<Tensor> _inputs,
+                    vector<int> _outputShape)
+        : RoutineNode(_source, _inputs), outputShape(_outputShape) {}
+    DEFINE_GETTYPE(ElementWiseNode);
+
+    string toReadable() const override;
+    /**
+     * @brief Get the Estimated Time of mem bound OP.
+     *
+     * @return double Time in ms.
+     */
+    double getEstimatedTime() const;
+    const vector<int> &getOutputShape() const { return outputShape; }
+};
+
+using G2bmmArgs = tuple<int,  // b
+                        int,  // m
+                        int,  // w
+                        int,  // k
+                        int>; // dilation
+class G2bmmNode : public RoutineNode {
+    int b, m, w, k;
+
+  public:
+    G2bmmNode(Expr source, Tensor A, Tensor B, int b, int m, int w, int k,
+              int d = 1)
+        : RoutineNode(source, {A, B}), b(b), m(m), w(w), k(k) {
+        assert(d == 1);
+    }
+    DEFINE_GETTYPE(G2bmmNode);
+
+    vector<int> getShape() const;
+    string toReadable() const override;
+    G2bmmArgs getArgs() const;
+};
+
+using GbmmArgs = tuple<int,  // b
+                       int,  // m
+                       int,  // w
+                       int,  // n
+                       int>; // dilation
+class GbmmNode : public RoutineNode {
+    int b, m, w, n;
+
+  public:
+    GbmmNode(Expr source, Tensor A, Tensor B, int b, int m, int w, int n,
+             int d = 1)
+        : RoutineNode(source, {A, B}), b(b), m(m), w(w), n(n) {
+        assert(d == 1);
+    }
+    DEFINE_GETTYPE(GbmmNode);
+
+    vector<int> getShape() const;
+    string toReadable() const override;
+    GbmmArgs getArgs() const;
+};
+
+// Pretty output for dbg with shared_ptr
+template <typename T, typename std::enable_if_t<
+                          std::is_base_of_v<RoutineNode, T>> *_ = nullptr>
+std::ostream &operator<<(std::ostream &os, const shared_ptr<T> &a) {
+    os << ((!a) ? string("Null shared_ptr") : a->toReadable());
+    return os;
+}
+
+// Pretty output for dbg with shared_ptr
+template <typename T, typename std::enable_if_t<
+                          std::is_base_of_v<RoutineNode, T>> *_ = nullptr>
+std::ostream &operator<<(std::ostream &os, const Ref<T> &a) {
+    os << ((!a) ? string("Null shared_ptr") : a->toReadable());
+    return os;
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/test.h b/include/nnet/test.h
new file mode 100644
index 00000000..ab84d9b5
--- /dev/null
+++ b/include/nnet/test.h
@@ -0,0 +1,28 @@
+#pragma once
+#include "common.h"
+#include "derivator.h"
+
+// clang-format off
+#define CAT(A, B) A##B
+#define SELECT(NAME, NUM) CAT(NAME##_, NUM)
+#define GET_COUNT( _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, COUNT, ... ) COUNT
+#define VA_SIZE( ... ) GET_COUNT( __VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 )
+#define VA_SELECT( NAME, ... ) SELECT( NAME, VA_SIZE(__VA_ARGS__) )(__VA_ARGS__)
+
+#define _DEFVAR_1(name) auto name = make_ref<VarNode>(#name);
+#define _DEFVAR_2(name, ...) _DEFVAR_1(name); _DEFVAR_1(__VA_ARGS__)
+#define _DEFVAR_3(name, ...) _DEFVAR_1(name); _DEFVAR_2(__VA_ARGS__)
+#define _DEFVAR_4(name, ...) _DEFVAR_1(name); _DEFVAR_3(__VA_ARGS__)
+#define _DEFVAR_5(name, ...) _DEFVAR_1(name); _DEFVAR_4(__VA_ARGS__)
+#define _DEFVAR_6(name, ...) _DEFVAR_1(name); _DEFVAR_5(__VA_ARGS__)
+#define _DEFVAR_7(name, ...) _DEFVAR_1(name); _DEFVAR_6(__VA_ARGS__)
+#define _DEFVAR_8(name, ...) _DEFVAR_1(name); _DEFVAR_7(__VA_ARGS__)
+#define _DEFVAR_9(name, ...) _DEFVAR_1(name); _DEFVAR_8(__VA_ARGS__)
+#define DEFINE_VAR(...) VA_SELECT(_DEFVAR, __VA_ARGS__)
+// clang-format on
+
+namespace nnet {
+int matchExprResult(Derivator &derivator, string fn);
+bool checkExprLogSame(string fnPrefix, int start, int end);
+bool checkExprsEquvivalence(VecExpr exprs);
+} // namespace nnet
\ No newline at end of file
diff --git a/include/nnet/visitor.h b/include/nnet/visitor.h
new file mode 100644
index 00000000..c415a097
--- /dev/null
+++ b/include/nnet/visitor.h
@@ -0,0 +1,128 @@
+#pragma once
+#include "common.h"
+#include "derivator.h"
+#include "expr.h"
+#include "routine.h"
+#include <iostream>
+#include <unordered_map>
+
+namespace nnet {
+
+template <typename FType> class Functor;
+
+template <typename R, typename... Args> class Functor<R(Args...)> {
+  protected:
+    int verbose;
+
+    // FIXME: scope should be protected
+  public:
+    Functor(int _verobse = 0) : verbose(_verobse) {}
+    virtual ~Functor() = default;
+#define DISPATCH(CLASS)                                                        \
+    case NodeType::CLASS##Type:                                                \
+        return this->visit_(as<CLASS>(c), std::forward<Args>(args)...);        \
+        break
+
+#define FUNCTOR_DEFAULT                                                        \
+    { return visitDefault(c, std::forward<Args>(args)...); }
+
+    virtual R dispatch(const Expr &c, Args... args) {
+        switch (c->getType()) {
+            DISPATCH(ConstantNode);
+            DISPATCH(BinaryOpNode);
+            DISPATCH(RangeOpNode);
+            DISPATCH(SubscriptNode);
+            DISPATCH(TensorNode);
+            DISPATCH(VarNode);
+            DISPATCH(FuncNode);
+        default:
+            nnet_assert(0, "Unknown type");
+            return R();
+        }
+    }
+
+    virtual R visit_(const Constant &c, Args... args) FUNCTOR_DEFAULT;
+    virtual R visit_(const BinaryOp &c, Args... args) FUNCTOR_DEFAULT;
+    virtual R visit_(const RangeOp &c, Args... args) FUNCTOR_DEFAULT;
+    virtual R visit_(const Subscript &c, Args... args) FUNCTOR_DEFAULT;
+    virtual R visit_(const Var &c, Args... args) FUNCTOR_DEFAULT;
+    virtual R visit_(const Tensor &c, Args... args) FUNCTOR_DEFAULT;
+    virtual R visit_(const Func &c, Args... args) FUNCTOR_DEFAULT;
+    virtual R visitDefault(const Expr &c, [[maybe_unused]] Args... args) {
+        dbg(*c);
+        nnet_assert(0, "Reach unimplemented visit function.");
+        return R();
+    };
+
+    [[deprecated("Define explicit methods for public access.")]] R
+    operator()(const Expr &e, Args... args) {
+        return dispatch(e, std::forward<Args>(args)...);
+    }
+#undef FUNCTOR_DEFAULT
+#undef DISPATCH
+};
+
+class Mutator : public Functor<Expr()> {
+  public:
+    Mutator(int _verobse = 0) : Functor(_verobse) {}
+    Expr visit_(const Constant &c) override;
+    Expr visit_(const BinaryOp &c) override;
+    Expr visit_(const RangeOp &c) override;
+    Expr visit_(const Subscript &c) override;
+    Expr visit_(const Var &c) override;
+    Expr visit_(const Tensor &c) override;
+    Expr visit_(const Func &c) override;
+};
+
+// template <typename... Args>
+// class SingleStageVisitor : public Functor<void, Args...> {
+//   public:
+//     SingleStageVisitor(int _verobse = 0) : Functor<R, Args...>(_verobse) {}
+//     // R visit(const Constant &c) override ;
+//     R visit_(const BinaryOp &c) override {
+//         if (verbose)
+//             dbg(*c);
+//         this->dispatch(c->getLhs());
+//         this->dispatch(c->getRhs());
+//     }
+//     R visit_(const RangeOp &c) override {
+//         if (verbose)
+//             dbg(*c);
+//         this->dispatch(ret->getSummand());
+//         // NOT visit iterators and its ranges
+//     }
+//     R visit_(const Subscript &c) override {
+//         if (verbose)
+//             dbg(*c);
+//         this->dispatch(ret->getObject());
+//         for (size_t i = 0; i < ret->getDims(); ++i)
+//             this->dispatch(ret->getIndex(i));
+//     }
+//     // R visit(const Var &c) override;
+//     // R visit(const Tensor &c) override;
+// };
+
+// } // namespace nnet
+// #include "nnet/Visitor/ReplaceVariable.h"
+// #include "nnet/Visitor/StrideVisitor.h"
+// namespace nnet {
+
+class ExprTreeVisitor : public Functor<void(void)> {
+  private:
+    bool inBinary, inRange, inSub, inTensor;
+
+  public:
+    ExprTreeVisitor(bool _inBinary = 1, bool _inRange = 1, bool _inSub = 1,
+                    bool _inTensor = 1, int _verobse = 0)
+        : Functor(_verobse), inBinary(_inBinary), inRange(_inRange),
+          inSub(_inSub), inTensor(_inTensor) {}
+    void visit_(const Constant &c) override;
+    void visit_(const BinaryOp &c) override;
+    void visit_(const RangeOp &c) override;
+    void visit_(const Subscript &c) override;
+    void visit_(const Var &c) override;
+    void visit_(const Tensor &c) override;
+    void visit_(const Func &c) override;
+};
+
+} // namespace nnet
diff --git a/include/operators/matmul.h b/include/operators/matmul.h
new file mode 100644
index 00000000..b94dabe0
--- /dev/null
+++ b/include/operators/matmul.h
@@ -0,0 +1,47 @@
+#pragma once
+#include "core/operator.h"
+
+namespace infini {
+
+class MatmulNode : public OperatorNode {
+  private:
+    // InfiniTensor assume a row-major tensor layout. transA=false means default
+    // dims, true means A should be transposed before matmul. This is in
+    // oppsite to column-major BLAS.
+    bool transA, transB;
+    ActType act;
+
+    // Auxiliary attributes
+    int b, m, n, k;
+
+  public:
+    MatmulNode(Tensor A, Tensor B, Tensor C, bool transA = false,
+               bool transB = false, Tensor bias = nullptr,
+               ActType act = ActType::None);
+
+    std::string toString() const override;
+    vector<Shape> computeShape() const override;
+
+    int numInputs() const override { return 2; }
+    int numOutputs() const override { return 1; }
+
+    Tensor getBias() const { return inputs[2]; }
+    ActType getAct() const { return act; }
+    bool getTransA() const { return transA; }
+    bool getTransB() const { return transB; }
+    int getB() const { return b; }
+    int getM() const { return m; }
+    int getN() const { return n; }
+    int getK() const { return k; }
+
+    HashType hashWithShape() const override;
+    OpPerfKey getOpPerfKey() const override;
+
+  private:
+    // Q: whether to check the output? Since we can build an Op first and then
+    // assure output.
+    // Fix 1: make shape inference a static method. But OpPerfKey are required.
+    bool checkValid(const TensorVec &inputs) const;
+};
+
+} // namespace infini
diff --git a/include/test.h b/include/test.h
new file mode 100644
index 00000000..05bcdcb1
--- /dev/null
+++ b/include/test.h
@@ -0,0 +1,3 @@
+#pragma once
+#include "core/common.h"
+#include "gtest/gtest.h"
diff --git a/src/core/common.cc b/src/core/common.cc
new file mode 100644
index 00000000..d1c7fd40
--- /dev/null
+++ b/src/core/common.cc
@@ -0,0 +1,14 @@
+#include "core/common.h"
+#include <chrono>
+#include <functional>
+
+namespace infini {
+
+double timeit(const std::function<void()> &func) {
+    auto start = std::chrono::high_resolution_clock::now();
+    func();
+    auto end = std::chrono::high_resolution_clock::now();
+    return std::chrono::duration<double, std::milli>(end - start).count();
+}
+
+} // namespace infini
\ No newline at end of file
diff --git a/src/core/graph.cc b/src/core/graph.cc
new file mode 100644
index 00000000..0f6fb180
--- /dev/null
+++ b/src/core/graph.cc
@@ -0,0 +1,20 @@
+#include "core/graph.h"
+
+namespace infini {
+
+void GraphNode::updateConnection() { IT_TODO_HALT(); }
+
+string GraphNode::toString() const {
+    std::ostringstream oss;
+    oss << "GraphNode operators:\n";
+    for (const auto &op : ops)
+        oss << op << "\n";
+    return oss.str();
+}
+
+void GraphNode::dataMalloc() {
+    for (auto &tensor : tensors)
+        tensor->dataMalloc();
+}
+
+} // namespace infini
\ No newline at end of file
diff --git a/src/core/operator.cc b/src/core/operator.cc
new file mode 100644
index 00000000..b215cb8a
--- /dev/null
+++ b/src/core/operator.cc
@@ -0,0 +1,32 @@
+#include "core/operator.h"
+
+namespace infini {
+
+bool OperatorNode::isLinearOp() const {
+    return enum_to_underlying(type) >= 100 && enum_to_underlying(type) < 200;
+}
+
+bool OperatorNode::isElementWiseOp() const {
+    return enum_to_underlying(type) >= 200 && enum_to_underlying(type) < 300;
+}
+
+bool OperatorNode::isSplitOp() const { return type == OpType::Split; }
+
+bool OperatorNode::isConcatOp() const { return type == OpType::Concat; }
+
+bool OperatorNode::isComputeOp() const {
+    return type == OpType::Conv || type == OpType::Matmul ||
+           type == OpType::ConvTrans || type == OpType::G2BMM ||
+           type == OpType::GBMML;
+}
+
+bool OperatorNode::isTransposeOp() const { return type == OpType::Transpose; }
+
+bool OperatorNode::isReshapeOp() const { return type == OpType::Reshape; }
+
+bool OperatorNode::isMemBoundOp() const {
+    return type == OpType::MemBound || type == OpType::Activation ||
+           type == OpType::Transpose;
+}
+
+} // namespace infini
\ No newline at end of file
diff --git a/src/core/run_engine.cc b/src/core/run_engine.cc
new file mode 100644
index 00000000..ba6878bc
--- /dev/null
+++ b/src/core/run_engine.cc
@@ -0,0 +1,105 @@
+#include "core/run_enigne.h"
+#include <chrono>
+
+namespace infini {
+
+void RunEngine::run(const Graph &graph, bool tune, bool profiling) const {
+    if (!tune && profiling)
+        IT_TODO_HALT();
+    const auto &kernelRegistry = KernelRegistry::getInstance();
+    auto perfEngine = PerfEngine::getInstance();
+    // Statistics
+    double totalTime = 0;
+    std::map<OpType, double> opTime;
+    std::map<OpType, int> opCnt;
+    std::chrono::system_clock::time_point begin, end;
+
+    for (auto &op : graph->getOperators()) {
+        // HACK: set correct data type
+        auto kernelAttrs =
+            KernelAttrs{device, op->getOpType(), DataType::Int32};
+        Kernel *kernel = kernelRegistry.getKernel(kernelAttrs);
+        auto perfKey = PerfEngine::Key{kernelAttrs, op->getOpPerfKey()};
+        std::optional<PerfRecord> perfData = perfEngine.getPerfData(perfKey);
+
+        // If no record and disable tuning, run with the default argument
+        if (!perfData && !tune) {
+            kernel->compute(op);
+            continue;
+        }
+
+        // TODO: The copy of record should be eliminated
+        PerfRecord record;
+        // Tune the kernel if there is no record
+        if (!perfData) {
+            record = kernel->tune(op);
+            perfEngine.setPerfData(perfKey, record);
+        } else
+            record = *perfData;
+
+        if (!profiling) {
+            kernel->compute(op, *perfData);
+            continue;
+        } else {
+            double t = timeit([&]() { kernel->compute(op, *perfData); });
+            op->print();
+            printf(" op_time %lf\n", t);
+            totalTime += t;
+            opTime[op->getOpType()] += t;
+            opCnt[op->getOpType()]++;
+        }
+    }
+    if (profiling)
+        printProfilingData(totalTime, opTime, opCnt);
+}
+
+double RunEngine::getPerfTime(const Graph &graph, bool profiling) const {
+    const auto &kernelRegistry = KernelRegistry::getInstance();
+    auto perfEngine = PerfEngine::getInstance();
+    // Statistics
+    double totalTime = 0;
+    std::map<OpType, double> opTime;
+    std::map<OpType, int> opCnt;
+
+    for (auto &op : graph->getOperators()) {
+        // HACK: set correct data type
+        auto kernelAttrs =
+            KernelAttrs{device, op->getOpType(), DataType::Int32};
+        Kernel *kernel = kernelRegistry.getKernel(kernelAttrs);
+        auto perfKey = PerfEngine::Key{kernelAttrs, op->getOpPerfKey()};
+        std::optional<PerfRecord> perfData = perfEngine.getPerfData(perfKey);
+
+        PerfRecord record;
+        // Tune the kernel if there is no record
+        if (!perfData) {
+            record = kernel->tune(op);
+            perfEngine.setPerfData(perfKey, record);
+        } else
+            record = *perfData;
+
+        double t = record.time;
+        totalTime += t;
+        if (profiling) {
+            op->print();
+            printf(" op_time %lf\n", t);
+            opTime[op->getOpType()] += t;
+            opCnt[op->getOpType()]++;
+        }
+    }
+    if (profiling)
+        printProfilingData(totalTime, opTime, opCnt);
+    return totalTime;
+}
+
+void RunEngine::printProfilingData(double totalTime,
+                                   const std::map<OpType, double> &opTime,
+                                   const std::map<OpType, int> &opCnt) const {
+    printf("%11s %3s %7s %7s %7s\n", "Op", "Cnt", "T_tot", "Percent", "T_mean");
+    for (const auto &[type, t] : opTime) {
+        printf("%11s %3d %7.3f %7.1f %7.3f\n",
+               OpRegistry::getOpName(type).data(), opCnt.at(type), t,
+               t / totalTime * 100, t / opCnt.at(type));
+    }
+}
+
+} // namespace infini
\ No newline at end of file
diff --git a/src/core/tensor.cc b/src/core/tensor.cc
new file mode 100644
index 00000000..18460986
--- /dev/null
+++ b/src/core/tensor.cc
@@ -0,0 +1,90 @@
+#include <core/tensor.h>
+namespace infini {
+
+TensorNode::TensorNode(const Shape &shape, DataType dtype)
+    : TensorBaseNode(shape.size(), dtype), shape(shape) {}
+
+void TensorNode::dataMalloc() {
+    IT_ASSERT(data == nullptr);
+    // initialized to zero
+    data.reset(reinterpret_cast<VType *>(calloc(size(), sizeof(VType))));
+}
+
+VType TensorNode::getData(const Shape &pos) const {
+    return getData(getOffset(pos));
+}
+
+string TensorNode::toString() const {
+    return "TensorNode " + std::to_string(guid);
+}
+
+size_t TensorNode::getOffset(const Shape &pos) const {
+    auto nDim = pos.size();
+    IT_ASSERT(shape.size() == nDim);
+    if (pos.empty())
+        return 0;
+    for (size_t i = 0; i < nDim; ++i)
+        IT_ASSERT(pos[i] < 0 || pos[i] >= shape[i]);
+    size_t idx = pos[0];
+    size_t dm = 0;
+    while (++dm < nDim)
+        idx = idx * shape[dm] + pos[dm];
+    return idx;
+}
+
+size_t TensorNode::size() const {
+    size_t ret = 1;
+    for (const auto &d : shape)
+        ret *= d;
+    return ret;
+}
+
+void TensorNode::copyData(VType *dptr) {
+    IT_ASSERT(data != nullptr);
+    size_t sz = size();
+#pragma omp parallel for
+    for (size_t i = 0; i < sz; ++i) {
+        data[i] = dptr[i];
+    }
+}
+
+void TensorNode::printData() const {
+    IT_ASSERT(data != nullptr);
+    std::cout << "Tensor: " << guid << std::endl;
+    auto numDims = shape.size();
+    auto dimSzVec = std::vector<int>(numDims, 1);
+    dimSzVec[numDims - 1] = shape[numDims - 1];
+    for (int i = numDims - 1; i != 0; --i)
+        dimSzVec[i - 1] = dimSzVec[i] * shape[i - 1];
+    for (size_t i = 0, iEnd = size(); i < iEnd; ++i) {
+        for (size_t j = 0; j < numDims; ++j) {
+            if (i % dimSzVec[j] == 0) {
+                std::cout << "[";
+            }
+        }
+        std::cout << data[i];
+        for (size_t j = 0; j < numDims; ++j) {
+            if ((int)i % dimSzVec[j] == dimSzVec[j] - 1) {
+                std::cout << "]";
+            }
+        }
+        if (i != size() - 1)
+            std::cout << ", ";
+        if ((int)i % dimSzVec[numDims - 1] == dimSzVec[numDims - 1] - 1)
+            std::cout << std::endl;
+    }
+}
+
+bool TensorNode::equalData(const Tensor &rhs) const {
+    IT_ASSERT(data != nullptr);
+    IT_ASSERT(rhs->data != nullptr);
+    if (shape != rhs->getDims())
+        return false;
+    size_t sz = size();
+    for (size_t i = 0; i < sz; ++i)
+        if (data[i] != rhs->data[i])
+            return false;
+    return true;
+}
+
+}; // namespace infini
\ No newline at end of file
diff --git a/src/core/tensor_base.cc b/src/core/tensor_base.cc
new file mode 100644
index 00000000..72297ce0
--- /dev/null
+++ b/src/core/tensor_base.cc
@@ -0,0 +1,9 @@
+#include <core/tensor_base.h>
+namespace infini {
+
+TensorBaseNode::TensorBaseNode(int dim, DataType dtype)
+    : dim(dim), dtype(dtype) {}
+
+VType TensorBaseNode::getData(size_t offset) const { return data[offset]; }
+
+}; // namespace infini
\ No newline at end of file
diff --git a/src/kerels/cpu/matmul.cc b/src/kerels/cpu/matmul.cc
new file mode 100644
index 00000000..84fa53a3
--- /dev/null
+++ b/src/kerels/cpu/matmul.cc
@@ -0,0 +1,38 @@
+#include "operators/matmul.h"
+#include "core/kernel.h"
+
+namespace infini {
+
+template <typename T> class NaiveMatmul : public Kernel {
+    void compute(const Operator &_op, const PerfRecord &record) const override {
+        auto op = as<MatmulNode>(_op);
+        T *A = reinterpret_cast<T *>(op->getInputs(0)->getDataPtr().get());
+        T *B = reinterpret_cast<T *>(op->getInputs(1)->getDataPtr().get());
+        T *C = reinterpret_cast<T *>(op->getOutput()->getDataPtr().get());
+        IT_ASSERT(op->getTransA() == false && op->getTransB() == false);
+        IT_ASSERT(op->getAct() == ActType::None);
+        IT_ASSERT(op->getB() == 1);
+        const int M = op->getM(), N = op->getN(), K = op->getK();
+        for (int i = 0; i < M; i++) {
+            for (int j = 0; j < N; j++) {
+                C[i * N + j] = 0;
+                for (int k = 0; k < K; k++) {
+                    C[i * N + j] += A[i * K + k] * B[k * N + j];
+                }
+            }
+        }
+    }
+
+    void compute(const Operator &op) const override { compute(op, {}); }
+
+    PerfRecord tune(const Operator &op) const override {
+        return PerfRecord{.time = timeit([this, &op]() { compute(op); })};
+    }
+};
+
+REGISTER_KERNEL(Device::CPU, OpType::Matmul, DataType::Int32,
+                NaiveMatmul<uint32_t>, "MatmulNaive_CPU_uint32");
+REGISTER_KERNEL(Device::CPU, OpType::Matmul, DataType::Float32,
+                NaiveMatmul<float>, "MatmulNaive_CPU_float32");
+
+} // namespace infini
\ No newline at end of file
diff --git a/src/nnet/Pass/MatchComputationKernel.cc b/src/nnet/Pass/MatchComputationKernel.cc
new file mode 100644
index 00000000..ece2a8f2
--- /dev/null
+++ b/src/nnet/Pass/MatchComputationKernel.cc
@@ -0,0 +1,25 @@
+#include "nnet/Pass/MatchComputationKernel.h"
+#include "nnet/Visitor/PatternMatcher.h"
+
+namespace nnet {
+
+// RE: is this duplicate with Rule6KenerlMatching?
+void MatchComputationKernel::transform(Formula &origin, int depth, Expr &rCur) {
+    nnet_assert(derivator.getSearchState() == 2, __LINE__);
+    auto cur = as<RangeOpNode>(rCur);
+    // Build wrapper stages for enforce axis starts from 0
+    PatternMatcher patternMatcher(derivator, cur);
+    cur = patternMatcher.getOffsetCur();
+
+    auto matches = patternMatcher.matchWithPattern(
+        cur, getPattern(derivator.getTargetOp()));
+    matches = patternMatcher.applyWrapper(matches);
+
+    for (auto newCur : matches) {
+        derivator.setSearchState(3);
+        nextStep(origin, depth, rCur, newCur);
+        derivator.setSearchState(2);
+    }
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Pass/MatchMemBoundKernel.cc b/src/nnet/Pass/MatchMemBoundKernel.cc
new file mode 100644
index 00000000..ee2ad313
--- /dev/null
+++ b/src/nnet/Pass/MatchMemBoundKernel.cc
@@ -0,0 +1,23 @@
+#include "nnet/Pass/MatchMemBoundKernel.h"
+#include "nnet/Visitor/InputVisitor.h"
+
+namespace nnet {
+
+void MatchMemBoundKernel::transform(Formula &origin, int depth, Expr &rCur) {
+    // FIXME: Whether the Formula is a Membound OP should be checked.
+    nnet_assert(derivator.getSearchState() == 3, __LINE__);
+    nnet_assert(origin.root.get() == rCur.get(),
+                "Only match the entire formula as a Membound Op");
+    auto rangeOp = as<RangeOpNode>(origin.root);
+    const auto &inputs = InputVisitor().getInputs(rangeOp);
+    auto source =
+        make_ref<ElementWiseNode>(rangeOp, inputs, rangeOp->getOutputShape());
+    auto tensor =
+        makeTensor(newTensorName(), rangeOp->getOutputShape(), {}, source);
+    // The original code directly appends candidate. But it seems should be done
+    // by the search.
+    // appendCanddiate(as<TensorNode>(tensor), depth);
+    nextStep(origin, depth, rCur, tensor);
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Pass/Pass.cc b/src/nnet/Pass/Pass.cc
new file mode 100644
index 00000000..72edd4bf
--- /dev/null
+++ b/src/nnet/Pass/Pass.cc
@@ -0,0 +1,58 @@
+#include "nnet/Pass/Pass.h"
+#include "nnet/Visitor/CloneMutator.h"
+
+namespace nnet {
+
+Pass::Pass(Derivator &derivator, const string &passName)
+    : derivator(derivator), passName(passName),
+      enableLogging(derivator.getLogMode() != Derivator::LogMode::NoLog),
+      enableDebug(false) {}
+
+Pass::~Pass() = default;
+
+void Pass::setEnableLogging(bool value) { enableLogging = value; }
+
+void Pass::setEnableDebug(bool value) { enableDebug = value; }
+
+void Pass::run(Formula &origin, int dfsDepth, Expr &rCur) {
+    initialize(origin, rCur);
+    transform(origin, dfsDepth, rCur);
+    finalize();
+}
+
+void Pass::initialize(Formula &origin, const Expr &rCur) {}
+
+void Pass::finalize() {}
+
+Var Pass::getNewVar() { return derivator.getNewVar(); }
+
+string Pass::newTensorName() { return derivator.newTensorName(); }
+
+void Pass::nextStep(Formula &origin, int depth, Expr &rCur, Expr newCur,
+                    const string &ruleMsg) {
+    // push rule action description
+    if (enableLogging) {
+        rCur.swap(newCur);
+        derivator.pushIntermediateState(origin.root);
+        rCur.swap(newCur);
+        derivator.pushRuleState(passName);
+        derivator.pushRuleMsg(ruleMsg);
+    }
+
+    if (enableDebug) {
+        // In debug mode, do not recur but save the transformed state
+        transformations.emplace_back(CloneMutator().clone(newCur));
+    } else
+        derivator.nextStep(origin, depth, rCur, newCur);
+
+    // pop rule action description
+    if (enableLogging) {
+        derivator.popIntermediateState();
+        derivator.popRuleState();
+        derivator.popRuleMsg();
+    }
+}
+
+const VecExpr &Pass::getTransformations() { return transformations; }
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Pass/Rule1VariableSplit.cc b/src/nnet/Pass/Rule1VariableSplit.cc
new file mode 100644
index 00000000..7e08dfc7
--- /dev/null
+++ b/src/nnet/Pass/Rule1VariableSplit.cc
@@ -0,0 +1,134 @@
+#include "nnet/Pass/Rule1VariableSplit.h"
+#include "nnet/Visitor/ReplaceVariable.h"
+
+namespace nnet {
+
+void Rule1VariableSplit::transform(Formula &origin, int depth, Expr &rCur) {
+    auto cur = as<RangeOpNode>(rCur);
+    vector<Replace> replaces = getSplitableVar(cur);
+    // for (const auto &replace : replaces)
+    //     dbg(replace.oldIters, replace.newIters, replace.psis,
+    //         replace.newVarRanges);
+    for (const auto &replace : replaces) {
+        auto replacedSummand = replaceIters(cur->getSummand(), replace);
+        if (!replacedSummand) {
+            // TODO: if a real getMergableExprs is implemented, this case should
+            // be an error. Since the expr should appear in the AST.
+            dbg("Warning: No replacment happens.");
+            continue;
+        }
+        auto inner =
+            ReplaceKit::replaceRangeOpIterator(cur, replace, replacedSummand);
+        // build the outerRange{innerRange}[indexForInner] to do DLT
+        Expr nextCur = nullptr;
+        if (replace.iteratorType == IterationType::Loop) {
+            auto subscriptedInner =
+                ReplaceKit::buildSubscirptForLoopVarReplace(inner, replace);
+            nextCur = ReplaceKit::buildDLTOuterRangeOp(cur, subscriptedInner);
+        } else
+            nextCur = inner;
+
+        string msg = "====== END rule1 VariableSplit: ";
+        dbg(msg, replace.oldIters, replace.newIters, replace.phis,
+            replace.psis);
+        msg = replace.toReadable();
+        nextStep(origin, depth, rCur, nextCur, msg);
+    }
+}
+
+vector<Replace> Rule1VariableSplit::getSplitableVar(const RangeOp &rangeOp) {
+    vector<Replace> ret;
+    // Split strategy
+    vector<int> SumFactors, LoopFactors;
+    if (derivator.getPassMode() == Derivator::PassMode::Debug) {
+        SumFactors = {3};
+        LoopFactors = {4};
+    } else if (derivator.getPassMode() == Derivator::PassMode::Full) {
+        SumFactors = {2, 3};
+        // LoopFactors = {3, 4};
+        LoopFactors = {4};
+    } else
+        nnet_unimplemented_halt();
+
+    // Split Sum variable
+    for (const int k : SumFactors) {
+        for (const auto &[var, range] : rangeOp->getSumVarRanges()) {
+            int len = range.second - range.first;
+            auto p1 = getNewVar(); // p1=i/k
+            auto p2 = getNewVar(); // p2=i%k
+            if (len > 10 || len <= k || len % k != 0)
+                continue;
+
+            Range range1, range2;
+            if (range.first < 0) {
+                nnet_unimplemented_halt();
+                // FIXME: this must be ERROR
+                range1.first = range.first / k;
+                range1.second = range1.first + len / k;
+                range2.first = -k / 2;
+                range2.second = range2.first + k;
+            } else if (range.first == 0) {
+                range1.first = 0;
+                range1.second = len / k;
+                range2.first = 0;
+                range2.second = k;
+            } else {
+                nnet_unimplemented_continue();
+                continue;
+            }
+            Replace replace{.iteratorType = IterationType::Sum,
+                            .oldIters = {var},
+                            .newIters = {p1, p2},
+                            .phis = {},
+                            .psis = {make_ref<ConstantNode>(k) * p1 + p2},
+                            .newVarRanges = {{p1, range1}, {p2, range2}}};
+            ret.emplace_back(replace);
+        }
+    }
+    for (const int k : LoopFactors) {
+        // Split Loop variable
+        for (const auto &[var, range] : rangeOp->getLoopVarRanges()) {
+            const int len = range.second - range.first;
+            // Debug HACK for dilated SG2BMM
+            if (derivator.getPassMode() == Derivator::PassMode::Debug &&
+                !(var->getName() == "m" && len % k == 0))
+                continue;
+
+            // Illeagel conditions
+            if (range.second - range.first <= k ||
+                (range.second - range.first) % k != 0)
+                continue;
+            // Unsupport conditions
+            if (range.first != 0)
+                continue;
+            auto p1 = getNewVar(); // p1=i/k
+            auto p2 = getNewVar(); // p2=i%k
+            Range range1(0, len / k);
+            Range range2(0, k);
+            nnet_assert(range1.second > 0 && range2.second > 0,
+                        "Empty loop dim");
+            Replace replace{.iteratorType = IterationType::Loop,
+                            .oldIters = {var},
+                            .newIters = {p1, p2},
+                            .phis = {var / 4, var % 4},
+                            .psis = {make_ref<ConstantNode>(k) * p1 + p2},
+                            .newVarRanges = {{p1, range1}, {p2, range2}}};
+            ret.emplace_back(replace);
+        }
+    }
+    return ret;
+}
+
+Expr Rule1VariableSplit::replaceIters(Expr cur, const Replace &replace) {
+    // TODO [feature]: support multiple replacements in one mutator
+    if (replace.oldIters.size() != 1) {
+        nnet_unimplemented_continue();
+        return nullptr;
+    }
+    auto replaceMutator =
+        ReplaceVariable(replace.oldIters.at(0), replace.psis.at(0));
+    auto ret = replaceMutator(cur);
+    return ret;
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Pass/Rule2VariableMerging.cc b/src/nnet/Pass/Rule2VariableMerging.cc
new file mode 100644
index 00000000..55e6b581
--- /dev/null
+++ b/src/nnet/Pass/Rule2VariableMerging.cc
@@ -0,0 +1,186 @@
+#include "nnet/Pass/Rule2VariableMerging.h"
+#include "nnet/Visitor/CheckOOBVisitor.h"
+
+namespace nnet {
+
+void Rule2VariableMerging::transform(Formula &origin, int depth, Expr &rCur) {
+    // Extract r and s
+    auto cur = as<RangeOpNode>(rCur);
+    vector<Replace> replaces = getMergableReplaces(cur, depth);
+    // dbg("Start rule2VariableMerging", depth, mergableExprs, *cur);
+    for (const auto &replace : replaces) {
+        if (replace.iteratorType != IterationType::Loop) {
+            nnet_unimplemented_continue();
+            continue;
+        }
+        // replace vars in summand
+        auto replacedSummand = ReplaceKit::replaceMultipleExprs(
+            cur->getSummand(), replace.oldIters, replace.psis, true);
+        // replace var in rangeOp
+        auto inner =
+            ReplaceKit::replaceRangeOpIterator(cur, replace, replacedSummand);
+        // If OOB happens, this transformation is skipped
+        if (CheckOOBVisitor().checkRangeOp(inner))
+            continue;
+        // build the outerRange{innerRange}[indexForInner] to do DLT
+        auto subscriptedInner =
+            ReplaceKit::buildSubscirptForLoopVarReplace(inner, replace);
+        auto outer = ReplaceKit::buildDLTOuterRangeOp(cur, subscriptedInner);
+
+        // next searching step
+        string msg = replace.toReadable();
+        nextStep(origin, depth, rCur, outer, msg);
+    }
+}
+
+vector<Replace> Rule2VariableMerging::getMergableReplaces(RangeOp rangeOp,
+                                                          int depth) {
+    vector<Replace> ret;
+    IteratorTable exprIT;
+    if (!exprIT.analyzeExpr(rangeOp)) {
+        nnet_unimplemented_continue();
+        return ret;
+    }
+    exprIT.buildTableWithDefaultMap();
+    const auto &strideInAllDim = exprIT.getStrideInDim();
+
+    set<pair<Iterator, Iterator>, RefValueLess<pair<Iterator, Iterator>>>
+        checkedIterPairs{};
+    // strideInAllDim: [tensorID][dimOfTensor][Iterator]=stride
+    for (size_t tensorID = 0; tensorID < strideInAllDim.size(); ++tensorID) {
+        const auto &strideInDimsOfATensor = strideInAllDim[tensorID];
+        for (const PtrMap<Iterator, int> &strideInADim :
+             strideInDimsOfATensor) {
+            for (const auto &it1 : strideInADim) {
+                for (const auto &it2 : strideInADim) {
+                    // Backdoor for rule-based search
+                    if (substituteRules.count(depth)) {
+                        if (substituteRules[depth].at(0)->neq(it1.first))
+                            continue;
+                        if (substituteRules[depth].at(1)->neq(it2.first))
+                            continue;
+                    }
+                    if (!(it1.first->equal(it2.first) &&
+                          it1.second == it2.second) &&
+                        rangeOp->hasLoopVar(it1.first) &&
+                        rangeOp->hasLoopVar(it2.first)) {
+                        // 2 iters -> 2 iters
+                        if (auto opt = getReplaceMappingTwoLoopIters(rangeOp,
+                                                                     it1, it2))
+                            ret.emplace_back(*opt);
+
+                        // 2 iters -> 1 iter
+                        const auto iterPair = pair(it1.first, it2.first);
+                        if (!checkedIterPairs.count(iterPair)) {
+                            checkedIterPairs.insert(iterPair);
+                            if (auto opt = getReplaceMergingTwoLoopIters(
+                                    rangeOp, it1, it2, exprIT, tensorID))
+                                ret.emplace_back(*opt);
+                        }
+                    }
+                }
+            }
+        }
+    }
+    return ret;
+}
+
+optional<Replace> Rule2VariableMerging::getReplaceMergingTwoLoopIters(
+    const RangeOp &rangeOp, pair<Iterator, int> pairA,
+    pair<Iterator, int> pairB, const IteratorTable &exprIT, int tensorID) {
+    // 1*A + sb*B -> C
+    // A=C%sb, B=C/sb
+    // ax+by->z, a=1 or -1
+    // For a>0 and b>0 : x=z%b, y=z/b
+    auto x = pairA.first, y = pairB.first;
+    int a = pairA.second, b = pairB.second;
+    if (abs(a) != 1 || abs(a) * abs(b) <= 0)
+        return {};
+    if (a < 0 && b > 0) { // The only unhandled case
+        nnet_unimplemented_continue();
+        return {};
+    }
+    // negative substitution happens only if can be totally merged. So if the
+    // variable appears in another index, skip it.
+    if (a < 0 || b < 0) {
+        if (exprIT.getNumInputs() > 1) {
+            if (exprIT.getStridesInTensor(x, 1 - tensorID) != 0)
+                return {};
+            if (exprIT.getStridesInTensor(y, 1 - tensorID) != 0)
+                return {};
+        }
+    }
+    Range rangeX = rangeOp->getVarRange(x).second,
+          rangeY = rangeOp->getVarRange(y).second;
+    if (rangeX.first != 0 || rangeY.first != 0)
+        return {};
+    int lenX = rangeX.second - rangeX.first;
+    if (abs(b) != lenX)
+        return {};
+    auto z = getNewVar();
+
+    Range rangeExpr{0, 1}; // 1 is the open interval compensation
+    auto calcRangeExpr = [&rangeExpr](int stride, const Range &r) {
+        if (stride > 0) {
+            rangeExpr.first += stride * r.first;
+            rangeExpr.second += stride * (r.second - 1);
+        } else {
+            rangeExpr.first += stride * (r.second - 1);
+            rangeExpr.second += stride * r.first;
+        }
+    };
+    calcRangeExpr(a, rangeX);
+    calcRangeExpr(b, rangeY);
+
+    // build the phi/psi for index transformation
+    // phi: j_x=(i_x...),  psi: i_x=(j_x...)
+    auto ret = optional<Replace>();
+    ret.emplace();
+    ret->iteratorType = IterationType::Loop;
+    ret->newIters = {z};
+    ret->oldIters = {x, y};
+    ret->phis = {a * x + b * y - rangeExpr.first};
+    // For b < 0, the psis are not an equavalent replace. Since it must be
+    // simplified (z/b and z%b will be merged), the only important thing is
+    // their strides should be mergable. To merge the strides, an extra minus
+    // are introduced if their stride is negative.
+    ret->psis = {a * (z % b) + a * rangeExpr.first, (b > 0 ? 1 : -1) * (z / b)};
+    ret->newVarRanges = {{z, {0, rangeExpr.second - rangeExpr.first}}};
+    return ret;
+}
+
+optional<Replace>
+Rule2VariableMerging::getReplaceMappingTwoLoopIters(const RangeOp &rangeOp,
+                                                    pair<Iterator, int> pairA,
+                                                    pair<Iterator, int> pairB) {
+    // the first iterator is replaced, the second remains
+    auto i1 = pairA.first, i2 = pairB.first;
+    int sa = pairA.second, sb = pairB.second;
+    // TODO: can be relaxed to sb|sb
+    if (sa != 1 || sb == 0)
+        return {};
+    if (sb < 0) {
+        nnet_unimplemented_continue();
+        return {};
+    }
+    Range rangeA = rangeOp->getVarRange(i1).second;
+    Range rangeB = rangeOp->getVarRange(i2).second;
+    auto j1 = getNewVar(), j2 = getNewVar();
+    Range rangeJ1, rangeJ2 = rangeB;
+    assert(pairA.second == 1);
+    rangeJ1.first = rangeA.first + rangeB.first * sb;
+    rangeJ1.second = rangeA.second + (rangeB.second - 1) * sb;
+    // build the phi/psi for index transformation
+    // phi: j_x=(i_x...),  psi: i_x=(j_x...)
+    auto ret = optional<Replace>();
+    ret.emplace();
+    ret->iteratorType = IterationType::Loop;
+    ret->newIters = {j1, j2};
+    ret->oldIters = {i1, i2};
+    ret->newVarRanges = {{j1, rangeJ1}, {j2, rangeJ2}};
+    ret->phis = {sa * i1 + sb * i2, i2};
+    ret->psis = {j1 - (sb / sa) * j2, j2};
+    return ret;
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Pass/Rule3StageSplit.cc b/src/nnet/Pass/Rule3StageSplit.cc
new file mode 100644
index 00000000..5701bb51
--- /dev/null
+++ b/src/nnet/Pass/Rule3StageSplit.cc
@@ -0,0 +1,82 @@
+#include "nnet/Pass/Rule3StageSplit.h"
+#include "nnet/permutation.h"
+
+namespace nnet {
+
+void Rule3StageSplit::transform(Formula &origin, int depth, Expr &rCur) {
+    auto cur = as<RangeOpNode>(rCur);
+    vector<vector<Iterator>> splitSchemes = getSplitSummationIters(cur);
+
+    for (const auto &varSplit : splitSchemes) {
+
+        bool isSplittable = false;
+        for (const auto &splitted : varSplit)
+            if (cur->hasSumVar(splitted))
+                isSplittable = true;
+        assert(isSplittable);
+
+        const vector<VarRangePair> loopVars = cur->getLoopVarRanges(),
+                                   sumVars = cur->getSumVarRanges();
+        // move iterators from Sigma to Loop
+        vector<VarRangePair> innerLoopVars, innerSumVars, outerSumVars;
+        VecExpr indexForInner;
+        for (const auto &kv : sumVars) {
+            bool isSplitted = false;
+            for (const auto &iter : varSplit)
+                if (iter == kv.first->getName())
+                    isSplitted = true;
+            if (isSplitted) {
+                innerLoopVars.emplace_back(kv);
+                outerSumVars.emplace_back(kv);
+            } else
+                innerSumVars.emplace_back(kv);
+        }
+        innerLoopVars.insert(innerLoopVars.end(), loopVars.begin(),
+                             loopVars.end());
+        for (const auto &[var, _] : innerLoopVars)
+            indexForInner.emplace_back(var);
+
+        // if no sum iterator, the stage is redundant
+        assert(!innerSumVars.empty());
+        auto inner =
+            makeRangeOperator(innerLoopVars, innerSumVars, cur->getSummand());
+        auto subscriptedInner = make_ref<SubscriptNode>(inner, indexForInner);
+        auto outer = makeRangeOperator(cur->getLoopVarRanges(), outerSumVars,
+                                       subscriptedInner);
+        outer->setPaddings(cur->getPaddings());
+
+        // next searching step
+        string msg = "Separate sum iters: " + serializeVec(varSplit);
+        nextStep(origin, depth, rCur, outer, msg);
+    }
+}
+
+vector<vector<Iterator>>
+Rule3StageSplit::getSplitSummationIters(RangeOp rangeOp) {
+    // set<string> varSplit = {"r", "s", "i3", "i13"};
+    vector<vector<Iterator>> ret;
+    // Rule-based Hint
+    // vector<vector<Iterator>> heuristics = {{"r", "s"}, {"i3", "i13"}};
+    // for (const auto &iterSet : heuristics) {
+    //     bool notExist = false;
+    //     for (const auto &iter : iterSet)
+    //         if (!rangeOp->hasSumVar(iter))
+    //             notExist = true;
+    //     if (!notExist)
+    //         ret.emplace_back(iterSet);
+    // }
+    // if (!rulesOverall.empty())
+    //     return ret;
+    vector<Iterator> sumIters;
+    for (const auto &[iter, range] : rangeOp->getSumVarRanges())
+        sumIters.emplace_back(iter);
+    if (sumIters.size() <= 1)
+        return ret;
+    SubsetGenerator gen(sumIters);
+    do {
+        ret.emplace_back(gen.get());
+    } while (gen.next());
+    return ret;
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Pass/Rule4StageMerging.cc b/src/nnet/Pass/Rule4StageMerging.cc
new file mode 100644
index 00000000..5c2eb392
--- /dev/null
+++ b/src/nnet/Pass/Rule4StageMerging.cc
@@ -0,0 +1,91 @@
+#include "nnet/Pass/Rule4StageMerging.h"
+#include "nnet/Visitor/ReplaceNodeMutator.h"
+#include "nnet/Visitor/ReplaceVariable.h"
+
+namespace nnet {
+
+void Rule4StageMerging::transform(Formula &origin, int depth, Expr &rCur) {
+    success = rule4StageMerging(origin, depth, rCur, mergeStageWithCalc);
+}
+
+bool Rule4StageMerging::isSuccessful() { return success; }
+
+void Rule4StageMerging::setMergeStageWithCalc(bool value) {
+    mergeStageWithCalc = value;
+}
+
+bool Rule4StageMerging::rule4StageMerging(Formula &origin, int depth,
+                                          Expr &rCur, bool mergeStageWithCalc) {
+    auto rangeOp0 = as<RangeOpNode>(rCur);
+    const Subscript &sub0 = as<SubscriptNode>(rangeOp0->getSummand());
+    if (!sub0)
+        return false;
+    const auto &rangeOp1 = as<RangeOpNode>(sub0->getObject());
+    if (!rangeOp1)
+        return false;
+    const auto &sub1 = as<SubscriptNode>(rangeOp1->getSummand());
+    if (!sub1)
+        return false;
+    // merge stage with calculation only when mergeStageWithCalc=true
+    if (!mergeStageWithCalc && !rangeOp1->getSumVarRanges().empty())
+        return false;
+    // Only propogate paddings in perfect nested dimension
+    if (rangeOp1->hasPaddings()) {
+        auto oldTensor = as<TensorNode>(sub1->getObject());
+        if (!oldTensor) {
+            nnet_unimplemented_continue();
+            return 0;
+        }
+    }
+    // repalce variables: iters of rangeOp1 repalced by indexes of sub0
+    map<string, pair<Expr, Expr>> varMapping;
+    assert(sub0->getDims() == rangeOp1->getLoopVarRanges().size());
+    for (size_t i = 0; i < sub0->getDims(); ++i) {
+        varMapping[rangeOp1->getLoopVar(i)->getName()] =
+            pair(rangeOp1->getLoopVar(i), sub0->getIndex(i));
+    }
+    ReplaceVariable replaceVariable{varMapping};
+    auto merged = make_ref<RangeOpNode>(*rangeOp0);
+    merged->setSummand(replaceVariable(sub1));
+    // a naive approach to propogate paddings
+    if (rangeOp1->hasPaddings()) {
+        auto oldTensor = as<TensorNode>(sub1->getObject());
+        auto newTensor = make_ref<TensorNode>(*oldTensor);
+        for (int i = 0; i < rangeOp1->getNumOutputDims(); ++i) {
+            if (rangeOp1->getPaddings(i) == 0)
+                continue;
+            auto loopVar = rangeOp1->getLoopVar(i);
+            // FIXME: in fact this var should not appear in other index as well,
+            // which may result in OOB
+            bool findSingleVarAsIndex = false;
+            for (size_t subIndexID = 0; subIndexID < sub1->getDims();
+                 ++subIndexID) {
+                auto index = sub1->getIndex(subIndexID);
+                if (auto indexVar = as<VarNode>(index);
+                    indexVar && (indexVar->equal(loopVar))) {
+                    newTensor->setPadding(subIndexID,
+                                          newTensor->getPadding(subIndexID) +
+                                              rangeOp1->getPaddings(i));
+                    findSingleVarAsIndex = true;
+                }
+            }
+            if (!findSingleVarAsIndex) {
+                nnet_unimplemented_continue();
+                return false;
+            }
+        }
+        merged = as<RangeOpNode>(
+            ReplaceNodeMutator().replace(merged, oldTensor.get(), newTensor));
+        assert(merged != nullptr);
+    }
+    // Merge inner stage sums
+    if (!rangeOp1->getSumVarRanges().empty())
+        merged->setSumIterator(rangeOp1->getSumVarRanges());
+
+    // next searching step
+    // if mergeStageWithCalc, depth counts for invocation in rule-based search
+    nextStep(origin, (mergeStageWithCalc) ? depth : depth - 1, rCur, merged);
+    return true;
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Pass/Rule5RangeRelaxation.cc b/src/nnet/Pass/Rule5RangeRelaxation.cc
new file mode 100644
index 00000000..81c62385
--- /dev/null
+++ b/src/nnet/Pass/Rule5RangeRelaxation.cc
@@ -0,0 +1,72 @@
+#include "nnet/Pass/Rule5RangeRelaxation.h"
+#include "nnet/Visitor/RangeRelaxFunctor.h"
+
+namespace nnet {
+
+void Rule5RangeRelaxation::transform(Formula &origin, int depth, Expr &rCur) {
+    rule5RangeRelaxation(origin, depth, rCur);
+}
+
+Expr Rule5RangeRelaxation::rule5RangeRelaxation(Formula &origin, int depth,
+                                                Expr &rCur) {
+    auto cur = as<RangeOpNode>(rCur);
+    if (cur->hasPaddings()) {
+        // string msg = "====== END rule5RangeRelaxation: Paddings exist \n";
+        // dbg(msg);
+        return nullptr;
+    }
+
+    // Infer meaningful calculation range
+    RangeRelaxFunctor rangeRexlaxtionFunctor{cur};
+    RangeMap rangeMap = rangeRexlaxtionFunctor(cur);
+    auto relaxedCur = make_ref<RangeOpNode>(*cur);
+    bool isRelaxed = false;
+    vector<int> paddings;
+    // check whether narrow the calculation range
+    for (size_t i = 0; i < cur->getLoopVarRanges().size(); ++i) {
+        const auto &[iter, iterRange] =
+            cur->getVarRange(IterationType::Loop, i);
+        if (auto it = rangeMap.find(iter); it != rangeMap.end()) {
+            // intersection of validRange and iterRange is necessary computation
+            // TODO: it is redundant with RangeRelaxFunctor::intersectRangeMaps.
+            // An independent Range class might be necessary.
+            const Range &validRange = it->second;
+            Range relaxedRange{max(iterRange.first, validRange.first),
+                               min(iterRange.second, validRange.second)};
+            if (relaxedRange != iterRange) {
+                isRelaxed = true;
+                relaxedCur->setVarRange(IterationType::Loop, i,
+                                        {iter, relaxedRange});
+                paddings.emplace_back(
+                    max(relaxedRange.first - iterRange.first,
+                        iterRange.second - relaxedRange.second));
+            } else
+                paddings.emplace_back(0);
+        } else
+            paddings.emplace_back(0);
+    }
+    relaxedCur->setPaddings(paddings);
+    if (!isRelaxed) {
+        // string msg = "====== END rule5RangeRelaxation: Relaxation not
+        // found\n"; dbg(msg);
+        return nullptr;
+    }
+
+    // next searching step
+    string detailedMsg;
+    for (size_t i = 0; i < cur->getLoopVarRanges().size(); ++i) {
+        const auto &[v, a] = cur->getVarRange(IterationType::Loop, i);
+        const auto &[_, b] = relaxedCur->getVarRange(IterationType::Loop, i);
+        if (a != b) {
+            detailedMsg += v->getName();
+            detailedMsg +=
+                " (" + to_string(a.first) + "," + to_string(a.second) + ") to";
+            detailedMsg +=
+                " (" + to_string(b.first) + "," + to_string(b.second) + "),";
+        }
+    }
+    nextStep(origin, depth, rCur, relaxedCur, detailedMsg);
+    return relaxedCur;
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Pass/Rule6KenerlMatching.cc b/src/nnet/Pass/Rule6KenerlMatching.cc
new file mode 100644
index 00000000..7a2a8976
--- /dev/null
+++ b/src/nnet/Pass/Rule6KenerlMatching.cc
@@ -0,0 +1,57 @@
+#include "nnet/Pass/Rule6KenerlMatching.h"
+#include "nnet/Visitor/InputVisitor.h"
+#include "nnet/Visitor/PatternMatcher.h"
+
+namespace nnet {
+
+void Rule6KenerlMatching::transform(Formula &origin, int depth, Expr &rCur) {
+    auto cur = as<RangeOpNode>(rCur);
+    // Build wrapper stages for enforce axis starts from 0
+    PatternMatcher patternMatcher(derivator, cur);
+    cur = patternMatcher.getOffsetCur();
+
+    // Match matchable routines
+    for (int i = 0; i < MatchableRoutineTypeCnt; ++i) {
+        auto targetOp = idToRoutineType(i);
+        // During guided search, only check the target OP
+        if (derivator.getTargetOp() != RoutineType::NoneType &&
+            derivator.getTargetOp() != targetOp)
+            continue;
+        auto replaces =
+            patternMatcher.matchWithPattern(cur, getPattern(targetOp));
+        replaces = patternMatcher.applyWrapper(replaces);
+        for (auto newCur : replaces)
+            nextStep(origin, depth, rCur, newCur);
+    }
+    { // Match element-wise OP
+        auto replaces = matchElementWise(cur);
+        if (!replaces.empty())
+            dbg(rCur);
+        for (auto newCur : replaces)
+            nextStep(origin, depth, rCur, newCur);
+    }
+}
+
+VecExpr Rule6KenerlMatching::matchElementWise(const RangeOp &rangeOp) {
+    // If the stage is compute bound, then do not convert it.
+    int64_t flops = rangeOp->getFlops(), outputSize = rangeOp->getOutputSize();
+    int64_t inputSize = rangeOp->getInputSize(rangeOp);
+    if (double(flops) / (inputSize + outputSize) > 3)
+        return {};
+
+    vector<int> newShape;
+    for (const auto &[var, range] : rangeOp->getLoopVarRanges()) {
+        if (range.first != 0) {
+            nnet_unimplemented_continue();
+            return {};
+        }
+        newShape.emplace_back(range.second - range.first);
+    }
+    const auto &inputs = InputVisitor().getInputs(rangeOp);
+    auto source =
+        make_ref<ElementWiseNode>(rangeOp, inputs, rangeOp->getOutputShape());
+    auto newTensor = makeTensor(newTensorName(), newShape, {}, source);
+    return {newTensor};
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Pass/Rule7DLT.cc b/src/nnet/Pass/Rule7DLT.cc
new file mode 100644
index 00000000..b78fdf54
--- /dev/null
+++ b/src/nnet/Pass/Rule7DLT.cc
@@ -0,0 +1,78 @@
+#include "nnet/Pass/Rule7DLT.h"
+#include "nnet/Visitor/ReplaceNodeMutator.h"
+#include "nnet/dlt.h"
+
+namespace nnet {
+
+void Rule7DLT::transform(Formula &origin, int depth, Expr &rCur) {
+    auto cur = as<RangeOpNode>(rCur);
+    if (!cur)
+        return;
+    auto op = as<BinaryOpNode>(cur->getSummand());
+    if (!op)
+        return;
+    auto subs = {op->getLhs(), op->getRhs()};
+    for (auto subExpr : subs) {
+        auto sub = as<SubscriptNode>(subExpr);
+        if (!sub)
+            continue;
+        auto tensor = as<TensorNode>(sub->getObject());
+        if (!tensor)
+            continue;
+        // // HACK for G2BMM
+        // if (tensor->getDims() != 3)
+        //     continue;
+        for (const auto factor : getFactors()) {
+            for (int targetDim = 0; targetDim < tensor->getDims();
+                 ++targetDim) {
+                if (tensor->getShape(targetDim) % factor)
+                    continue;
+                // Debug hint for G2BMM
+                if (derivator.getPassMode() == Derivator::PassMode::Debug) {
+                    if (tensor->getShape(targetDim) != 10000)
+                        continue;
+                    assert(targetDim == 1);
+                }
+                DLT dlt;
+                dlt.split(targetDim, factor);
+                vector<int> newOrder(tensor->getDims() + 1);
+                for (int i = 0; i < tensor->getDims() + 1; ++i)
+                    newOrder[i] = i;
+                newOrder[targetDim]++;
+                newOrder[targetDim + 1]--;
+                dlt.reorder(newOrder);
+                dlt.merge(targetDim, targetDim + 1);
+                if (auto opt = dlt.apply(cur, sub, newTensorName())) {
+                    Expr newSummand = ReplaceNodeMutator().replace(
+                        cur->getSummand(), sub.get(), *opt);
+                    auto newCur = buildDLTSingleRangeOp(cur, newSummand);
+
+                    // next searching step
+                    string msg = "====== END rule7DLT\n";
+                    dbg(msg);
+                    nextStep(origin, depth, rCur, newCur);
+                }
+            }
+        }
+    }
+}
+
+Expr Rule7DLT::buildDLTSingleRangeOp(const RangeOp &original,
+                                     const Expr &newSummand) {
+    auto rangeOp = make_ref<RangeOpNode>(*original);
+    rangeOp->setSummand(newSummand);
+    return rangeOp;
+}
+
+vector<int> Rule7DLT::getFactors() {
+    if (derivator.getPassMode() == Derivator::PassMode::Debug) {
+        return {4};
+    } else if (derivator.getPassMode() == Derivator::PassMode::Full) {
+        return {3, 4};
+    } else {
+        nnet_unimplemented_halt();
+        return {};
+    }
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Pass/Rule8GuidedDLT.cc b/src/nnet/Pass/Rule8GuidedDLT.cc
new file mode 100644
index 00000000..fcf6c8cf
--- /dev/null
+++ b/src/nnet/Pass/Rule8GuidedDLT.cc
@@ -0,0 +1,317 @@
+#include "nnet/Pass/Rule8GuidedDLT.h"
+#include "nnet/Visitor/ReplaceNodeMutator.h"
+
+namespace nnet {
+
+static int bitCount(unsigned int n) {
+    int count = 0;
+    while (n != 0) {
+        n = n & (n - 1);
+        count++;
+    }
+    return count;
+}
+
+static int bitPosition(unsigned int n) {
+    assert(bitCount(n) == 1);
+    int ret = 0;
+    for (n >>= 1; n; n >>= 1)
+        ++ret;
+    return ret;
+}
+
+void Rule8GuidedDLT::transform(Formula &origin, int depth, Expr &rCur) {
+    guidedDLT(origin, depth, rCur);
+}
+
+VecExpr Rule8GuidedDLT::guidedDLT(Formula &origin, int depth, Expr &rCur,
+                                  bool debug) {
+    string detailedMsg;
+    VecExpr ret;
+    auto cur = as<RangeOpNode>(rCur);
+    // check cur satisfies T1[A]*T2[B]
+
+    if (!statisfyGuidedDLT(cur))
+        return ret;
+
+    IteratorTable exprIT;
+    if (!exprIT.analyzeExpr(cur))
+        return ret;
+    exprIT.buildTableWithDefaultMap();
+
+    bool setTargetOpHere = false;
+    for (int i = 0; i < MatchableRoutineTypeCnt; ++i) {
+        // if not correctly unset this variable
+        assert(setTargetOpHere == false);
+        // If the guide direction is set
+        if (derivator.getTargetOp() != RoutineType::NoneType &&
+            idToRoutineType(i) != derivator.getTargetOp())
+            continue;
+        // Warning: no continue befor unset the targetOp
+        if (derivator.getTargetOp() == RoutineType::NoneType) {
+            setTargetOpHere = true;
+            derivator.setTargetOp(idToRoutineType(i));
+        }
+        const Pattern &pattern = getPattern(derivator.getTargetOp());
+        auto mismatches = exprIT.matchPatternIT(pattern);
+        // Pruning less possible results
+
+        // std::cout << "mismatches= " << mismatches.size()
+        //           << "; setTargetOpHere: " << setTargetOpHere << "; ";
+        // std::cout << "TargetOp = " <<
+        // static_cast<int>(derivator.getTargetOp())
+        //           << "; mismatches : ";
+        // for (const auto i : mismatches)
+        //     std::cout << static_cast<int>(i.type) << " ";
+        // std::cout << endl;
+        if (mismatches.size() == 0) {
+            derivator.setSearchState(2);
+            nextStep(origin, depth, rCur, rCur);
+            derivator.setSearchState(1);
+        }
+        if (mismatches.size() > 0 && mismatches.size() <= 2) {
+            for (const auto &mismatch : mismatches) {
+                Expr newCur;
+                if (mismatch.type == MismatchType::MoreVar) {
+                    newCur = guidedDLTMoreVar2(cur, mismatch, exprIT, pattern);
+                    detailedMsg += "guidedDLTMoreVar2 ";
+                } else if (mismatch.type == MismatchType::DLMismatch ||
+                           mismatch.type == MismatchType::OutputDLMismatch) {
+                    if (mismatches.size() > 1) {
+                        nnet_unimplemented_continue();
+                        break;
+                    }
+                    newCur =
+                        guidedDLTDLMismatch(cur, mismatch, exprIT, pattern);
+                    detailedMsg += "guidedDLTDLMismatch ";
+                }
+                // std::cout << "newCur= "
+                //           << ((newCur == nullptr) ? "Nullptr"
+                //                                   : newCur->toReadable())
+                //           << endl;
+                if (!newCur)
+                    continue;
+                if (debug)
+                    ret.emplace_back(newCur);
+                // next searching step
+                detailedMsg = "Toward " +
+                              getPatternName(derivator.getTargetOp()) + ". " +
+                              detailedMsg;
+                nextStep(origin, depth, rCur, newCur, detailedMsg);
+            }
+        }
+        // Unset targetOp
+        if (setTargetOpHere) {
+            derivator.setTargetOp(RoutineType::NoneType);
+            setTargetOpHere = false;
+        }
+    }
+    return ret;
+}
+
+Expr Rule8GuidedDLT::guidedDLTDLMismatch(
+    const RangeOp &cur, const Mismatch &mismatch,
+    [[maybe_unused]] const IteratorTable &exprIT, const Pattern &pattern) {
+    assert(mismatch.type == MismatchType::DLMismatch ||
+           mismatch.type == MismatchType::OutputDLMismatch);
+    // Currently only deal with ouput DLT
+    if (mismatch.bitmap != pattern.getNumInputs()) {
+        nnet_unimplemented_continue();
+        return nullptr;
+    }
+    vector<VarRangePair> newVarRanges;
+    for (const auto &[var, _] : pattern.getRangeOp()->getLoopVarRanges()) {
+        const auto &iterInExpr = mismatch.mappingIter_r.at(var);
+        newVarRanges.emplace_back(cur->getVarRange(iterInExpr));
+    }
+    auto inner = make_ref<RangeOpNode>(*cur);
+    inner->setLoopIterator(newVarRanges);
+    auto subscriptedInner =
+        ReplaceKit::buildSubscirptForLoopVarReplace(inner, {});
+    auto outer = ReplaceKit::buildDLTOuterRangeOp(cur, subscriptedInner);
+    return outer;
+}
+
+bool Rule8GuidedDLT::statisfyGuidedDLT(RangeOp cur) const {
+    auto mul = as<BinaryOpNode>(cur->getSummand());
+    if (!mul)
+        return false;
+    if (mul->getOpType() != OpType::Mul)
+        return false;
+    return as<SubscriptNode>(mul->getLhs()) && as<SubscriptNode>(mul->getRhs());
+}
+
+Expr Rule8GuidedDLT::guidedDLTMoreVar2(const RangeOp &cur,
+                                       const Mismatch &mismatch,
+                                       const IteratorTable &exprIT,
+                                       const Pattern &pattern) {
+    int bitmap = mismatch.bitmap;
+    const auto &mergedItersDefaultOrder = exprIT.getPosTable(bitmap);
+
+    // Assure vars only appear in one input tensor
+    int bitmapOfInputs = bitmap & ((1 << exprIT.getNumInputs()) - 1);
+    if (bitCount(bitmapOfInputs) > 1)
+        return nullptr;
+    if (pattern.getPosTable(bitmap).size() != 1) {
+        nnet_unimplemented_continue();
+        return nullptr;
+    }
+    if (mergedItersDefaultOrder.size() < 1)
+        return nullptr;
+    int tensorID = bitPosition(bitmapOfInputs);
+    if (!checkElementsHaveOnlyOneAccessIteratorSet(exprIT, tensorID))
+        return nullptr;
+    vector<Var> oldVars; // i_1, ...
+    vector<Var> newVars; // j_1, ...
+    VecExpr psis;        // i_1=\psi_1(j_1, ...)
+    VecExpr phis;        // j_1=\phi_1(i_1, ...), not necessary for Sum iter
+    vector<VarRangePair> newVarRanges;
+
+    auto originalTensor = exprIT.getTensor(tensorID);
+    auto originalSub = exprIT.getSubscript(tensorID);
+    vector<bool> mergedDims(originalTensor->getDims());
+
+    // Heuristic: merge iters according to their appearance positions
+    std::multimap<int, Var> sortedMergedIters;
+    for (const auto &iter : mergedItersDefaultOrder) {
+        vector<int> dims = exprIT.getIterDimInTensor(tensorID, iter);
+        assert(dims.size() == 1);
+        sortedMergedIters.emplace(dims[0], iter);
+    }
+    vector<Var> mergedIters; // decides the order of fused dims
+    for (const auto &[_, v] : sortedMergedIters)
+        mergedIters.emplace_back(v);
+
+    // Add the merged iterators
+    const auto newVar = getNewVar();
+    newVars.emplace_back(newVar);
+    int newRange = 1;
+    for (const auto &iter : mergedIters) {
+        oldVars.emplace_back(iter);
+        auto range = cur->getRange(iter);
+        newRange *= (range.second - range.first);
+        // if (range.first == 0)
+        //     nnet_unimplemented_halt();
+    }
+    newVarRanges.emplace_back(newVar, Range{0, newRange});
+    // Add psis for each old iterator
+    int remainingRange = newRange;
+    Expr phi = nullptr;
+    for (const auto &iter : mergedIters) {
+        auto oldVar = iter;
+        auto range = cur->getRange(iter);
+        int len = (range.second - range.first);
+        remainingRange /= len;
+        Expr psi = newVar;
+        if (remainingRange > 1)
+            psi = psi / remainingRange;
+        if (newRange > remainingRange * len)
+            psi = psi % len;
+        int start = cur->getRange(iter).first;
+        if (start != 0)
+            psi = psi + start;
+        psis.emplace_back(psi);
+        phi = phi + remainingRange * (oldVar - start);
+    }
+    Replace replace{.iteratorType = IterationType::Loop,
+                    .oldIters = oldVars,
+                    .newIters = newVars,
+                    .phis = VecExpr{phi},
+                    .psis = psis,
+                    .newVarRanges = newVarRanges};
+    // HACK: decide the rebuild data shape order
+    // TODO: get a partial iter mapping and permutate them?
+    vector<Var> tensorDimAxes{newVars};
+    vector<int> newShape;
+    for (const auto &[var, range] : newVarRanges)
+        newShape.emplace_back(range.second - range.first);
+    for (int row = 0; row < exprIT.getNumRows(); ++row) {
+        // Deal with other dimensions of the current tensor
+        if (row == bitmap || ((row & (1 << tensorID)) == 0))
+            continue;
+        using StrideIter = tuple<int, int, Iterator>;
+        vector<StrideIter> strideIters;
+
+        for (size_t i = 0; i < exprIT.getPosTable(row).size(); ++i) {
+            const auto &iter = exprIT.getPosTable(row)[i];
+            const Range range = cur->getRange(iter);
+            const int len = range.second - range.first;
+
+            // HACK Sort according to original stride. (keep original order)
+            strideIters.emplace_back(-exprIT.getStridesInTensor(iter, tensorID),
+                                     len, iter);
+
+            // // HACK for conv
+            // if (iter == "n")
+            //     strideIters.emplace_back(2, len, iter);
+            // else if (iter == "c")
+            //     strideIters.emplace_back(1, len, iter);
+            // else
+            //     strideIters.emplace_back(0, len, iter);
+        }
+        // HACK: Assure the order of iterators
+        std::sort(strideIters.begin(), strideIters.end(),
+                  ref_value_less<StrideIter>);
+        for (const auto &[_, len, oldIter] : strideIters) {
+            const auto &oldVar = oldIter;
+            tensorDimAxes.emplace_back(oldVar);
+            newShape.emplace_back(len);
+        }
+    }
+
+    // build DLT source
+    const auto sourceExpr =
+        buildGuidedDLTSource(originalSub, replace, tensorDimAxes, newShape);
+    const auto sourceRoutine = make_ref<ElementWiseNode>(
+        sourceExpr, vector<Tensor>{originalTensor}, newShape);
+    // build stage connections
+    const auto newTensor =
+        makeTensor(newTensorName(), newShape, {}, sourceRoutine);
+    const auto &newSub = makeSubscript(
+        newTensor, VecExpr(tensorDimAxes.begin(), tensorDimAxes.end()));
+    // TODO [1124]: get variable mapping and reorder L according to it
+    // dbg(cur, originalSub, newSub, newVarRanges, replace.toReadable(),
+    //     tensorDimAxes, newShape);
+
+    // Replace the entire subscript(A[xxxxx,xxx]) in the summand
+    Expr newSummand = ReplaceNodeMutator().replace(cur->getSummand(),
+                                                   originalSub.get(), newSub);
+    auto inner = ReplaceKit::replaceRangeOpIterator(cur, replace, newSummand);
+    auto subscriptedInner =
+        ReplaceKit::buildSubscirptForLoopVarReplace(inner, replace);
+    auto outer = ReplaceKit::buildDLTOuterRangeOp(cur, subscriptedInner);
+    return outer;
+}
+
+bool Rule8GuidedDLT::checkElementsHaveOnlyOneAccessIteratorSet(
+    const IteratorTable &exprIT, int tensorID) {
+    const auto &strideInDim = exprIT.getStrideInDim();
+    for (const auto &strideForOneDim : strideInDim[tensorID]) {
+        vector<pair<int, int>> strideLengthPairs;
+        for (const auto &[iter, s] : strideForOneDim) {
+            const auto &range = exprIT.getRangeOp()->getRange(iter);
+            strideLengthPairs.emplace_back(s, range.second - range.first);
+        }
+        std::sort(strideLengthPairs.begin(), strideLengthPairs.end());
+        for (size_t i = 0; i < strideLengthPairs.size() - 1; ++i) {
+            const auto &[stride, length] = strideLengthPairs[i];
+            if (stride * length > strideLengthPairs[i + 1].first)
+                return false;
+        }
+    }
+    return true;
+}
+
+Expr Rule8GuidedDLT::buildGuidedDLTSource(const Subscript &originalSub,
+                                          Replace replace,
+                                          vector<Var> tensorDimAxes,
+                                          vector<int> newShape) {
+    Expr newSub = ReplaceKit::replaceMultipleExprs(
+        originalSub, replace.oldIters, replace.psis, true);
+    vector<VarRangePair> loopVarRangePairs;
+    for (size_t i = 0; i < tensorDimAxes.size(); ++i)
+        loopVarRangePairs.emplace_back(tensorDimAxes[i], pair(0, newShape[i]));
+    return makeRangeOperator(loopVarRangePairs, {}, newSub);
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Pass/Rule90TwoStageElementWise.cc b/src/nnet/Pass/Rule90TwoStageElementWise.cc
new file mode 100644
index 00000000..9ea0c9a2
--- /dev/null
+++ b/src/nnet/Pass/Rule90TwoStageElementWise.cc
@@ -0,0 +1,54 @@
+#include "nnet/Pass/Rule90TwoStageElementWise.h"
+#include "nnet/Visitor/InputVisitor.h"
+
+namespace nnet {
+
+void Rule90TwoStageElementWise::transform(Formula &origin, int depth,
+                                          Expr &rCur) {
+    auto cur = as<RangeOpNode>(rCur);
+    { // Match element-wise OP
+        auto replaces = matchTwoStageElementWise(cur);
+        // if (!replaces.empty())
+        //     dbg(rCur);
+        // dbg(replaces);
+        for (auto newCur : replaces)
+            nextStep(origin, depth, rCur, newCur);
+    }
+}
+
+VecExpr
+Rule90TwoStageElementWise::matchTwoStageElementWise(const RangeOp &rangeOp) {
+    // If the stage is compute bound, then do not convert it.
+    int64_t flops = rangeOp->getFlops(), outputSize = rangeOp->getOutputSize();
+    int64_t inputSize = rangeOp->getInputSize(rangeOp);
+    if (double(flops) / (inputSize + outputSize) > 3)
+        return {};
+    auto outerSub = as<SubscriptNode>(rangeOp->getSummand());
+    if (!outerSub)
+        return {};
+    auto innerRangeOp = as<RangeOpNode>(outerSub->getObject());
+    if (!innerRangeOp)
+        return {};
+    auto innerSub = as<SubscriptNode>(innerRangeOp->getSummand());
+    if (!innerSub)
+        return {};
+    auto innerTensor = as<TensorNode>(innerSub->getObject());
+    if (!innerTensor)
+        return {};
+
+    vector<int> newShape;
+    for (const auto &[var, range] : rangeOp->getLoopVarRanges()) {
+        if (range.first != 0) {
+            nnet_unimplemented_continue();
+            return {};
+        }
+        newShape.emplace_back(range.second - range.first);
+    }
+    const auto &inputs = InputVisitor().getInputs(rangeOp);
+    auto source =
+        make_ref<ElementWiseNode>(rangeOp, inputs, rangeOp->getOutputShape());
+    auto newTensor = makeTensor(newTensorName(), newShape, {}, source);
+    return {newTensor};
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Pass/Rule91MergeStagesWithSum.cc b/src/nnet/Pass/Rule91MergeStagesWithSum.cc
new file mode 100644
index 00000000..8086f6d9
--- /dev/null
+++ b/src/nnet/Pass/Rule91MergeStagesWithSum.cc
@@ -0,0 +1,11 @@
+#include "nnet/Pass/Rule91MergeStagesWithSum.h"
+#include "nnet/Pass/Rule4StageMerging.h"
+
+namespace nnet {
+
+void Rule91MergeStagesWithSum::transform(Formula &origin, int depth,
+                                         Expr &rCur) {
+    Rule4StageMerging(derivator).rule4StageMerging(origin, depth, rCur, true);
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Pass/Rule9RangeMagnify.cc b/src/nnet/Pass/Rule9RangeMagnify.cc
new file mode 100644
index 00000000..d831c476
--- /dev/null
+++ b/src/nnet/Pass/Rule9RangeMagnify.cc
@@ -0,0 +1,45 @@
+#include "nnet/Pass/Rule9RangeMagnify.h"
+#include "nnet/Visitor/RangeMagnifyVisitor.h"
+
+namespace nnet {
+
+void Rule9RangeMagnify::transform(Formula &origin, int depth, Expr &rCur) {
+    auto cur = as<RangeOpNode>(rCur);
+    if (cur->hasPaddings()) {
+        // string msg = "====== END rule9RangeMagnify: Paddings exist \n";
+        // dbg(msg);
+        return;
+    }
+    // HACK for conv5x5
+    vector<VarRangePair> newSumVarRanges;
+    for (const auto &[var, range] : cur->getSumVarRanges()) {
+        if (range.first == 0 && range.second == 5) {
+            newSumVarRanges.emplace_back(
+                var, Range{range.first, (range.second + 2) / 3 * 3});
+        } else
+            newSumVarRanges.emplace_back(var, range);
+    }
+    if (newSumVarRanges.empty())
+        return;
+    auto magnifiedCur = RangeMagnifyVisitor().magnify(cur, newSumVarRanges);
+    if (!magnifiedCur)
+        return;
+
+    // next searching step
+    string msg = "relax iterating ranges ";
+    for (size_t i = 0; i < cur->getSumVarRanges().size(); ++i) {
+        const auto &[v1, a] = cur->getVarRange(IterationType::Sum, i);
+        const auto &[v2, b] = magnifiedCur->getVarRange(IterationType::Sum, i);
+        assert(v1->getName() == v2->getName());
+        if (a != b) {
+            msg += v1->getName();
+            msg +=
+                " (" + to_string(a.first) + "," + to_string(a.second) + ") to";
+            msg += " (" + to_string(b.first) + "," + to_string(b.second) + "),";
+        }
+    }
+    nextStep(origin, depth, rCur, magnifiedCur, msg);
+    return;
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Visitor/AsTVMVisitor.cc b/src/nnet/Visitor/AsTVMVisitor.cc
new file mode 100644
index 00000000..5044eb9e
--- /dev/null
+++ b/src/nnet/Visitor/AsTVMVisitor.cc
@@ -0,0 +1,165 @@
+#include "nnet/Visitor/AsTVMVisitor.h"
+
+namespace nnet {
+
+std::string AsTVMVisitor::visit_(const Constant &c) {
+    return std::to_string(c->getValue());
+}
+std::string AsTVMVisitor::visit_(const BinaryOp &c) {
+    switch (c->getOpType()) {
+    case OpType::Add:
+        return "(" + dispatch(c->getLhs()) + " + " + dispatch(c->getRhs()) +
+               ")";
+    case OpType::Sub:
+        return "(" + dispatch(c->getLhs()) + " - " + dispatch(c->getRhs()) +
+               ")";
+    case OpType::Mul:
+        return "(" + dispatch(c->getLhs()) + " * " + dispatch(c->getRhs()) +
+               ")";
+    case OpType::Div:
+        return "(" + dispatch(c->getLhs()) + " // " + dispatch(c->getRhs()) +
+               ")";
+    case OpType::Mod:
+        return "(" + dispatch(c->getLhs()) + " % " + dispatch(c->getRhs()) +
+               ")";
+    default:
+        assert(false);
+    }
+}
+std::string AsTVMVisitor::visit_(const Func &c) {
+    switch (c->getFuncType()) {
+    case FuncType::Relu:
+        // TODO: Deduce the dtype
+        return "te.max(" + dispatch(c->getObject()) +
+               ", tvm.tir.const(0, 'float32'))";
+    case FuncType::Tanh:
+        return "te.tanh(" + dispatch(c->getObject()) + ")";
+    default:
+        assert(false);
+    }
+}
+std::string AsTVMVisitor::visit_(const RangeOp &c) {
+    auto outerStage = curStage;
+    curStage = nStage++;
+
+    std::string stmt;
+    std::string stageName = "s" + std::to_string(curStage);
+    std::vector<std::string> reduceVars;
+    for (auto &&[var, range] : c->getSumVarRanges()) {
+        std::string varName = stageName + "_" + var->getName();
+        stmt += varName + " = " + "te.reduce_axis((" +
+                std::to_string(range.first) + ", " +
+                std::to_string(range.second) + "), name=\"" + varName + "\")\n";
+        reduceVars.emplace_back(varName);
+        pythonVars.emplace_back(varName);
+    }
+    std::vector<int> shape;
+    stmt += stageName + " = te.compute((";
+    for (size_t i = 0, n = c->getLoopVarRanges().size(); i < n; i++) {
+        auto &&[var, range] = c->getLoopVarRanges()[i];
+        std::string varName = stageName + "_" + var->getName();
+        offset[varName] = -range.first + c->getPaddings(i);
+        auto len = range.second - range.first + 2 * c->getPaddings(i);
+        stmt += std::to_string(len) + ", ";
+        shape.emplace_back(len);
+    }
+    stmt += "), lambda ";
+    bool first = true;
+    for (auto &&[var, range] : c->getLoopVarRanges()) {
+        std::string varName = stageName + "_" + var->getName();
+        stmt += (first ? "" : ", ") + varName;
+        first = false;
+    }
+    std::string summand = dispatch(c->getSummand());
+    if (!reduceVars.empty()) {
+        summand = "te.sum(" + summand + ", axis=(";
+        for (auto &&var : reduceVars) {
+            summand += var + ", ";
+        }
+        summand += "))";
+    }
+    if (c->hasPaddings()) {
+        std::string guard = "tir.if_then_else(tir.all(";
+        bool first = true;
+        for (size_t i = 0, n = c->getLoopVarRanges().size(); i < n; i++) {
+            auto &&[var, range] = c->getLoopVarRanges()[i];
+            std::string varName = stageName + "_" + var->getName();
+            if (auto pad = c->getPaddings(i); pad > 0) {
+                guard += (first ? "" : ", ") + varName +
+                         " >= " + std::to_string(range.first) + ", " + varName +
+                         " < " + std::to_string(range.second);
+                first = false;
+            }
+        }
+        // TODO: Deduce the dtype
+        guard += "), " + summand + ", tvm.tir.const(0.0, \"float32\"))";
+        summand = guard;
+    }
+    stmt += ": " + summand + ")";
+    stmts += stmt + "\n";
+
+    pythonVars.emplace_back(stageName);
+    output = stageName;
+    outputShape = std::move(shape);
+    curStage = outerStage;
+    return stageName;
+}
+std::string AsTVMVisitor::visit_(const Subscript &c) {
+    std::string str = dispatch(c->getObject()) + "[";
+    for (size_t i = 0, n = c->getIndex().size(); i < n; i++) {
+        const auto &idx = c->getIndex()[i];
+        str += (i == 0 ? "" : ", ") + dispatch(idx);
+        if (c->getObject()->getType() == NodeType::RangeOpNodeType) {
+            auto rangeOp = as<RangeOpNode>(c->getObject());
+            str += " - " +
+                   std::to_string(rangeOp->getLoopVarRanges()[i].second.first -
+                                  rangeOp->getPaddings(i));
+        }
+    }
+    str += "]";
+    return str;
+}
+std::string AsTVMVisitor::visit_(const Var &c) {
+    std::string stageName = "s" + std::to_string(curStage);
+    std::string varName = stageName + "_" + c->getName();
+    if (offset.count(varName)) {
+        return "(" + varName + " - " + std::to_string(offset.at(varName)) + ")";
+    } else {
+        return varName;
+    }
+}
+std::string AsTVMVisitor::visit_(const Tensor &c) {
+    pythonVars.emplace_back(c->getName());
+    inputs.emplace_back(c->getName());
+    inputShapes.emplace_back(c->getShape());
+    std::string stmt = c->getName() + " = te.placeholder((";
+    for (auto &&dim : c->getShape()) {
+        stmt += std::to_string(dim) + ", ";
+    }
+    stmt += "), name='" + c->getName() + "')";
+    stmts += stmt + "\n";
+    return c->getName();
+}
+std::string AsTVMVisitor::getStmts() const {
+    std::string ret;
+
+    // Workaround because closure capturing does not work in an `exec`
+    // https://stackoverflow.com/questions/2749655/why-are-closures-broken-within-exec
+    ret += "global ";
+    bool first = true;
+    for (auto &&var : pythonVars) {
+        ret += (first ? "" : ", ") + var;
+        first = false;
+    }
+    ret += "\n";
+
+    ret += stmts;
+    ret += "ret = [" + output;
+    for (auto &&input : inputs) {
+        ret += ", " + input;
+    }
+    ret += "]\n";
+    return ret;
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Visitor/CheckOOBVisitor.cc b/src/nnet/Visitor/CheckOOBVisitor.cc
new file mode 100644
index 00000000..4bdde4dc
--- /dev/null
+++ b/src/nnet/Visitor/CheckOOBVisitor.cc
@@ -0,0 +1,35 @@
+#include "nnet/Visitor/CheckOOBVisitor.h"
+#include "nnet/Visitor/SimplifyExprVisitor.h"
+
+namespace nnet {
+
+void CheckOOBVisitor::visit_(const Subscript &c) {
+    const auto &objectRanges = c->getObjectRangesWithPaddings();
+    for (size_t dim = 0; dim < c->getDims(); ++dim) {
+        SimplifyExprVisitor simplifier;
+        auto optional = simplifier.getExprRange(c->getIndex(dim), rangeOp);
+        if (!optional.has_value())
+            continue;
+        const Range &exprRange = *optional;
+        if (exprRange.first < objectRanges[dim].first ||
+            exprRange.second > objectRanges[dim].second) {
+            // dbg("OOB detected!", c, dim, exprRange, objectRanges[dim]);
+            // std::cout << "OOB detected! " << c->toReadable() << ", dim=" <<
+            // dim
+            //           << ", Range=(" << exprRange.first << ", "
+            //           << exprRange.second << "), objRange=("
+            //           << objectRanges[dim].first << ", "
+            //           << objectRanges[dim].second << ")." << std::endl;
+            detect = true;
+        }
+    }
+}
+
+bool CheckOOBVisitor::checkRangeOp(const RangeOp &_rangeOp) {
+    detect = false;
+    rangeOp = _rangeOp;
+    dispatch(rangeOp);
+    return detect;
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Visitor/CloneMutator.cc b/src/nnet/Visitor/CloneMutator.cc
new file mode 100644
index 00000000..7f411baa
--- /dev/null
+++ b/src/nnet/Visitor/CloneMutator.cc
@@ -0,0 +1,9 @@
+#include "nnet/Visitor/CloneMutator.h"
+
+namespace nnet {
+
+Expr CloneMutator::visit_(const Constant &c) { return c; }
+Expr CloneMutator::visit_(const Var &c) { return c; }
+Expr CloneMutator::visit_(const Tensor &c) { return c; }
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Visitor/CompareMultiFormulasVisitor.cc b/src/nnet/Visitor/CompareMultiFormulasVisitor.cc
new file mode 100644
index 00000000..6b697fe5
--- /dev/null
+++ b/src/nnet/Visitor/CompareMultiFormulasVisitor.cc
@@ -0,0 +1,34 @@
+#include "nnet/Visitor/CompareMultiFormulasVisitor.h"
+
+namespace nnet {
+
+bool CompareMultiFormulasVisitor::compare(const VecExpr &roots) {
+    if (roots.empty())
+        return false;
+    vector<RangeOp> rangeOps;
+    for (const auto &root : roots) {
+        if (auto rangeOp = as<RangeOpNode>(root))
+            rangeOps.emplace_back(rangeOp);
+        else
+            return false;
+    }
+    const auto pattern = rangeOps[0];
+    for (auto rangeOp : rangeOps) {
+        if (pattern->getNumOutputDims() != rangeOp->getNumOutputDims()) {
+            return false;
+        }
+        for (int i = 0; i < pattern->getNumOutputDims(); ++i)
+            if (pattern->getVarRange(0, i).second !=
+                rangeOp->getVarRange(0, i).second) {
+                return false;
+            }
+        for (size_t i = 0; i < pattern->getSumVarRanges().size(); ++i)
+            if (pattern->getVarRange(1, i).second !=
+                rangeOp->getVarRange(1, i).second) {
+                return false;
+            }
+    }
+    return true;
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Visitor/CountRoutineVisitor.cc b/src/nnet/Visitor/CountRoutineVisitor.cc
new file mode 100644
index 00000000..1411c3ea
--- /dev/null
+++ b/src/nnet/Visitor/CountRoutineVisitor.cc
@@ -0,0 +1,38 @@
+#include "nnet/Visitor/CountRoutineVisitor.h"
+
+namespace nnet {
+
+void CountRoutineVisitor::visit_(const Tensor &c) {
+    if (auto routine = c->getSource(); routine) {
+        cnts[routineTypeToId(routine->getType())]++;
+    }
+    ExprTreeVisitor::visit_(c);
+}
+
+vector<int> CountRoutineVisitor::count(const Expr &root) {
+    cnts = vector<int>(RoutineTypeCnt, 0);
+    dispatch(root);
+    return cnts;
+}
+
+bool CountRoutineVisitor::match(const Expr &root, int nMatmul, int nConv,
+                                int nElement, int nSg2bmm,
+                                int nLongformerGBMM) {
+    auto opCount = count(root);
+    bool ret = true;
+    if (opCount[routineTypeToId(RoutineType::MatmulNodeType)] != nMatmul)
+        ret = false;
+    if (opCount[routineTypeToId(RoutineType::ConvNodeType)] != nConv)
+        ret = false;
+    if (opCount[routineTypeToId(RoutineType::ElementWiseNodeType)] != nElement)
+        ret = false;
+    if (opCount.at(routineTypeToId(RoutineType::G2bmmNodeType)) != nSg2bmm)
+        ret = false;
+    if (!ret) {
+        auto target =
+            vector<int>{nMatmul, nConv, nSg2bmm, nLongformerGBMM, nElement};
+    }
+    return ret;
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Visitor/FullPrinterVisitor.cc b/src/nnet/Visitor/FullPrinterVisitor.cc
new file mode 100644
index 00000000..e5809af5
--- /dev/null
+++ b/src/nnet/Visitor/FullPrinterVisitor.cc
@@ -0,0 +1,58 @@
+#include "nnet/Visitor/FullPrinterVisitor.h"
+
+namespace nnet {
+
+void FullPrinterVisitor::visit_(const Tensor &c) {
+    q.emplace_back(c->getName(), c->getSource(), c);
+}
+
+string FullPrinterVisitor::print(const Expr &root) {
+    q.clear();
+    std::ostringstream oss;
+    dispatch(root);
+    oss << "==> ROOT\n" << root->toReadable() << "\n";
+    for (size_t i = 0; i < q.size(); ++i) {
+        const auto &[name, routine, tensor] = q[i];
+        oss << "==> " << name << " : ";
+        if (routine) {
+            oss << routine->toReadable() << "\n";
+            if (routine->getExpr()) {
+                oss << routine->getExpr()->toReadable() << "\n";
+            } else
+                oss << "[INFO] Source is nullptr \n";
+            if (!routine->getInputs().empty()) {
+                for (const auto &tensor : routine->getInputs())
+                    q.emplace_back(tensor->getName(), tensor->getSource(),
+                                   tensor);
+            } else if (routine->getExpr())
+                dispatch(routine->getExpr());
+        } else
+            oss << "Input Tensor " << tensor->toOutputShape() << "\n";
+    }
+    return oss.str();
+}
+
+const vector<tuple<string, Routine, Tensor>> &
+FullPrinterVisitor::traverse(const Expr &root) {
+    q.clear();
+    dispatch(root);
+    for (size_t i = 0; i < q.size(); ++i) {
+        const auto &[name, routine, tensor] = q[i];
+        if (routine) {
+            // Matmul after DLT do not modify expression, so inputs has a higher
+            // priority. Some OPs such as DLT have not implement source. Then
+            // use inputs
+            if (!routine->getInputs().empty()) {
+                for (const auto &tensor : routine->getInputs())
+                    dispatch(tensor);
+            } else if (routine->getExpr()) {
+                dispatch(routine->getExpr());
+            } else {
+                assert(false);
+            }
+        }
+    }
+    return q;
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Visitor/GetTensorsVisitor.cc b/src/nnet/Visitor/GetTensorsVisitor.cc
new file mode 100644
index 00000000..380f8ba5
--- /dev/null
+++ b/src/nnet/Visitor/GetTensorsVisitor.cc
@@ -0,0 +1,9 @@
+#include "nnet/Visitor/GetTensorsVisitor.h"
+
+namespace nnet {
+
+void GetTensorsVisitor::visit_(const Tensor &c) {
+    tensors.try_emplace(c->getName(), c);
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Visitor/InputVisitor.cc b/src/nnet/Visitor/InputVisitor.cc
new file mode 100644
index 00000000..1c4b9b1c
--- /dev/null
+++ b/src/nnet/Visitor/InputVisitor.cc
@@ -0,0 +1,7 @@
+#include "nnet/Visitor/InputVisitor.h"
+
+namespace nnet {
+
+void InputVisitor::visit_(const Tensor &c) { inputs.emplace_back(c); }
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Visitor/Interpreter.cc b/src/nnet/Visitor/Interpreter.cc
new file mode 100644
index 00000000..e580aacf
--- /dev/null
+++ b/src/nnet/Visitor/Interpreter.cc
@@ -0,0 +1,174 @@
+#include "nnet/Visitor/Interpreter.h"
+#include "nnet/Visitor/GetTensorsVisitor.h"
+#include "nnet/expr.h"
+
+namespace nnet {
+
+using ttype = Interpreter::ttype; // Test data type
+using rtype = Interpreter::rtype; // Return data type
+using Position = Interpreter::Position;
+using Inputs = Interpreter::Inputs;
+using Iteration = Interpreter::Iteration;
+
+Inputs Interpreter::genInputStartingFromZero(const RangeOp &range) {
+    Inputs inputs;
+    GetTensorsVisitor getTensorsVisitor;
+    auto tensors = getTensorsVisitor.get(range);
+
+    for (const auto &[name, tensor] : tensors) {
+        auto data = make_ref<vector<int>>(tensor->getSize());
+        for (ssize_t i = 0; i < tensor->getSize(); i++) {
+            data->at(i) = i;
+        }
+        inputs.emplace(name, data);
+    }
+    return inputs;
+}
+
+Interpreter::Interpreter(RangeOp range, int _verbose)
+    : Interpreter(genInputStartingFromZero(range), _verbose){};
+
+rtype Interpreter::visit_(const Constant &c) { return c->getValue(); }
+
+rtype Interpreter::visit_(const BinaryOp &c) {
+    rtype valueL = dispatch(c->getLhs()), valueR = dispatch(c->getRhs());
+
+    switch (c->getOpType()) {
+    case OpType::Add:
+        return valueL + valueR;
+    case OpType::Mul:
+        return valueL * valueR;
+    case OpType::Div:
+        nnet_assert(valueR > 0, "Negative divisor is ill-defeind");
+        return valueL / valueR;
+    case OpType::Mod:
+        nnet_assert(valueR > 0, "Negative divisor is ill-defeind");
+        return valueL % valueR;
+    case OpType::Sub:
+        return valueL - valueR;
+    default:
+        nnet_unimplemented_halt();
+        return -1;
+    }
+}
+
+rtype Interpreter::visit_(const RangeOp &c) {
+    rtype ret = 0;
+    iterations.emplace_back();
+    // loop
+    auto loopRanges = c->getLoopVarRanges();
+    assert(positions.back().size() == loopRanges.size());
+    auto paddings = c->getPaddings();
+    for (int i = 0, iEnd = loopRanges.size(); i < iEnd; i++) {
+        int left = loopRanges[i].second.first;
+        int right = loopRanges[i].second.second;
+        int padding = paddings[i];
+        int element = positions.back()[i];
+        if (0 < padding) {
+            nnet_assert(left - padding <= element, "Out of range");
+            nnet_assert(element < right + padding, "Out of range");
+            if (left <= element && element < right) {
+                iterations.back()[loopRanges[i].first] = positions.back()[i];
+            } else {
+                iterations.pop_back();
+                return 0;
+            }
+        } else {
+            nnet_assert(left <= element, "Out of range");
+            nnet_assert(element < right, "Out of range");
+            iterations.back()[loopRanges[i].first] = positions.back()[i];
+        }
+    }
+    // sum
+    auto sumVarRanges = c->getSumVarRanges();
+    int nSumIters = sumVarRanges.size();
+    if (0 < nSumIters) {
+        vector<int> sumIterValues(nSumIters);
+        for (const auto &[var, range] : sumVarRanges) {
+            sumIterValues.emplace_back(range.first);
+            nnet_assert(range.first < range.second, "No empty range");
+        }
+        // Enumerate all values of sum iterator
+        do {
+            for (int i = 0; i < nSumIters; i++)
+                iterations.back()[sumVarRanges[i].first] = sumIterValues[i];
+            ret += dispatch(c->getSummand());
+
+            // Increase with carry to enumerate sum iterators
+            sumIterValues[nSumIters - 1]++;
+            for (int i = nSumIters - 1; 0 < i; i--) {
+                if (sumIterValues[i] == sumVarRanges[i].second.second) {
+                    sumIterValues[i] = sumVarRanges[i].second.first;
+                    sumIterValues[i - 1]++;
+                }
+            }
+        } while (sumIterValues[0] < sumVarRanges[0].second.second);
+    } else {
+        ret += dispatch(c->getSummand());
+    }
+    iterations.pop_back();
+    return ret;
+}
+
+rtype Interpreter::visit_(const Subscript &c) {
+    int ret = 0;
+    vector<int> idx;
+    auto sub = c->getIndex();
+    for (int i = 0, iEnd = sub.size(); i < iEnd; i++) {
+        idx.emplace_back(dispatch(sub[i]));
+    }
+
+    auto obj = c->getObject();
+    if (obj->getType() == NodeType::RangeOpNodeType) {
+        positions.emplace_back(idx);
+        ret = dispatch(obj);
+        positions.pop_back();
+    } else if (obj->getType() == NodeType::TensorNodeType) {
+        auto tensor = as<TensorNode>(obj);
+        const auto &data = inputs[tensor->getName()];
+        ret = tensor->getData(data, idx);
+    } else
+        assert(false);
+    return ret;
+}
+
+rtype Interpreter::visit_(const Var &c) { return iterations.back()[c]; }
+
+rtype Interpreter::visit_(const Tensor &c) {
+    nnet_unimplemented_halt();
+    return -1;
+}
+
+vector<rtype> Interpreter::interpret(const Expr &expr,
+                                     const vector<Position> &poses) {
+    vector<rtype> ret;
+    for (const auto &pos : poses) {
+        positions.emplace_back(pos);
+        ret.emplace_back(dispatch(expr));
+        positions.pop_back();
+    }
+    return ret;
+}
+
+vector<rtype> Interpreter::interpretUniformSample(const RangeOp &range,
+                                                  int nPoses) {
+    vector<Interpreter::Position> poses;
+    const auto &rangeShape = range->getOutputShape();
+    for (int i = 0; i < nPoses; ++i) {
+        Interpreter::Position pos(range->getNumOutputDims(), 0);
+        ssize_t t = range->getOutputSize() / nPoses * i;
+        for (int j = range->getNumOutputDims() - 1; j >= 0; --j) {
+            int extent = rangeShape[j];
+            pos[j] = t % extent;
+            t /= extent;
+        }
+        poses.emplace_back(pos);
+    }
+    return interpret(range, poses);
+}
+
+vector<rtype> Interpreter::interpretAllOutput(const RangeOp &range) {
+    return interpretUniformSample(range, range->getOutputSize());
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Visitor/MatchReshapeVisitor.cc b/src/nnet/Visitor/MatchReshapeVisitor.cc
new file mode 100644
index 00000000..1a1f4d7e
--- /dev/null
+++ b/src/nnet/Visitor/MatchReshapeVisitor.cc
@@ -0,0 +1,46 @@
+#include "nnet/Visitor/MatchReshapeVisitor.h"
+#include "nnet/Visitor/MergeMemboundMutator.h"
+#include "nnet/Visitor/SimplifyExprVisitor.h"
+
+namespace nnet {
+
+bool MatchReshapeVisitor::visit_(const RangeOp &memboundRangeOp) {
+    // Merge nested stages
+    auto rangeOp =
+        as<RangeOpNode>(MergeMemboundMutator({memboundRangeOp}).merge());
+    assert(rangeOp);
+    auto sub = as<SubscriptNode>(rangeOp->getSummand());
+    if (!sub)
+        return false;
+    auto sumRanges = rangeOp->getSumVarRanges();
+    for (auto const &[var, range] : sumRanges) {
+        if (range.second - range.first != 1)
+            return false;
+    }
+
+    const auto objectRanges = sub->getObjectRangesWithoutPaddings();
+    const auto indices = sub->getIndex();
+    Expr indexExpr;
+    int stride = 1;
+    for (int i = indices.size() - 1; i >= 0; --i) {
+        indexExpr = indexExpr + indices.at(i) * stride;
+        stride *= (objectRanges.at(i).second - objectRanges.at(i).first);
+    }
+
+    SimplifyExprVisitor simplifyExprVisitor;
+    simplifyExprVisitor.simplify(indexExpr);
+    auto exprStrides = simplifyExprVisitor.getStrides();
+
+    auto varRanges = rangeOp->getLoopVarRanges();
+    stride = 1;
+    // compare strides of variables in RangeOP and index
+    for (auto i = varRanges.rbegin(); i != varRanges.rend(); ++i) {
+        const bool alwaysZero = i->second.first == 0 && i->second.second == 1;
+        if (!alwaysZero && exprStrides[i->first] != stride)
+            return false;
+        stride *= (i->second.second - i->second.first);
+    }
+    return true;
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Visitor/MatchTableVisitor.cc b/src/nnet/Visitor/MatchTableVisitor.cc
new file mode 100644
index 00000000..7ea5ede1
--- /dev/null
+++ b/src/nnet/Visitor/MatchTableVisitor.cc
@@ -0,0 +1,56 @@
+#include "nnet/Visitor/MatchTableVisitor.h"
+
+namespace nnet {
+
+void MatchTableVisitor::visit_(const BinaryOp &c, const Tensor &tensor, int dim,
+                               optional<int> stride) {
+    if (c->getOpType() == OpType::Add) {
+        dispatch(c->getLhs(), tensor, dim, stride);
+        dispatch(c->getRhs(), tensor, dim, stride);
+    } else if (c->getOpType() == OpType::Sub) {
+        dispatch(c->getLhs(), tensor, dim, stride);
+        if (stride)
+            *stride = -*stride;
+        dispatch(c->getRhs(), tensor, dim, stride);
+    } else if (c->getOpType() == OpType::Mul) {
+        const optional<int> &lStride = subexprStride[c->getLhs().get()];
+        const optional<int> &rStride = subexprStride[c->getRhs().get()];
+        optional<int> lCurStride =
+            (stride && rStride) ? optional(*stride * *rStride) : nullopt;
+        optional<int> rCurStride =
+            (stride && lStride) ? optional(*stride * *lStride) : nullopt;
+        dispatch(c->getLhs(), tensor, dim, lCurStride);
+        dispatch(c->getRhs(), tensor, dim, rCurStride);
+    } else {
+        hasUnsupportedOp = true;
+    }
+}
+
+void MatchTableVisitor::visit_(const Subscript &c, const Tensor &tensor,
+                               [[maybe_unused]] int dim,
+                               [[maybe_unused]] optional<int> stride) {
+    assert(!tensor); // Should not be set until visit a tensor
+    auto object = as<TensorNode>(c->getObject());
+    assert(object);
+    tensors.emplace_back(object);
+    int currentStride = 1;
+    for (int i = (int)c->getDims() - 1; i >= 0; --i) {
+        this->dispatch(c->getIndex(i), object, i, currentStride);
+        currentStride *= object->getShape(i);
+    }
+}
+void MatchTableVisitor::visit_(const Var &c, const Tensor &tensor, int dim,
+                               optional<int> stride) {
+    appearance.try_emplace(c);
+    appearance[c].emplace_back(pair(tensor, dim));
+    strideTable[c].emplace_back(tensor.get(), dim, stride.value());
+}
+
+void MatchTableVisitor::visit_([[maybe_unused]] const Constant &c,
+                               [[maybe_unused]] const Tensor &tensor,
+                               [[maybe_unused]] int dim,
+                               [[maybe_unused]] optional<int> stride) {
+    return;
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Visitor/MatmulTransposeMutator.cc b/src/nnet/Visitor/MatmulTransposeMutator.cc
new file mode 100644
index 00000000..3c263d9c
--- /dev/null
+++ b/src/nnet/Visitor/MatmulTransposeMutator.cc
@@ -0,0 +1,100 @@
+#include "nnet/Visitor/MatmulTransposeMutator.h"
+
+namespace nnet {
+
+VecExpr MatmulTransposeMutator::transpose(const Tensor &tensor) {
+    assert(tensor->getDims() == 2);
+    const auto matmul = as<MatmulNode>(tensor->getSource());
+    VecExpr ret;
+    for (int i = 1; i < 8; ++i) {
+        // Whether really transpose/swap AB. transa/b are the arguments for gemm
+        const int Atrans = (i & 1) > 0;
+        const int Btrans = (i & 2) > 0;
+        const int ABswap = (i & 4) > 0;
+
+        auto newShape = tensor->getShape();
+        auto newPaddings = tensor->getPaddings();
+        auto [b, m, n, k, transa, transb] = matmul->getArgs();
+        auto inputs = matmul->getInputs();
+        transa ^= Atrans;
+        transb ^= Btrans;
+        // build input transpose
+        if (Atrans)
+            inputs[0] = transposeInput(inputs[0]);
+        if (Btrans)
+            inputs[1] = transposeInput(inputs[1]);
+        if (ABswap) {
+            std::swap(inputs[0], inputs[1]);
+            std::swap(m, n);
+            std::swap(transa, transb);
+            std::swap(newShape[0], newShape[1]);
+            std::swap(newPaddings[0], newPaddings[1]);
+            transa ^= 1;
+            transb ^= 1;
+        }
+        // build new Gemm Routine and Tensor
+        // HACK: trivially wrap the source to generate different hash
+        auto _va = make_ref<VarNode>("transA");
+        auto _vb = make_ref<VarNode>("transB");
+        auto _vc = make_ref<VarNode>("swapAB");
+        auto fakeSub = makeSubscript(matmul->getExpr(), {_va, _vb});
+        auto fakeRangeWrapperForHackHash =
+            makeRangeOperator({{_va, {0, Atrans + 100}},
+                               {_vb, {0, Btrans + 100}},
+                               {_vc, {0, ABswap + 100}}},
+                              {}, fakeSub);
+        Matmul newMatmul =
+            make_ref<MatmulNode>(fakeRangeWrapperForHackHash, inputs[0],
+                                 inputs[1], b, m, n, k, transa, transb);
+        auto newTensor = makeTensor(derivator.newTensorName(), newShape,
+                                    newPaddings, newMatmul);
+        // build output transpose
+        if (ABswap) {
+            vector<Var> vars{derivator.getNewVar(), derivator.getNewVar()};
+            auto sub = makeSubscript(newTensor, {vars[1], vars[0]});
+            vector<VarRangePair> loopVRs;
+            // Sicne inputs array may be swaped, use the orignal tensor shape
+            for (int i = 0; i < 2; ++i) {
+                loopVRs.emplace_back(vars[i], Range(0, tensor->getShape(i)));
+            }
+            auto rangeOp = makeRangeOperator(loopVRs, {}, sub);
+            ret.emplace_back(rangeOp);
+        } else
+            ret.emplace_back(newTensor);
+    }
+    return ret;
+}
+
+Tensor MatmulTransposeMutator::transposeInput(const Tensor &tensor) {
+    Tensor ret;
+    if (auto ew = as<ElementWiseNode>(tensor->getSource())) {
+        auto rangeOp = as<RangeOpNode>(tensor->getSource()->getExpr());
+        assert(rangeOp);
+        assert(rangeOp->getNumOutputDims() == 2);
+        auto loopVRs = rangeOp->getLoopVarRanges();
+        std::swap(loopVRs[0], loopVRs[1]);
+        // If there are paddings, the inner stage paddings should be removed
+        assert(!rangeOp->hasPaddings());
+        // auto paddings = rangeOp->getPaddings();
+        // std::swap(paddings[0], paddings[1]);
+        auto sub = makeSubscript(rangeOp, {loopVRs[1].first, loopVRs[0].first});
+        auto newRangeOp = makeRangeOperator(loopVRs, {}, sub);
+        // ElementWise newElementWise = make_ref<ElementWiseNode>(*ew);
+        auto outputShape = ew->getOutputShape();
+        std::swap(outputShape[0], outputShape[1]);
+        auto newElementWise =
+            make_ref<ElementWiseNode>(newRangeOp, ew->getInputs(), outputShape);
+
+        auto tensorShape = tensor->getShape();
+        auto tensorPaddings = tensor->getPaddings();
+        std::swap(tensorShape[0], tensorShape[1]);
+        std::swap(tensorPaddings[0], tensorPaddings[1]);
+        ret = makeTensor(derivator.newTensorName(), tensorShape, tensorPaddings,
+                         newElementWise);
+        // } else if (!tensor->getSource()) {
+    } else
+        nnet_unimplemented_halt();
+    return ret;
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Visitor/MergeMemboundMutator.cc b/src/nnet/Visitor/MergeMemboundMutator.cc
new file mode 100644
index 00000000..9fce6928
--- /dev/null
+++ b/src/nnet/Visitor/MergeMemboundMutator.cc
@@ -0,0 +1,135 @@
+#include "nnet/Visitor/MergeMemboundMutator.h"
+#include "nnet/Visitor/CheckOOBVisitor.h"
+#include "nnet/Visitor/ReplaceNodeMutator.h"
+#include "nnet/Visitor/ReplaceVariable.h"
+
+namespace nnet {
+
+Expr MergeMemboundMutator::merge(bool allowEmptyMembound) {
+    // FIXME: fix empty expression in membound
+    assert(kernels.size() >= 1);
+    if (checkEmpty()) {
+        if (allowEmptyMembound)
+            return nullptr;
+        else
+            nnet_assert(false, "Empty membound expression");
+    }
+    // Nesting stages
+    auto expr = dispatch(kernels.back());
+    // Fusing stages
+    bool merged = false;
+    do {
+        merged = false;
+        RangeOp curRangeOp;
+        for (Expr *curExpr = &expr;
+             curExpr && (curRangeOp = as<RangeOpNode>(*curExpr));) {
+            auto curRangeOp = as<RangeOpNode>(*curExpr);
+            assert(CheckOOBVisitor().checkRangeOp(curRangeOp) == false);
+            auto summand = curRangeOp->getSummand();
+            if (auto subscriptOp = as<SubscriptNode>(summand)) {
+                if (auto mergedExpr = rule4StageMerging(*curExpr, true)) {
+                    // dbg(*curExpr, mergedExpr);
+                    *curExpr = mergedExpr;
+                    merged = true;
+                    break;
+                }
+                curExpr = subscriptOp->getObjectPtr();
+                nnet_assert(*curExpr != nullptr, __LINE__);
+            } else if (auto funcOp = as<FuncNode>(summand)) {
+                // Relu({...}[i,j])
+                curExpr = funcOp->getObject()->getObjectPtr();
+            } else
+                nnet_unimplemented_halt();
+        }
+    } while (merged);
+    return expr;
+}
+
+bool MergeMemboundMutator::checkEmpty() {
+    for (const auto &k : kernels) {
+        if (k == nullptr)
+            return true;
+    }
+    return false;
+}
+
+Expr MergeMemboundMutator::visit_(const Tensor &c) {
+    if (curDepth > 0)
+        return dispatch(kernels[--curDepth]);
+    else {
+        // Reach the last tensor, return it to reconstruct the total tree
+        return c;
+    }
+}
+
+Expr MergeMemboundMutator::rule4StageMerging(Expr &rCur,
+                                             bool mergeStageWithCalc) {
+    auto rangeOp0 = as<RangeOpNode>(rCur);
+    const Subscript &sub0 = as<SubscriptNode>(rangeOp0->getSummand());
+    if (!sub0)
+        return nullptr;
+    const auto &rangeOp1 = as<RangeOpNode>(sub0->getObject());
+    if (!rangeOp1)
+        return nullptr;
+    const auto &sub1 = as<SubscriptNode>(rangeOp1->getSummand());
+    if (!sub1)
+        return nullptr;
+    // merge stage with calculation only when mergeStageWithCalc=true
+    if (!mergeStageWithCalc && !rangeOp1->getSumVarRanges().empty())
+        return nullptr;
+    // Only propogate paddings in perfect nested dimension
+    if (rangeOp1->hasPaddings()) {
+        auto oldTensor = as<TensorNode>(sub1->getObject());
+        if (!oldTensor) {
+            nnet_unimplemented_continue();
+            return nullptr;
+        }
+    }
+    // repalce variables: iters of rangeOp1 repalced by indexes of sub0
+    map<string, pair<Expr, Expr>> varMapping;
+    assert(sub0->getDims() == rangeOp1->getLoopVarRanges().size());
+    for (size_t i = 0; i < sub0->getDims(); ++i) {
+        varMapping[rangeOp1->getLoopVar(i)->getName()] =
+            pair(rangeOp1->getLoopVar(i), sub0->getIndex(i));
+    }
+    ReplaceVariable replaceVariable{varMapping};
+    auto merged = make_ref<RangeOpNode>(*rangeOp0);
+    merged->setSummand(replaceVariable(sub1));
+    // a naive approach to propogate paddings
+    if (rangeOp1->hasPaddings()) {
+        auto oldTensor = as<TensorNode>(sub1->getObject());
+        auto newTensor = make_ref<TensorNode>(*oldTensor);
+        for (int i = 0; i < rangeOp1->getNumOutputDims(); ++i) {
+            if (rangeOp1->getPaddings(i) == 0)
+                continue;
+            auto loopVar = rangeOp1->getLoopVar(i);
+            // FIXME: in fact this var should not appear in other index as well,
+            // which may result in OOB
+            bool findSingleVarAsIndex = false;
+            for (size_t subIndexID = 0; subIndexID < sub1->getDims();
+                 ++subIndexID) {
+                auto index = sub1->getIndex(subIndexID);
+                if (auto indexVar = as<VarNode>(index);
+                    indexVar && (indexVar->equal(loopVar))) {
+                    newTensor->setPadding(subIndexID,
+                                          newTensor->getPadding(subIndexID) +
+                                              rangeOp1->getPaddings(i));
+                    findSingleVarAsIndex = true;
+                }
+            }
+            if (!findSingleVarAsIndex) {
+                nnet_unimplemented_continue();
+                return nullptr;
+            }
+        }
+        merged = as<RangeOpNode>(
+            ReplaceNodeMutator().replace(merged, oldTensor.get(), newTensor));
+        assert(merged != nullptr);
+    }
+    // Merge inner stage sums
+    if (!rangeOp1->getSumVarRanges().empty())
+        merged->setSumIterator(rangeOp1->getSumVarRanges());
+    return merged;
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Visitor/PatternMatcher.cc b/src/nnet/Visitor/PatternMatcher.cc
new file mode 100644
index 00000000..326ab45f
--- /dev/null
+++ b/src/nnet/Visitor/PatternMatcher.cc
@@ -0,0 +1,124 @@
+#include "nnet/Visitor/PatternMatcher.h"
+#include "nnet/ReplaceKit.h"
+#include "nnet/Visitor/MatmulTransposeMutator.h"
+#include "nnet/Visitor/ReplaceVariable.h"
+
+namespace nnet {
+
+PatternMatcher::PatternMatcher(Derivator &derivator, const RangeOp &cur)
+    : Functor(false), derivator(derivator), hasNonZeroRange(false),
+      originalCur(cur) {
+    for (const auto &[var, range] : cur->getLoopVarRanges())
+        if (range.first != 0)
+            hasNonZeroRange = true;
+}
+
+RangeOp PatternMatcher::getOffsetCur() {
+    if (!hasNonZeroRange)
+        return originalCur;
+    vector<Var> itersFromNonZero;
+    VecExpr psis;
+    vector<VarRangePair> newLoopVarRanges;
+    for (const auto &[var, range] : originalCur->getLoopVarRanges()) {
+        if (range.first == 0) {
+            newLoopVarRanges.emplace_back(var, range);
+        } else {
+            auto newVar = derivator.getNewVar();
+            newLoopVarRanges.emplace_back(newVar,
+                                          pair(0, range.second - range.first));
+            itersFromNonZero.emplace_back(var);
+            psis.emplace_back(newVar + range.first);
+        }
+    }
+    auto newSummand = ReplaceKit::replaceMultipleExprs(
+        originalCur->getSummand(), itersFromNonZero, psis);
+    return makeRangeOperator(newLoopVarRanges, originalCur->getSumVarRanges(),
+                             newSummand);
+}
+
+VecExpr PatternMatcher::matchKernel(const Pattern &pattern,
+                                    const RangeOp &rangeOp,
+                                    IteratorTable &exprIT) {
+    VecExpr ret;
+    if (pattern.getNumTensors() != (int)exprIT.getNumTensors())
+        return ret;
+
+    // Whether enable tensor permutation
+    if (false) {
+        const int nInputs = pattern.getNumInputs();
+        vector<int> tensorMap; // [tensors Index] -> pattern tensor ID
+        for (int i = 0; i < nInputs; ++i)
+            tensorMap.emplace_back(i);
+        do {
+            exprIT.buildTable(tensorMap);
+            auto matched = matchKernelWithTensorMap(pattern, rangeOp, exprIT);
+            if (matched)
+                ret.emplace_back(matched);
+        } while (std::next_permutation(tensorMap.begin(), tensorMap.end()));
+    } else {
+        exprIT.buildTableWithDefaultMap();
+        auto matched = matchKernelWithTensorMap(pattern, rangeOp, exprIT);
+        if (matched)
+            ret.emplace_back(matched);
+    }
+    // Generate 8 variants of gemm
+    if (true) // Disabled for debug
+        if (!ret.empty() && dynamic_cast<const MatmulPattern *>(&pattern)) {
+            auto tensor = as<TensorNode>(ret[0]);
+            auto transposeds =
+                MatmulTransposeMutator(derivator).transpose(tensor);
+            for (const auto &transposed : transposeds)
+                ret.emplace_back(transposed);
+        }
+    return ret;
+}
+
+Expr PatternMatcher::matchKernelWithTensorMap(const Pattern &pattern,
+                                              const RangeOp &rangeOp,
+                                              IteratorTable &exprIT) {
+    auto mismatches = exprIT.matchPatternIT(pattern);
+    if (!mismatches.empty())
+        return nullptr;
+
+    const auto &[tensorMap_r, iterToRange_r] = exprIT.getReverseMap();
+    // // TODO: check OOB error
+    // for (int tensorID = 0; tensorID < pattern.getNumInputs(); ++tensorID) {
+    //     if (!checkIndexOutOfBound(pattern.getIterInTensorDim(tensorID),
+    //                      tensorMap_r[tensorID], iterToRange_r))
+    //         return nullptr;
+    // }
+
+    // matched! build expr for ret;
+    return pattern.buildExpr(rangeOp, tensorMap_r, iterToRange_r,
+                             derivator.newTensorName(), exprIT);
+}
+
+VecExpr PatternMatcher::applyWrapper(const VecExpr &exprs) {
+    if (!hasNonZeroRange)
+        return exprs;
+    VecExpr ret, indexes;
+    for (const auto &[var, range] : originalCur->getLoopVarRanges()) {
+        if (range.first == 0) {
+            indexes.emplace_back(var);
+        } else {
+            hasNonZeroRange = true;
+            indexes.emplace_back(var - range.first);
+        }
+    }
+    for (auto &expr : exprs) {
+        auto newSub = makeSubscript(expr, indexes);
+        ret.emplace_back(makeRangeOperator(originalCur->getLoopVarRanges(), {},
+                                           newSub, originalCur->getPaddings()));
+    }
+    return ret;
+}
+
+VecExpr PatternMatcher::matchWithPattern(const RangeOp &rangeOp,
+                                         const Pattern &pattern) {
+    IteratorTable exprIT;
+    if (!exprIT.analyzeExpr(rangeOp))
+        return {};
+    return matchKernel(pattern, rangeOp, exprIT);
+}
+
+} // namespace nnet
diff --git a/src/nnet/Visitor/RangeMagnifyVisitor.cc b/src/nnet/Visitor/RangeMagnifyVisitor.cc
new file mode 100644
index 00000000..ea042dc1
--- /dev/null
+++ b/src/nnet/Visitor/RangeMagnifyVisitor.cc
@@ -0,0 +1,56 @@
+#include "nnet/Visitor/RangeMagnifyVisitor.h"
+#include "nnet/Visitor/SimplifyExprVisitor.h"
+
+namespace nnet {
+
+Expr RangeMagnifyVisitor::visit_(const RangeOp &c) {
+    if (auto expr = Mutator::visit_(c)) {
+        auto ret = as<RangeOpNode>(expr);
+        ret->setSumIterator(newSumVarRanges);
+        return ret;
+    } else
+        return nullptr;
+}
+
+Expr RangeMagnifyVisitor::visit_(const Subscript &c) {
+    auto tensor = as<TensorNode>(c->getObject());
+    if (!tensor)
+        return nullptr;
+    // Check new ranges
+    bool paddingMagnify = false;
+    vector<Range> tensorRanges = c->getObjectRangesWithPaddings();
+    vector<int> paddingsDelta(tensorRanges.size(), 0);
+    for (int i = 0; i < (int)c->getDims(); ++i) {
+        auto indexRange =
+            SimplifyExprVisitor().getExprRange(c->getIndex(i), newRangeOp);
+        if (!indexRange.has_value())
+            return nullptr;
+        int delta = max(tensorRanges[i].first - indexRange->first,
+                        indexRange->second - tensorRanges[i].second);
+        if (delta > 0) {
+            paddingMagnify = true;
+            paddingsDelta[i] = delta;
+        }
+    }
+    if (!paddingMagnify)
+        return nullptr;
+    // Create new tensor. Direct add paddings to the Tensor.
+    auto newTensor = make_ref<TensorNode>(*tensor);
+    for (int i = 0; i < newTensor->getDims(); ++i)
+        newTensor->setPadding(i, newTensor->getPadding(i) + paddingsDelta[i]);
+    auto newSub = make_ref<SubscriptNode>(*c);
+    newSub->setObject(newTensor);
+    return newSub;
+}
+
+RangeOp
+RangeMagnifyVisitor::magnify(const RangeOp &root,
+                             const vector<VarRangePair> &_newSumVarRanges) {
+    newSumVarRanges = _newSumVarRanges;
+    newRangeOp = make_ref<RangeOpNode>(*root);
+    newRangeOp->setSumIterator(newSumVarRanges);
+    const auto &newCur = as<RangeOpNode>(dispatch(root));
+    return newCur;
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Visitor/RangeRelaxFunctor.cc b/src/nnet/Visitor/RangeRelaxFunctor.cc
new file mode 100644
index 00000000..9e1a6d67
--- /dev/null
+++ b/src/nnet/Visitor/RangeRelaxFunctor.cc
@@ -0,0 +1,133 @@
+#include "nnet/Visitor/RangeRelaxFunctor.h"
+#include "nnet/Visitor/SimplifyExprVisitor.h"
+
+namespace nnet {
+
+RangeMap RangeRelaxFunctor::visit_(const BinaryOp &c) {
+    if (verbose)
+        dbg(*c);
+    if (c->getOpType() == OpType::Mul)
+        return intersectRangeMaps(dispatch(c->getLhs()), dispatch(c->getRhs()));
+    nnet_unimplemented_halt();
+    return RangeMap();
+}
+RangeMap RangeRelaxFunctor::visit_(const RangeOp &c) {
+    if (verbose)
+        dbg(*c);
+    return dispatch(c->getSummand());
+}
+RangeMap RangeRelaxFunctor::visit_(const Subscript &c) {
+    if (verbose)
+        dbg(*c);
+    RangeMap ret;
+    const auto &tensor = as<TensorNode>(c->getObject());
+    for (size_t i = 0; i < c->getDims(); ++i) {
+        const int padding = tensor->getPadding(i);
+        const int length = tensor->getShape(i);
+        if (!padding)
+            continue;
+        // TODO assert access within padding+length
+        // If the index is a single Var
+        if (auto var = as<VarNode>(c->getIndex(i))) {
+            ret = intersectRangeMaps(ret, {{var, {0, length}}});
+        } else { // If the index is linear expression
+            const auto &[strides, offset] =
+                SimplifyExprVisitor().getStridesConstant(c->getIndex(i));
+            // // Calculate the sum of ranges of all iters in negative value
+            // Range allRange(-offset, -offset);
+            // for (const auto &[iter, stride] : strides) {
+            //     auto iterRange = rangeOp->getVarRange(iter).second;
+            //     if (stride > 0) {
+            //         allRange.first -= stride * (iterRange.second - 1);
+            //         allRange.second -= stride * iterRange.first;
+            //     } else {
+            //         allRange.first += stride * iterRange.first;
+            //         allRange.second += stride * (iterRange.second - 1);
+            //     }
+            //     dbg(iter, stride, iterRange, allRange);
+            // }
+            // dbg(allRange);
+            // // Calculate the meaningful ranges for each iter
+            // for (const auto &[iter, stride] : strides) {
+            //     auto iterRange = rangeOp->getVarRange(iter).second;
+            //     auto rangeExceptThis{allRange};
+            //     if (stride > 0) {
+            //         rangeExceptThis.first += stride * (iterRange.second - 1);
+            //         rangeExceptThis.second += stride * iterRange.first;
+            //     } else {
+            //         rangeExceptThis.first -= stride * iterRange.first;
+            //         rangeExceptThis.second -= stride * (iterRange.second -
+            //         1);
+            //     }
+            //     // Meaningful calculation range for current iter
+            //     int l, r;
+            //     if (stride > 0) {
+            //         // l = (0 - rangeExceptThis.second + stride - 1) /
+            //         stride;
+            //         // r = (length - rangeExceptThis.first) / stride;
+            //         l = (0 - rangeExceptThis.second + stride - 1) / stride;
+            //         r = (length - 1 - rangeExceptThis.first) / stride + 1;
+            //     } else {
+            //         nnet_unimplemented_continue();
+            //         continue;
+            //     }
+            //     dbg(iter, stride, iterRange, l, r);
+            //     ret = intersectRangeMaps(ret, {{iter, {l, r}}});
+            // }
+            // Calculate the sum of ranges of all iters in negative value
+            Range allRange(offset, offset);
+            for (const auto &[iter, stride] : strides) {
+                auto iterRange = rangeOp->getVarRange(iter).second;
+                if (stride > 0) {
+                    allRange.first += stride * iterRange.first;
+                    allRange.second += stride * (iterRange.second - 1);
+                } else {
+                    allRange.first += stride * (iterRange.second - 1);
+                    allRange.second += stride * iterRange.first;
+                }
+                // dbg(iter, stride, iterRange, allRange);
+            }
+            // Calculate the meaningful ranges for each iter
+            for (const auto &[iter, stride] : strides) {
+                auto iterRange = rangeOp->getVarRange(iter).second;
+                auto rangeExceptThis{allRange};
+                if (stride > 0) {
+                    rangeExceptThis.first -= stride * iterRange.first;
+                    rangeExceptThis.second -= stride * (iterRange.second - 1);
+                } else {
+                    rangeExceptThis.first -= stride * (iterRange.second - 1);
+                    rangeExceptThis.second -= stride * iterRange.first;
+                }
+                // Meaningful calculation range for current iter
+                int l, r;
+                if (stride > 0) {
+                    // l = (0 - rangeExceptThis.second + stride - 1) / stride;
+                    // r = (length - rangeExceptThis.first) / stride;
+                    l = (0 - rangeExceptThis.second + stride - 1) / stride;
+                    r = (length - 1 - rangeExceptThis.first) / stride + 1;
+                } else {
+                    nnet_unimplemented_continue();
+                    continue;
+                }
+                ret = intersectRangeMaps(ret, {{iter, {l, r}}});
+            }
+        }
+    }
+    return ret;
+}
+
+RangeMap RangeRelaxFunctor::intersectRangeMaps(const RangeMap &a,
+                                               const RangeMap &b) {
+    RangeMap ret(a);
+    for (const auto &[k, v] : b) {
+        if (!ret.count(k))
+            ret[k] = v;
+        else {
+            auto const &u = ret[k];
+            ret[k] = {max(u.first, v.first), min(u.second, v.second)};
+        }
+    }
+    return ret;
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Visitor/ReplaceNodeMutator.cc b/src/nnet/Visitor/ReplaceNodeMutator.cc
new file mode 100644
index 00000000..5a6ed96e
--- /dev/null
+++ b/src/nnet/Visitor/ReplaceNodeMutator.cc
@@ -0,0 +1,23 @@
+#include "nnet/Visitor/ReplaceNodeMutator.h"
+
+namespace nnet {
+
+Expr ReplaceNodeMutator::visit_(const Subscript &c) {
+    if (c.get() == target)
+        return replacement;
+    return Mutator::visit_(c);
+}
+Expr ReplaceNodeMutator::visit_(const Tensor &c) {
+    if (c.get() == target)
+        return replacement;
+    return nullptr;
+}
+
+Expr ReplaceNodeMutator::replace(const Expr &root, ExprNode *_target,
+                                 const Expr &_replace) {
+    target = _target;
+    replacement = _replace;
+    return dispatch(root);
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Visitor/ReplaceVariable.cc b/src/nnet/Visitor/ReplaceVariable.cc
new file mode 100644
index 00000000..74a36776
--- /dev/null
+++ b/src/nnet/Visitor/ReplaceVariable.cc
@@ -0,0 +1,50 @@
+#include "nnet/Visitor/ReplaceVariable.h"
+
+namespace nnet {
+
+Expr ReplaceVariable::visit_(const BinaryOp &c) {
+    if (verbose)
+        dbg(*c);
+    if (auto mutate = match(c); mutate)
+        return mutate;
+    else
+        return Mutator::visit_(c);
+}
+
+Expr ReplaceVariable::visit_(const Var &c) {
+    if (verbose)
+        dbg(*c);
+    if (auto mutate = match(c); mutate)
+        return mutate;
+    else
+        return Mutator::visit_(c);
+}
+
+Expr ReplaceVariable::visit_(const RangeOp &c) {
+    if (verbose)
+        dbg(*c);
+    return nullptr;
+}
+
+void ReplaceVariable::set(VecExpr _pattern, VecExpr _replacement) {
+    patterns = _pattern;
+    replacements = _replacement;
+    for (size_t i = 0; i < patterns.size(); ++i) {
+        auto hash = patterns[i]->hash();
+        assert(patternHash.count(hash) == 0);
+        patternHash[hash] = i;
+    }
+}
+
+Expr ReplaceVariable::match(const Expr &c) {
+    auto hash = c->hash();
+    if (auto it = patternHash.find(hash); it != patternHash.end()) {
+        const auto &i = it->second;
+        if (verbose)
+            dbg("Match", *c, *patterns[i], c->hash());
+        return replacements[i];
+    }
+    return nullptr;
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Visitor/Serializer.cc b/src/nnet/Visitor/Serializer.cc
new file mode 100644
index 00000000..e3f34b4b
--- /dev/null
+++ b/src/nnet/Visitor/Serializer.cc
@@ -0,0 +1,245 @@
+#include "nnet/Visitor/Serializer.h"
+#include "nlohmann/json.hpp"
+#include "nnet/expr.h"
+#include <fstream>
+
+namespace nnet {
+
+int Serializer::id = 0;
+
+Serializer::Serializer(int _verobse)
+    : Functor(_verobse), jPtr(std::make_unique<json>()), j(*jPtr) {}
+
+Serializer::~Serializer() = default;
+
+string Serializer::visit_(const Constant &c) {
+    string key = std::to_string(id++);
+    j[key]["type"] = c->getType();
+    j[key]["val"] = c->getValue();
+    return key;
+}
+
+string Serializer::visit_(const BinaryOp &c) {
+    string key = std::to_string(id++);
+    j[key]["type"] = c->getType();
+    j[key]["opType"] = (int)c->getOpType();
+    j[key]["lhs"] = dispatch(c->getLhs());
+    j[key]["rhs"] = dispatch(c->getRhs());
+    return key;
+}
+
+string Serializer::visit_(const RangeOp &c) {
+    string key = std::to_string(id++);
+    j[key]["type"] = c->getType();
+    j[key]["paddings"] = c->getPaddings();
+    j[key]["summand"] = dispatch(c->getSummand());
+    for (auto var : c->getLoopVarRanges()) {
+        j[key]["loopVarRanges"][var.first->getName()] = var.second;
+    }
+    for (auto var : c->getSumVarRanges()) {
+        j[key]["sumVarRanges"][var.first->getName()] = var.second;
+    }
+    return key;
+}
+
+string Serializer::visit_(const Subscript &c) {
+    string key = std::to_string(id++);
+    j[key]["type"] = c->getType();
+    j[key]["subExprsNum"] = c->getDims();
+    j[key]["object"] = dispatch(c->getObject());
+    vector<string> indexes;
+    for (auto index : c->getIndex()) {
+        indexes.emplace_back(dispatch(index));
+    }
+    j[key]["indexes"] = indexes;
+    return key;
+}
+
+string Serializer::visit_(const Var &c) {
+    string key = std::to_string(id++);
+    j[key]["type"] = c->getType();
+    j[key]["name"] = c->getName();
+    return key;
+}
+
+string Serializer::visit_(const Tensor &c) {
+    const string key = std::to_string(id++);
+    j[key]["type"] = c->getType();
+    j[key]["name"] = c->getName();
+    j[key]["shape"] = c->getShape();
+    j[key]["paddings"] = c->getPaddings();
+    const auto &routine = c->getSource();
+    j[key]["source"] = dispatchRoutine(routine);
+    return key;
+}
+
+bool Serializer::serialize(const Expr &expr, const string &filePath,
+                           const string &msg) {
+    // Metadata
+    j["Version"] = VERSION;
+    j["Msg"] = msg;
+    // Expressions and routines
+    id = 0;
+    dispatch(expr);
+    std::ofstream fout(filePath);
+    fout << std::setw(4) << j << std::endl;
+    return true;
+}
+
+string Serializer::dispatchRoutine(const Routine &c) {
+    if (!c)
+        return "-1";
+    const string key = std::to_string(id++);
+    j[key]["type"] = c->getType();
+
+    vector<string> inputs;
+    for (const auto &tensor : c->getInputs())
+        inputs.emplace_back(dispatch(tensor));
+    j[key]["inputs"] = inputs;
+
+    if (const auto &expr = c->getExpr())
+        j[key]["expr"] = dispatch(expr);
+    else
+        j[key]["expr"] = "-1";
+
+    switch (c->getType()) {
+    case RoutineType::NoneType:
+        nnet_unimplemented_halt();
+        break;
+    case RoutineType::MatmulNodeType: {
+        j[key]["args"] = as<MatmulNode>(c)->getArgs();
+        break;
+    }
+    case RoutineType::ConvNodeType:
+        j[key]["args"] = as<ConvNode>(c)->getArgs();
+        break;
+    case RoutineType::G2bmmNodeType:
+        j[key]["args"] = as<G2bmmNode>(c)->getArgs();
+        break;
+    case RoutineType::GbmmNodeType:
+        j[key]["args"] = as<GbmmNode>(c)->getArgs();
+        break;
+    case RoutineType::ElementWiseNodeType: {
+        j[key]["outputShape"] = as<ElementWiseNode>(c)->getOutputShape();
+        break;
+    }
+    default:
+        nnet_unimplemented_halt();
+    }
+    return key;
+}
+
+Expr Serializer::deserialize(const string &filePath) {
+    std::ifstream fin(filePath);
+    fin >> j;
+    assert(j["Version"] == VERSION);
+    return buildExprTree("0");
+}
+
+Expr Serializer::buildExprTree(string key) {
+    switch (NodeType(j[key]["type"])) {
+    case NodeType::ConstantNodeType: {
+        return make_ref<ConstantNode>(j[key]["val"]);
+    }
+    case NodeType::BinaryOpNodeType: {
+        auto lhs = buildExprTree(j[key]["lhs"]);
+        auto rhs = buildExprTree(j[key]["rhs"]);
+        return make_ref<BinaryOpNode>(j[key]["opType"], lhs, rhs);
+    }
+    case NodeType::RangeOpNodeType: {
+        vector<VarRangePair> loopIters, sumIters;
+        for (auto &loopIter : j[key]["loopVarRanges"].items()) {
+            loopIters.emplace_back(
+                pair(make_ref<VarNode>(loopIter.key()),
+                     pair(loopIter.value()[0], loopIter.value()[1])));
+        }
+        for (auto &sumIter : j[key]["sumVarRanges"].items()) {
+            sumIters.emplace_back(
+                pair(make_ref<VarNode>(sumIter.key()),
+                     pair(sumIter.value()[0], sumIter.value()[1])));
+        }
+        auto summand = buildExprTree(j[key]["summand"]);
+        auto paddings = j[key]["paddings"].get<std::vector<int>>();
+        auto rangeOp = makeRangeOperator(loopIters, sumIters, summand);
+        rangeOp->setPaddings(paddings);
+        return rangeOp;
+    }
+    case NodeType::SubscriptNodeType: {
+        auto indexed = buildExprTree(j[key]["object"]);
+        VecExpr subExprs;
+        for (int i = 0, iEnd = j[key]["subExprsNum"]; i < iEnd; i++) {
+            subExprs.emplace_back(buildExprTree(j[key]["indexes"][i]));
+        }
+        return make_ref<SubscriptNode>(indexed, subExprs);
+    }
+    case NodeType::VarNodeType: {
+        return make_ref<VarNode>(j[key]["name"]);
+    }
+    case NodeType::TensorNodeType: {
+        auto source = buildRoutine(j[key]["source"]);
+        return make_ref<TensorNode>(j[key]["name"], j[key]["shape"],
+                                    j[key]["paddings"], source);
+    }
+    default: {
+        nnet_unimplemented_halt();
+        break;
+    }
+    }
+    return nullptr;
+}
+
+Routine Serializer::buildRoutine(string key) {
+    if (key == "-1")
+        return nullptr;
+    Expr expr = nullptr;
+    if (j[key]["expr"] != "-1")
+        expr = buildExprTree(j[key]["expr"]);
+    vector<Tensor> inputs;
+    for (const auto &input : j[key]["inputs"])
+        inputs.emplace_back(as<TensorNode>(buildExprTree(input)));
+
+    switch (RoutineType(j[key]["type"])) {
+    case RoutineType::NoneType:
+        nnet_unimplemented_halt();
+        break;
+    case RoutineType::MatmulNodeType: {
+        assert(inputs.size() == 2);
+        auto args = j[key]["args"].get<MatmulArgs>();
+        auto ctorArgs =
+            std::tuple_cat(std::tie(expr, inputs[0], inputs[1]), args);
+        return make_ref_from_tuple<MatmulNode>(ctorArgs);
+        break;
+    }
+    case RoutineType::ConvNodeType: {
+        assert(inputs.size() == 2);
+        auto args = j[key]["args"].get<ConvArgs>();
+        auto ctorArgs =
+            std::tuple_cat(std::tie(expr, inputs[0], inputs[1]), args);
+        return make_ref_from_tuple<ConvNode>(ctorArgs);
+        break;
+    }
+    case RoutineType::G2bmmNodeType: {
+        auto args = j[key]["args"].get<G2bmmArgs>();
+        auto ctorArgs =
+            std::tuple_cat(std::tie(expr, inputs[0], inputs[1]), args);
+        return make_ref_from_tuple<G2bmmNode>(ctorArgs);
+        break;
+    }
+    case RoutineType::GbmmNodeType: {
+        auto args = j[key]["args"].get<GbmmArgs>();
+        auto ctorArgs =
+            std::tuple_cat(std::tie(expr, inputs[0], inputs[1]), args);
+        return make_ref_from_tuple<GbmmNode>(ctorArgs);
+        break;
+    }
+    case RoutineType::ElementWiseNodeType: {
+        return make_ref<ElementWiseNode>(expr, inputs, j[key]["outputShape"]);
+        break;
+    }
+    default:
+        nnet_unimplemented_halt();
+    }
+    return nullptr;
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Visitor/SimplifyExprVisitor.cc b/src/nnet/Visitor/SimplifyExprVisitor.cc
new file mode 100644
index 00000000..5f98325e
--- /dev/null
+++ b/src/nnet/Visitor/SimplifyExprVisitor.cc
@@ -0,0 +1,160 @@
+#include "nnet/Visitor/SimplifyExprVisitor.h"
+#include "nnet/Visitor/StrideVisitor.h"
+
+namespace nnet {
+
+pair<PtrMap<Iterator, int>, int>
+SimplifyExprVisitor::getStridesConstant(const Expr &expr) {
+    constant = 0;
+    divStrides.clear();
+    modStrides.clear();
+    subexprStride = StrideVisitor().getExprStride(expr);
+    strides.clear();
+    dispatch(expr, 1);
+    return {strides, constant};
+}
+
+optional<Range> SimplifyExprVisitor::getExprRange(const Expr &expr,
+                                                  const RangeOp &rangeOp) {
+    constant = 0;
+    divStrides.clear();
+    modStrides.clear();
+    subexprStride = StrideVisitor().getExprStride(expr);
+    strides.clear();
+    dispatch(expr, 1);
+    // Skip if there is divide and modulo
+    if (!divStrides.empty() || !modStrides.empty() || !divExprStrides.empty() ||
+        !modExprStrides.empty())
+        return {};
+    Range ret{constant, constant + 1};
+    for (const auto &[iter, stride] : strides) {
+        const auto &[l, r] = rangeOp->getRange(iter);
+        if (stride > 0) {
+            ret.first += l * stride;
+            ret.second += (r - 1) * stride;
+        } else {
+            ret.first += (r - 1) * stride;
+            ret.second += l * stride;
+        }
+    }
+    return ret;
+}
+
+PtrMap<Iterator, int> SimplifyExprVisitor::getStrides(const Expr &expr) {
+    return getStridesConstant(expr).first;
+}
+
+int SimplifyExprVisitor::getConstant(const Expr &expr) {
+    return getStridesConstant(expr).second;
+}
+
+Expr SimplifyExprVisitor::simplify(const Expr &expr) {
+    getStrides(expr);
+    Expr ret = nullptr;
+    // merge divide and modulo items
+    for (const auto &[iterDividerPair, divStride] : divStrides) {
+        const auto &[iter, mod] = iterDividerPair;
+        // mod < 0 is a marker for merging vars with negtive strides. In math,
+        // divider < 0 is not well-defined for mod, so it should be exist in our
+        // epxrs and is only a temporary state which must be simpilified now.
+        if (mod < 0) { // must perfectly merged.
+            const auto &modStride = modStrides[iterDividerPair];
+            assert(divStride / abs(mod) == modStride);
+            assert(divStride > 0);
+            strides.try_emplace(iterDividerPair.first, 0);
+            strides[iterDividerPair.first] += abs(divStride / mod);
+            modStrides.erase(iterDividerPair);
+        } else if (divStride % mod == 0 && modStrides.count(iterDividerPair)) {
+            const auto &modStride = modStrides[iterDividerPair];
+            if (divStride / mod == modStride) {
+                strides.try_emplace(iterDividerPair.first, 0);
+                strides[iterDividerPair.first] += divStride / mod;
+                modStrides.erase(iterDividerPair);
+            } else
+                ret = ret + divStride * (iterDividerPair.first /
+                                         iterDividerPair.second);
+        } else
+            ret = ret +
+                  divStride * (iterDividerPair.first / iterDividerPair.second);
+    }
+    // remaining modulo items
+    for (const auto &[iterDividerPair, stride] : modStrides) {
+        ret = ret + stride * (iterDividerPair.first % iterDividerPair.second);
+    }
+    // normal constant*variable items
+    for (const auto &[iter, stride] : strides) {
+        if (stride == 0)
+            continue;
+        Expr subexpr;
+        if (stride == 1)
+            subexpr = iter;
+        else
+            subexpr = stride * iter;
+        ret = (ret) ? ret + subexpr : subexpr;
+    }
+    // not perfectly nested divide and modulo items
+    for (const auto &[iterDividerPair, stride] : divExprStrides) {
+        ret = ret + stride * (iterDividerPair.first / iterDividerPair.second);
+    }
+    for (const auto &[iterDividerPair, stride] : modExprStrides) {
+        ret = ret + stride * (iterDividerPair.first % iterDividerPair.second);
+    }
+    ret = ret + constant;
+    return ret ? ret : make_ref<ConstantNode>(0);
+}
+
+void SimplifyExprVisitor::visit_(const BinaryOp &c, optional<int> stride) {
+    if (verbose)
+        dbg(c);
+    if (c->getOpType() == OpType::Add) {
+        dispatch(c->getLhs(), stride);
+        dispatch(c->getRhs(), stride);
+    } else if (c->getOpType() == OpType::Sub) {
+        dispatch(c->getLhs(), stride);
+        if (stride)
+            *stride = -*stride;
+        dispatch(c->getRhs(), stride);
+    } else if (c->getOpType() == OpType::Mul) {
+        const optional<int> &lStride = subexprStride[c->getLhs().get()];
+        const optional<int> &rStride = subexprStride[c->getRhs().get()];
+        optional<int> lCurStride =
+            (stride && rStride) ? optional(*stride * *rStride) : nullopt;
+        optional<int> rCurStride =
+            (stride && lStride) ? optional(*stride * *lStride) : nullopt;
+        dispatch(c->getLhs(), lCurStride);
+        dispatch(c->getRhs(), rCurStride);
+    } else if (c->getOpType() == OpType::Mod) {
+        const auto &param = c->getModDivParameter();
+        if (param.has_value()) {
+            modStrides.try_emplace(*param, 0);
+            modStrides[*param] += stride.value();
+        } else {
+            const auto &paramExpr = c->getModDivExpr();
+            modExprStrides.try_emplace(paramExpr, 0);
+            modExprStrides[paramExpr] += stride.value();
+        }
+    } else if (c->getOpType() == OpType::Div) {
+        const auto &param = c->getModDivParameter();
+        if (param.has_value()) {
+            divStrides.try_emplace(*param, 0);
+            divStrides[*param] += stride.value();
+        } else {
+            const auto &paramExpr = c->getModDivExpr();
+            divExprStrides.try_emplace(paramExpr, 0);
+            divExprStrides[paramExpr] += stride.value();
+        }
+    } else
+        nnet_unimplemented_halt();
+}
+void SimplifyExprVisitor::visit_(const Var &c, optional<int> stride) {
+    if (verbose)
+        dbg(c);
+    strides.try_emplace(c);
+    strides[c] += stride.value();
+}
+void SimplifyExprVisitor::visit_(const Constant &c, optional<int> stride) {
+    if (stride.has_value())
+        constant += stride.value() * c->getValue();
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Visitor/SimplifyFormulaMutator.cc b/src/nnet/Visitor/SimplifyFormulaMutator.cc
new file mode 100644
index 00000000..86dd5f74
--- /dev/null
+++ b/src/nnet/Visitor/SimplifyFormulaMutator.cc
@@ -0,0 +1,30 @@
+#include "nnet/Visitor/SimplifyFormulaMutator.h"
+#include "nnet/Visitor/SimplifyExprVisitor.h"
+
+namespace nnet {
+
+Expr SimplifyFormulaMutator::visit_(const Subscript &c) {
+    ++nSubscripts;
+    if (verbose)
+        dbg(*c);
+    bool modified = false;
+    auto ret = make_ref<SubscriptNode>(*c);
+    for (size_t i = 0; i < ret->getDims(); ++i) {
+        const auto &e = ret->getIndex(i);
+        if (const auto &mutated = SimplifyExprVisitor().simplify(e)) {
+            modified = true;
+            ret->setIndex(i, mutated);
+        }
+    }
+    return (modified) ? ret : nullptr;
+}
+
+Expr SimplifyFormulaMutator::simplify(const Expr &expr) {
+    nSubscripts = 0;
+    const auto &ret = dispatch(expr);
+    nnet_assert(nSubscripts > 0,
+                "Subscript NOT found. Use SimplifyFormulaMutator?");
+    return (ret) ? ret : expr;
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Visitor/StrideVisitor.cc b/src/nnet/Visitor/StrideVisitor.cc
new file mode 100644
index 00000000..f623b99c
--- /dev/null
+++ b/src/nnet/Visitor/StrideVisitor.cc
@@ -0,0 +1,59 @@
+#include "nnet/Visitor/StrideVisitor.h"
+
+namespace nnet {
+
+optional<int> StrideVisitor::visit_(const Subscript &c) {
+    if (verbose)
+        dbg(*c);
+    auto object = as<TensorNode>(c->getObject());
+    assert(object);
+    for (int i = (int)c->getDims() - 1; i >= 0; --i)
+        this->dispatch(c->getIndex(i));
+    return {};
+}
+
+optional<int> StrideVisitor::visit_(const Var &c) {
+    if (verbose)
+        dbg(*c);
+    // assert(subexprStride.count(&c) == 0);
+    subexprStride[c.get()] = {};
+    return {};
+}
+
+optional<int> StrideVisitor::visit_(const Constant &c) {
+    if (verbose)
+        dbg(*c);
+    optional ret{c->getValue()};
+    // assert(subexprStride.count(&c) == 0);
+    subexprStride[c.get()] = ret;
+    return ret;
+}
+
+optional<int> StrideVisitor::visit_(const BinaryOp &c) {
+    if (verbose)
+        dbg(*c);
+    optional<int> strideL = this->dispatch(c->getLhs());
+    optional<int> strideR = this->dispatch(c->getRhs());
+    if (!strideL || !strideR)
+        return {};
+    optional<int> ret;
+    switch (c->getOpType()) {
+    case OpType::Add:
+        ret = optional(*strideL + *strideR);
+        break;
+    case OpType::Sub:
+        ret = optional(*strideL - *strideR);
+        break;
+    case OpType::Mul:
+        ret = optional(*strideL * *strideR);
+        break;
+    default:
+        nnet_unimplemented_halt();
+        break;
+    }
+    // assert(subexprStride.count(&c) == 0);
+    subexprStride[c.get()] = ret;
+    return ret;
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/Visitor/hashVisitor.cc b/src/nnet/Visitor/hashVisitor.cc
new file mode 100644
index 00000000..359e2335
--- /dev/null
+++ b/src/nnet/Visitor/hashVisitor.cc
@@ -0,0 +1,156 @@
+#include "nnet/Visitor/HashVisitor.h"
+#include "nnet/Visitor/FullPrinterVisitor.h"
+#include "nnet/Visitor/SimplifyExprVisitor.h"
+namespace nnet {
+
+constexpr int varPrefix = 11027;
+constexpr int binPrefix = 11047;
+constexpr int ssPrefix = 11057;
+constexpr int addPrefix = 11059;
+constexpr int mulPrefix = 11069;
+constexpr int vecPrefix = 11071;
+constexpr int tensorPrefix = 11083;
+constexpr int valSuffix = 6214587;
+
+static inline HashType hash(const HashType a, const HashType b) {
+    return (a * 10007 + b + 12345) % 1000000007;
+}
+
+static inline HashType hash(const std::string &s) {
+    HashType ret = 0;
+    for (auto c : s)
+        ret = hash(ret, c);
+    return ret;
+}
+
+static inline HashType hash(const OpType c) { return HashType(c); }
+
+HashType HashVisitor::getHash(const Expr &c) { return dispatch(c); }
+
+HashType HashVisitor::visit_(const Constant &c) {
+    auto val = c->getValue();
+    return genhash(val, valSuffix);
+}
+
+HashType HashVisitor::visit_(const BinaryOp &c) {
+    HashType hasha = dispatch(c->getLhs());
+    HashType hashb = dispatch(c->getRhs());
+
+    if (c->isSwapable()) {
+        if (hasha > hashb) {
+            std::swap(hasha, hashb);
+        }
+    }
+    return hash(binPrefix, hash(hash(c->getOpType()), hash(hasha, hashb)));
+    return 0;
+}
+
+HashType hashLoopVar(const int id, const Range &range) {
+    return hash(varPrefix, hash(id, hash(range.first, range.second)));
+}
+
+HashType hashSumVar(const Range &range) {
+    return hash(varPrefix, hash(range.first, range.second));
+}
+
+HashType HashVisitor::visit_(const RangeOp &c) {
+    // Identify loop variables
+    for (const auto &[var, range] : c->getLoopVarRanges()) {
+        nnet_assert(varHash.find(var) == varHash.end(),
+                    "In HashVisiter::RangeOp invalid loop var.");
+        varHash[var] = hashLoopVar(nLoopVars++, range);
+    }
+
+    // Identify sum variables according to range
+    for (const auto &[var, range] : c->getSumVarRanges()) {
+        nnet_assert(varHash.find(var) == varHash.end(),
+                    "In HashVisiter::RangeOp invalid sum var.");
+        varHash[var] = hashSumVar(range);
+    }
+
+    auto expr = c->getSummand();
+    return dispatch(expr);
+}
+
+HashType HashVisitor::visit_(const Subscript &c) {
+    HashType curHash = ssPrefix;
+    auto obj = c->getObject();
+    if (obj->getType() == NodeType::RangeOpNodeType) {
+        curHash = hash(curHash, HashVisitor().getHash(obj));
+    } else if (obj->getType() == NodeType::TensorNodeType) {
+        // TODO: hash should based on arguments
+        curHash = hash(curHash, dispatch(obj));
+    } else {
+        nnet_unimplemented_halt();
+    }
+
+    for (const auto &expr : c->getIndex()) {
+        if (expr->getType() == NodeType::BinaryOpNodeType) {
+            HashType tmp = addPrefix;
+            std::vector<std::pair<HashType, HashType>> coefficients;
+            auto seVisitor = SimplifyExprVisitor();
+            auto [c, x] = seVisitor.getStridesConstant(expr);
+            for (const auto &[key, value] : c) {
+                coefficients.emplace_back(varHash[key], value);
+            }
+            for (const auto &[iter, value] : seVisitor.getDivStrides()) {
+                nnet_assert(iter.second != 1, "invalid div expr");
+                coefficients.emplace_back(
+                    hash(binPrefix, hash(varHash[iter.first], iter.second)),
+                    value);
+            }
+            sort(coefficients.begin(), coefficients.end());
+            tmp = hash(tmp, x);
+            for (const auto &[key, value] : coefficients) {
+                tmp = hash(tmp, hash(mulPrefix, hash(key, value)));
+            }
+            curHash = hash(curHash, tmp);
+            continue;
+        }
+        if (expr->getType() == NodeType::ConstantNodeType) {
+            curHash = hash(curHash, dispatch(expr));
+            continue;
+        }
+        if (expr->getType() == NodeType::VarNodeType) {
+            curHash = hash(curHash, dispatch(expr));
+            continue;
+        }
+        nnet_unimplemented_halt();
+    }
+    return curHash;
+}
+
+HashType hashPadding(const std::vector<int> &pad) {
+    HashType cur = hash(vecPrefix, pad.size());
+    for (const auto &e : pad) {
+        cur = hash(cur, e);
+    }
+    return cur;
+}
+
+HashType HashVisitor::visit_(const Tensor &c) {
+    // TODO: remove this
+    // TODO: check if hash name includes padding.
+
+    if (c->getSource() == nullptr) {
+        return hash(tensorPrefix, genhash(c->getName()));
+    }
+    // dbg(c, c->getSource()->getExpr(), FullPrinterVisitor().print(c),
+    //     FullPrinterVisitor().print(c->getSource()->getExpr()));
+    // std::cout << "Tensor: " << int(c->getSource()->getExpr()->getType())
+    //           << std::endl;
+    // std::cout << "Tensor: " << c->getSource()->getExpr()->toReadable()
+    //           << std::endl;
+    return hash(tensorPrefix,
+                HashVisitor().dispatch(c->getSource()->getExpr()));
+}
+
+HashType HashVisitor::visit_(const Var &c) {
+    if (varHash.find(c) == varHash.end()) {
+        nnet_unimplemented_halt();
+        return 0;
+    }
+    return varHash[c];
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/derivator.cc b/src/nnet/derivator.cc
new file mode 100644
index 00000000..11165934
--- /dev/null
+++ b/src/nnet/derivator.cc
@@ -0,0 +1,529 @@
+#include "nnet/derivator.h"
+#include "nnet/Pass/MatchComputationKernel.h"
+#include "nnet/Pass/MatchMemBoundKernel.h"
+#include "nnet/Pass/Rule1VariableSplit.h"
+#include "nnet/Pass/Rule2VariableMerging.h"
+#include "nnet/Pass/Rule3StageSplit.h"
+#include "nnet/Pass/Rule4StageMerging.h"
+#include "nnet/Pass/Rule5RangeRelaxation.h"
+#include "nnet/Pass/Rule6KenerlMatching.h"
+#include "nnet/Pass/Rule7DLT.h"
+#include "nnet/Pass/Rule8GuidedDLT.h"
+#include "nnet/Pass/Rule90TwoStageElementWise.h"
+#include "nnet/Pass/Rule91MergeStagesWithSum.h"
+#include "nnet/Pass/Rule9RangeMagnify.h"
+#include "nnet/Visitor/CheckOOBVisitor.h"
+#include "nnet/Visitor/CloneMutator.h"
+#include "nnet/Visitor/CompareMultiFormulasVisitor.h"
+#include "nnet/Visitor/CountRoutineVisitor.h"
+#include "nnet/Visitor/FullPrinterVisitor.h"
+#include "nnet/Visitor/HashVisitor.h"
+#include "nnet/Visitor/MergeMemboundMutator.h"
+#include "nnet/Visitor/Serializer.h"
+#include "nnet/test.h"
+
+namespace nnet {
+
+class SaveStateGuard {
+    Derivator &derivator;
+
+  public:
+    SaveStateGuard(Derivator &derivator, const Expr &origin,
+                   const string &ruleName, const string &ruleMsg = "")
+        : derivator(derivator) {
+        derivator.pushIntermediateState(origin);
+        derivator.pushRuleState(ruleName);
+        derivator.pushRuleMsg(ruleMsg);
+    }
+    ~SaveStateGuard() {
+        derivator.popIntermediateState();
+        derivator.popRuleState();
+        derivator.popRuleMsg();
+    }
+};
+
+#define SetUpStateGuard()                                                      \
+    SaveStateGuard __guard(*this, origin.root, __FUNCTION__)
+
+void Derivator::dfs(Formula &origin, int depth) {
+    guidedSearch(origin, depth);
+
+    if (depth >= maxDepth) {
+        return;
+    }
+    Expr *curExpr = &origin.root;
+    nnet_assert((*curExpr)->getType() == NodeType::RangeOpNodeType, __LINE__);
+    while ((*curExpr)->getType() == NodeType::RangeOpNodeType) {
+        auto curRangeOp = as<RangeOpNode>(*curExpr);
+        checkOOB(curRangeOp);
+        auto summand = curRangeOp->getSummand();
+        if (summand->getType() == NodeType::SubscriptNodeType) {
+            auto subscriptOp = as<SubscriptNode>(summand);
+            if (rule4StageMerging(origin, depth, *curExpr)) {
+                return;
+            }
+            curExpr = subscriptOp->getObjectPtr();
+            nnet_assert(*curExpr != nullptr, __LINE__);
+            continue;
+        }
+        if (summand->getType() == NodeType::BinaryOpNodeType) {
+            if (cntAppliedRules[1] < 3)
+                rule1VariableSplit(origin, depth, *curExpr); // +1/0
+            rule2VariableMerging(origin, depth, *curExpr);   // +1
+            if (cntAppliedRules[3] < 1)
+                rule3StageSplit(origin, depth, *curExpr);  // +1
+            rule5RangeRelaxation(origin, depth, *curExpr); // 0
+            rule7DLT(origin, depth, *curExpr);
+            rule9RangeMagnify(origin, depth, *curExpr);
+            return;
+        }
+        nnet_unimplemented_halt();
+    }
+    // RangeOp curRangeOp;
+    // for (Expr *curExpr = &origin.root;
+    //      curExpr && (curRangeOp = as<RangeOpNode>(*curExpr));) {
+    //     checkOOB(curRangeOp);
+    //     auto subscript = as<SubscriptNode>(curRangeOp->getSummand());
+    //     // isSimplyNested: a directly nested stage
+    //     bool isSimplyNested = (subscript &&
+    //     subscript->isRangeOpSubscripted()); if (rule4StageMerging(origin,
+    //     depth, *curExpr))
+    //         return;
+    //     // For the next nested stage
+    //     curExpr = (isSimplyNested) ? subscript->getObjectPtr() : nullptr;
+    // }
+
+    // int stage = 0;
+    // for (Expr *curExpr = &origin.root;
+    //      curExpr && (curRangeOp = as<RangeOpNode>(*curExpr));) {
+    //     stage++;
+    //     // isSimplyNested: a directly nested stage
+    //     auto subscript = as<SubscriptNode>(curRangeOp->getSummand());
+    //     bool isSimplyNested = (subscript &&
+    //     subscript->isRangeOpSubscripted());
+
+    //     // TODO recover it
+    //     // permuteRangeOps(origin, depth, *curExpr);
+    //     // extractSubexpression(origin, depth, *curExpr);
+
+    //     rule4StageMerging(origin, depth, *curExpr);
+
+    //     if (!isSimplyNested) {
+    //         std::cout << "num stage: " << depth << " " << stage << std::endl;
+    //         if (depth < 5) {
+    //             rule1VariableSplit(origin, depth, *curExpr);   // +1/0
+    //             rule3StageSplit(origin, depth, *curExpr);      // +1
+    //             rule2VariableMerging(origin, depth, *curExpr); // +1
+    //             rule5RangeRelaxation(origin, depth, *curExpr); // 0
+    //             rule9RangeMagnify(origin, depth, *curExpr);
+    //         }
+    //         if (depth >= 5) {
+    //             rule1VariableSplit(origin, depth, *curExpr);   // +1/0
+    //             rule3StageSplit(origin, depth, *curExpr);      // +1
+    //             rule2VariableMerging(origin, depth, *curExpr); // +1
+    //             rule5RangeRelaxation(origin, depth, *curExpr); // 0
+    //             rule6KenerlMatching(origin, depth, *curExpr);  // -1
+    //             rule7DLT(origin, depth, *curExpr);             // +1
+    //             rule8GuidedDLT(origin, depth, *curExpr);       //
+    //             rule9RangeMagnify(origin, depth, *curExpr);
+    //         }
+    //     }
+    //     // For the next nested stage
+    //     curExpr = (isSimplyNested) ? subscript->getObjectPtr() : nullptr;
+    // }
+}
+
+Derivator::Derivator(int maxDepth, bool enableHashPruning, LogMode logMode,
+                     PassMode passMode)
+    : maxDepth(maxDepth), logMode(logMode), passMode(passMode),
+      enableHashPruning(enableHashPruning), cntAppliedRules(12) {}
+
+int Derivator::getNumIntermediateStates() { return cntStates; }
+
+void Derivator::guidedSearch(Formula &origin, int depth) {
+    if (origin.root->getType() == NodeType::TensorNodeType) {
+        auto tensor = as<TensorNode>(origin.root);
+        appendCanddiate(tensor, depth);
+        return;
+    }
+    Expr *expr = &origin.root;
+    nnet_assert((*expr)->getType() == NodeType::RangeOpNodeType, __LINE__);
+    while ((*expr)->getType() == NodeType::RangeOpNodeType) {
+        auto rangeOp = as<RangeOpNode>(*expr);
+        checkOOB(rangeOp);
+        auto summand = rangeOp->getSummand();
+        if (summand->getType() == NodeType::SubscriptNodeType) {
+            auto subscriptOp = as<SubscriptNode>(summand);
+            if (rule4StageMerging(origin, depth, *expr)) {
+                return;
+            }
+            expr = subscriptOp->getObjectPtr();
+            nnet_assert(*expr != nullptr, __LINE__);
+            continue;
+        }
+        if (summand->getType() == NodeType::BinaryOpNodeType) {
+            break;
+        }
+        nnet_unimplemented_halt();
+    }
+
+    if (searchState == 0) {
+        searchState = 1;
+        rule8GuidedDLT(origin, depth, *expr);
+        searchState = 0;
+        return;
+    }
+    if (searchState == 1) {
+        rule8GuidedDLT(origin, depth, *expr);
+        return;
+    }
+    if (searchState == 2) {
+        matchComputationKernel(origin, depth, *expr);
+        return;
+    }
+    if (searchState == 3) {
+        // Pack the remaining computation as a MemBoundOp
+        matchMemBoundKernel(origin, depth, origin.root);
+        return;
+    }
+    nnet_unimplemented_halt();
+    return;
+}
+
+void Derivator::ruleBasedDerivate(Formula &origin, int depth) {
+    // string StartDfs = "ruleBasedDerivate dep=" + std::to_string(depth) +
+    //                   ", targetOp=" +
+    //                   std::to_string(routineTypeToId(targetOp));
+    // dbg(StartDfs, origin);
+    auto tensor = as<TensorNode>(origin.root);
+    if (tensor) {
+        appendCanddiate(tensor, depth);
+        return;
+    }
+    if (depth >= (int)rulesOverall.size())
+        return;
+    RangeOp curRangeOp;
+    for (Expr *curExpr = &origin.root;
+         curExpr && (curRangeOp = as<RangeOpNode>(*curExpr));) {
+        checkOOB(curRangeOp);
+        auto subscript = as<SubscriptNode>(curRangeOp->getSummand());
+        // isSimplyNested: a directly nested stage
+        bool isSimplyNested = (subscript && subscript->isRangeOpSubscripted());
+        if (rule4StageMerging(origin, depth, *curExpr))
+            return;
+        // For the next nested stage
+        curExpr = (isSimplyNested) ? subscript->getObjectPtr() : nullptr;
+    }
+    int stageDepth = 0;
+    for (Expr *curExpr = &origin.root;
+         curExpr && (curRangeOp = as<RangeOpNode>(*curExpr));) {
+        // isSimplyNested: a directly nested stage
+        auto subscript = as<SubscriptNode>(curRangeOp->getSummand());
+        bool isSimplyNested = (subscript && subscript->isRangeOpSubscripted());
+        stageDepth++;
+
+        for (int rule : rulesOverall[depth]) {
+            if (rule == 1)
+                rule1VariableSplit(origin, depth, *curExpr);
+            else if (!isSimplyNested) {
+                if (rule == 2)
+                    rule2VariableMerging(origin, depth, *curExpr);
+                else if (rule == 3)
+                    rule3StageSplit(origin, depth, *curExpr);
+                else if (rule == 5)
+                    rule5RangeRelaxation(origin, depth, *curExpr);
+                else if (rule == 6)
+                    rule6KenerlMatching(origin, depth, *curExpr);
+                else if (rule == 7)
+                    rule7DLT(origin, depth, *curExpr);
+                else if (rule == 8)
+                    rule8GuidedDLT(origin, depth, *curExpr);
+                else if (rule == 9)
+                    rule9RangeMagnify(origin, depth, *curExpr);
+            }
+        }
+        // For the next nested stage
+        curExpr = (isSimplyNested) ? subscript->getObjectPtr() : nullptr;
+    }
+    for (int rule : rulesOverall[depth])
+        if (rule == 90 && stageDepth == 2) // HACK: for (T)Conv2gemm
+            rule90TwoStageElementWise(origin, depth, origin.root);
+        else if (rule == 91 && stageDepth >= 2) // HACK: for TConv2gemm
+            rule91MergeStagesWithSum(origin, depth, origin.root);
+}
+
+void Derivator::nextStep(Formula &origin, int depth, Expr &rCur, Expr newCur) {
+    // Count the number of searched states
+    ++cntStates;
+    rCur.swap(newCur);
+
+    HashType formulaHash = HashVisitor().getHash(origin.root);
+    if (enableHashPruning) {
+        if (searchState != 2) {
+            if (visited.find(formulaHash) != visited.end()) {
+                rCur.swap(newCur);
+                return;
+            }
+            visited.emplace(formulaHash);
+        }
+    }
+
+    if (searchState > 0) {
+        guidedSearch(origin, depth);
+    } else {
+        searchedMaxDepth = max(searchedMaxDepth, depth + 1);
+
+        if (searchStrategy == Strategy::DFS ||
+            (searchStrategy == Strategy::RuleAndDFS &&
+             depth + 1 >= (ssize_t)rulesOverall.size()))
+            dfs(origin, depth + 1);
+        else
+            ruleBasedDerivate(origin, depth + 1);
+    }
+    rCur.swap(newCur);
+}
+
+void Derivator::ruleBasedDFS(Formula &origin, int depth, vector<int> _rules,
+                             map<int, vector<Iterator>> _substituteRules,
+                             bool searchAfterRules) {
+    SaveStateGuard guard(*this, origin.root, string("Init: ") + __FUNCTION__);
+    searchStrategy = (searchAfterRules) ? Strategy::RuleAndDFS : Strategy::Rule;
+    rulesOverall.clear();
+    for (auto i : _rules)
+        rulesOverall.push_back({i});
+    substituteRules = _substituteRules;
+    ruleBasedDerivate(origin, depth);
+}
+
+void Derivator::search(Formula &origin, int depth) {
+    SaveStateGuard guard(*this, origin.root, string("Init: ") + __FUNCTION__);
+    searchStrategy = Strategy::DFS;
+    dfs(origin, depth);
+}
+
+void Derivator::print() {
+    std::cout << "[RESULT] Derivator::results: " << candidates.size()
+              << std::endl;
+    std::cout << "==== DFS candidates (" << candidates.size()
+              << ")====" << std::endl;
+    for (const auto &f : candidates) {
+        std::cout << f.toReadable() << std::endl;
+        // dbg(f.bfsDepth, f.toReadable());
+    }
+    std::cout << "==== DFS log end ====" << std::endl;
+}
+
+string Formula::toReadable() const { return FullPrinterVisitor().print(root); }
+
+void Derivator::rule1VariableSplit(Formula &origin, int depth, Expr &rCur) {
+    ++cntAppliedRules[1];
+    Rule1VariableSplit(*this).run(origin, depth, rCur);
+    --cntAppliedRules[1];
+}
+
+void Derivator::rule2VariableMerging(Formula &origin, int depth, Expr &rCur) {
+    ++cntAppliedRules[2];
+    Rule2VariableMerging(*this).run(origin, depth, rCur);
+    --cntAppliedRules[2];
+}
+
+void Derivator::rule3StageSplit(Formula &origin, int depth, Expr &rCur) {
+    ++cntAppliedRules[3];
+    Rule3StageSplit(*this).run(origin, depth, rCur);
+    --cntAppliedRules[3];
+}
+
+bool Derivator::rule4StageMerging(Formula &origin, int depth, Expr &rCur,
+                                  bool mergeStageWithCalc) {
+    ++cntAppliedRules[4];
+    Rule4StageMerging pass(*this);
+    pass.setMergeStageWithCalc(mergeStageWithCalc);
+    pass.run(origin, depth, rCur);
+    --cntAppliedRules[4];
+    return pass.isSuccessful();
+}
+
+void Derivator::rule5RangeRelaxation(Formula &origin, int depth, Expr &rCur) {
+    ++cntAppliedRules[5];
+    Rule5RangeRelaxation(*this).run(origin, depth, rCur);
+    --cntAppliedRules[5];
+}
+
+void Derivator::rule6KenerlMatching(Formula &origin, int depth, Expr &rCur) {
+    ++cntAppliedRules[6];
+    Rule6KenerlMatching(*this).run(origin, depth, rCur);
+    --cntAppliedRules[6];
+}
+
+void Derivator::rule7DLT(Formula &origin, int depth, Expr &rCur) {
+    ++cntAppliedRules[7];
+    Rule7DLT(*this).run(origin, depth, rCur);
+    --cntAppliedRules[7];
+}
+
+void Derivator::rule8GuidedDLT(Formula &origin, int depth, Expr &rCur) {
+    ++cntAppliedRules[8];
+    Rule8GuidedDLT(*this).run(origin, depth, rCur);
+    --cntAppliedRules[8];
+}
+
+void Derivator::rule9RangeMagnify(Formula &origin, int depth, Expr &rCur) {
+    ++cntAppliedRules[9];
+    Rule9RangeMagnify(*this).run(origin, depth, rCur);
+    --cntAppliedRules[9];
+}
+
+void Derivator::rule90TwoStageElementWise(Formula &origin, int depth,
+                                          Expr &rCur) {
+    Rule90TwoStageElementWise(*this).run(origin, depth, rCur);
+}
+
+void Derivator::rule91MergeStagesWithSum(Formula &origin, int depth,
+                                         Expr &rCur) {
+    Rule91MergeStagesWithSum(*this).run(origin, depth, rCur);
+}
+
+void Derivator::matchComputationKernel(Formula &origin, int depth, Expr &rCur) {
+    MatchComputationKernel(*this).run(origin, depth, rCur);
+}
+
+void Derivator::matchMemBoundKernel(Formula &origin, int depth, Expr &rCur) {
+    MatchMemBoundKernel(*this).run(origin, depth, rCur);
+}
+
+bool Derivator::stageCombination(MultiFormulas &origin, int depth) {
+    return (CompareMultiFormulasVisitor().compare(origin.roots));
+}
+
+Expr Derivator::mergeMemboundStages(VecExpr stages) {
+    auto nested = MergeMemboundMutator(stages).merge();
+    return nested;
+}
+
+void Derivator::appendCanddiate(const Tensor &tensor, int depth) {
+    // if (!CountRoutineVisitor().match(tensor, 1, 0, 3))
+    //     return;
+
+    candidates.emplace_back(tensor, depth);
+    // dbg("!!!!!!!!!!!!!!!Success!!!!!!!!!!!!!!!");
+    if (enableEquivalenceCheck)
+        checkDerivationEquivalence();
+    // printIntermediateStates();
+}
+
+bool Derivator::checkOOB(const RangeOp &rangeOp, bool halt) {
+    // Skip check in NoLog mode
+    if (logMode == LogMode::NoLog)
+        return false;
+    bool hasOOB = CheckOOBVisitor().checkRangeOp(rangeOp);
+    if (hasOOB) {
+        printIntermediateStates();
+        dbg(FullPrinterVisitor().print(rangeOp));
+        if (halt)
+            nnet_assert(0, "Out Of Bound in index!");
+    }
+    return hasOOB;
+}
+
+string Derivator::newTensorName() {
+    return "T" + std::to_string(++nTensorNames);
+}
+
+Var Derivator::getNewVar() {
+    return make_ref<VarNode>("i" + std::to_string(++nIteratorNames));
+}
+
+void Derivator::pushIntermediateState(const Expr &expr) {
+    intermediateStates.emplace_back(CloneMutator().clone(expr));
+};
+
+void Derivator::pushRuleState(const string &state) {
+    ruleStates.emplace_back(state);
+}
+
+void Derivator::pushRuleMsg(const string &state) {
+    ruleMsgs.emplace_back(state);
+}
+
+void Derivator::popIntermediateState() { intermediateStates.pop_back(); }
+
+void Derivator::popRuleState() { ruleStates.pop_back(); }
+
+void Derivator::popRuleMsg() { ruleMsgs.pop_back(); }
+
+RoutineType Derivator::getTargetOp() { return targetOp; }
+
+void Derivator::setTargetOp(RoutineType _targetOp) { targetOp = _targetOp; }
+
+int Derivator::getSearchState() { return searchState; }
+
+void Derivator::setSearchState(int _searchState) { searchState = _searchState; }
+
+void Derivator::printStatistics() {
+    printf("==== Derivator statistics ====\n");
+    printf("Max Depth = %d\n", maxDepth);
+    printf("searchStrategy = ");
+    if (searchStrategy == Strategy::DFS)
+        printf("DFS\n");
+    else if (searchStrategy == Strategy::Rule)
+        printf("Rule\n");
+    else if (searchStrategy == Strategy::RuleAndDFS)
+        printf("RuleAndDFS\n");
+    printf("enableHashPruning = %s\n", enableHashPruning ? "true" : "false");
+    printf("Reached Max Depth during search = %d\n", searchedMaxDepth);
+    printf("#Candidates = %lu\n", candidates.size());
+    printf("#Intermediate states = %d\n", cntStates);
+    printf("#Hashed intermediate states = %lu\n", visited.size());
+    printf("#Iteratos = %d\n", nIteratorNames);
+    printf("#Tensors = %d\n", nTensorNames);
+}
+
+void Derivator::setDumpFirstSuccess(const string &_logFnPrefix) {
+    setEquivalenceCheck();
+    logMode = LogMode::DumpFristCandiate;
+    logFnPrefix = _logFnPrefix;
+}
+
+void Derivator::printIntermediateStates() {
+    // Skip in NoLog mode
+    if (logMode == LogMode::NoLog)
+        return;
+    assert(intermediateStates.size() == ruleStates.size());
+    assert(intermediateStates.size() == ruleMsgs.size());
+    for (size_t i = 0; i < intermediateStates.size(); ++i) {
+        string msg = "=== Depth " + std::to_string(i) + " " + ruleStates[i] +
+                     ": " + ruleMsgs[i];
+        std::cout << msg << endl;
+        std::cout << FullPrinterVisitor().print(intermediateStates[i]) << endl;
+        if (logMode == LogMode::DumpFristCandiate) {
+            Serializer serializer;
+            serializer.serialize(intermediateStates[i],
+                                 logFnPrefix + to_string(i) + ".expr", msg);
+        }
+    }
+    for (size_t i = 0; i < intermediateStates.size(); ++i) {
+        if (auto cur = as<RangeOpNode>(intermediateStates[i]))
+            if (CheckOOBVisitor().checkRangeOp(cur)) {
+                printf("OOB detected depth=%lu\n", i);
+            }
+    }
+    if (logMode == LogMode::DumpFristCandiate) {
+        puts("Serializaiton finished.");
+        exit(0);
+    }
+}
+
+void Derivator::checkDerivationEquivalence() {
+    if (!checkExprsEquvivalence(intermediateStates)) {
+        nnet_assert(0, "Inequivalent derivation");
+        exit(1);
+    }
+}
+
+void Derivator::setEquivalenceCheck() { enableEquivalenceCheck = true; }
+
+Derivator::PassMode Derivator::getPassMode() { return passMode; }
+
+Derivator::LogMode Derivator::getLogMode() { return logMode; }
+
+} // namespace nnet
diff --git a/src/nnet/dlt.cc b/src/nnet/dlt.cc
new file mode 100644
index 00000000..62ce6f96
--- /dev/null
+++ b/src/nnet/dlt.cc
@@ -0,0 +1,131 @@
+#include "nnet/dlt.h"
+#include "nnet/Visitor/SimplifyExprVisitor.h"
+#include "nnet/visitor.h"
+#include <algorithm>
+
+namespace nnet {
+
+optional<Expr> DLT::apply(const RangeOp &rangeOp, const Subscript &subscript,
+                          string newTensorName) {
+    const auto &tensor = as<TensorNode>(subscript->getObject());
+    assert(tensor != nullptr);
+    vector<int> shape0(tensor->getShape()), shape1;
+    const auto &paddings = tensor->getPaddings();
+    VecExpr index0 = subscript->getIndex(), index1;
+    // compute new shapes and index
+    for (const auto &opPtr : ops) {
+        if (auto op = as<DLTSplit>(opPtr)) {
+            assert(op->dim < (int)shape0.size());
+            for (int i = 0; i < (int)shape0.size(); ++i)
+                if (i != op->dim) {
+                    shape1.emplace_back(shape0[i]);
+                    index1.emplace_back(index0[i]);
+                } else {
+                    assert(shape0[i] % op->factor == 0);
+                    shape1.emplace_back(shape0[i] / op->factor);
+                    shape1.emplace_back(op->factor);
+                    if (const auto &opt =
+                            splitIndex(index0[i], op->factor, rangeOp);
+                        opt.has_value()) {
+                        index1.emplace_back(opt->first);
+                        index1.emplace_back(opt->second);
+                    } else
+                        return {};
+                }
+        } else if (auto op = as<DLTMerge>(opPtr)) {
+            assert(op->dim0 < (int)shape0.size());
+            assert(op->dim1 < (int)shape0.size());
+            for (int i = 0; i < (int)shape0.size(); ++i)
+                if (i == op->dim0) {
+                    shape1.emplace_back(shape0[op->dim0] * shape0[op->dim1]);
+                    index1.emplace_back(index0[op->dim0] * shape0[op->dim1] +
+                                        index0[op->dim1]);
+                } else if (i != op->dim1) {
+                    shape1.emplace_back(shape0[i]);
+                    index1.emplace_back(index0[i]);
+                }
+        } else if (auto op = as<DLTReorder>(opPtr)) {
+            if (op->dims.size() != shape0.size()) {
+                // TODO: input Reorder should have the same order with tensor
+                nnet_unimplemented_continue();
+                return {};
+            }
+            assert(op->dims.size() == shape0.size());
+            for (size_t i = 0; i < shape0.size(); ++i) {
+                shape1.emplace_back(shape0[op->dims[i]]);
+                index1.emplace_back(index0[op->dims[i]]);
+            }
+        }
+        for (const auto &index : index1) {
+            // Maybe there are bugs...
+            // assert(index != nullptr);
+            if (index == nullptr) {
+                std::cout << "Warning empty" << std::endl;
+                return {};
+            }
+        }
+        shape0.swap(shape1);
+        shape1.clear();
+        index0.swap(index1);
+        index1.clear();
+    }
+    for (auto &index : index0) {
+        // Maybe there are bugs...
+        assert(index != nullptr);
+        if (index == nullptr)
+            return {};
+        index = SimplifyExprVisitor().simplify(index);
+    }
+    // HACK DLT with paddings: transfer original paddings to the new one
+    vector<int> dltedPaddings =
+        (paddings.size() == shape0.size()) ? paddings : vector<int>{};
+    // TODO [necessary] build DLT source expr. Is OP-based DLT is good too?
+    // HACK [important] fix this fake tensor.
+    auto elementRoutine = make_ref<ElementWiseNode>(
+        // FIXME: implement transpose
+        // makeTensor(newTensorName + "_DLT", {}), vector<Tensor>{tensor},
+        // shape0);
+        makeTensor("__DLT", {}), vector<Tensor>{tensor}, shape0);
+    auto dltedTensor =
+        makeTensor(newTensorName, shape0, dltedPaddings, elementRoutine);
+    auto dltedSubscript = makeSubscript(dltedTensor, index0);
+    return optional<Expr>(std::in_place, dltedSubscript);
+}
+
+optional<pair<Expr, Expr>> DLT::splitIndex(Expr expr, int factor,
+                                           RangeOp rangeOp) {
+    auto strides = SimplifyExprVisitor().getStrides(expr);
+    Expr quotient, remainder;
+    for (const auto &[iter, stride] : strides) {
+        const auto &[var, range] = rangeOp->getVarRange(iter);
+        // Add new expr, dealing with the initial empty expr
+        auto addExpr = [](Expr &orig, const Expr &newExpr) {
+            if (!orig)
+                orig = newExpr;
+            else
+                orig = orig + newExpr;
+        };
+        if (abs(stride) >= factor) {
+            if (stride % factor)
+                return {};
+            addExpr(quotient, (stride / factor) * var);
+        } else {
+            if (stride * (range.second - range.first) > factor)
+                return {};
+            addExpr(remainder, stride * var);
+        }
+    }
+    return optional<pair<Expr, Expr>>(std::in_place, quotient, remainder);
+}
+
+void DLT::split(int dim, int factor) {
+    ops.emplace_back(make_ref<DLTSplit>(dim, factor));
+}
+void DLT::merge(int dim0, int dim1) {
+    ops.emplace_back(make_ref<DLTMerge>(dim0, dim1));
+}
+void DLT::reorder(vector<int> dims) {
+    ops.emplace_back(make_ref<DLTReorder>(dims));
+}
+
+} // namespace nnet
diff --git a/src/nnet/expr.cc b/src/nnet/expr.cc
new file mode 100644
index 00000000..ea25bd5b
--- /dev/null
+++ b/src/nnet/expr.cc
@@ -0,0 +1,452 @@
+#include "nnet/expr.h"
+#include "nnet/Visitor/GetTensorsVisitor.h"
+
+namespace nnet {
+
+string serializeVec(vector<Expr> v) {
+    if (v.empty())
+        return "[]";
+    return "[" +
+           std::accumulate(v.begin() + 1, v.end(), v[0]->toReadable(),
+                           [](const string &a, Expr b) {
+                               return a + ',' + b->toReadable();
+                           }) +
+           "]";
+}
+
+string serializeVec(vector<Var> v) {
+    VecExpr vv;
+    for (const auto &a : v)
+        vv.emplace_back(a);
+    return serializeVec(vv);
+}
+
+std::ostream &operator<<(std::ostream &ios, const ExprNode &expr) {
+    ios << expr.toReadable();
+    return ios;
+}
+
+TensorNode::TensorNode(string _name, vector<int> _shape, vector<int> _paddings,
+                       Routine _source)
+    : name(_name), shape(_shape), paddings(_paddings), source(_source) {
+    if (source && source->getExpr()) {
+        if (auto range = as<RangeOpNode>(source->getExpr()))
+            for (auto [iter, lr] : range->getLoopVarRanges())
+                nnet_assert(lr.first == 0 && lr.second > 0,
+                            "Tensor dims should start from 0.");
+    }
+    if (paddings.size() == 0)
+        paddings = vector<int>(shape.size(), 0);
+    assert(paddings.size() == shape.size());
+}
+
+string TensorNode::toOutputShape() const {
+    return "shape=" + serializeVec(shape) + " pad=" + serializeVec(paddings);
+}
+
+string TensorNode::toReadable() const {
+    string ret = name;
+    string property = "<pad=";
+    bool hasPaddings = false;
+    for (size_t i = 0; i < paddings.size(); ++i) {
+        if (i > 0)
+            property += ",";
+        property += to_string(paddings[i]);
+        if (paddings[i])
+            hasPaddings = true;
+    }
+    property += ">";
+    return (hasPaddings) ? ret + property : ret;
+}
+
+int TensorNode::getData(const Ref<vector<int>> &data, const vector<int> &idx) {
+    assert(idx.size() == shape.size());
+    for (size_t i = 0; i < idx.size(); ++i) {
+        if (idx[i] < 0 || idx[i] >= shape[i]) {
+            assert(0 - paddings[i] <= idx[i]);
+            assert(idx[i] < shape[i] + paddings[i]);
+            return 0;
+        }
+    }
+    return data->at(getOffset(idx));
+}
+
+size_t TensorNode::getOffset(const vector<int> &idx) {
+    auto nDim = idx.size();
+    assert(shape.size() == nDim);
+    if (idx.empty()) {
+        return 0;
+    }
+    for (size_t i = 0; i < nDim; i++) {
+        if (idx[i] < 0 || shape[i] <= idx[i]) {
+            return (size_t)-1;
+        }
+    }
+    size_t offset = idx[0];
+    size_t dim = 0;
+    while (++dim < nDim) {
+        offset = offset * shape[dim] + idx[dim];
+    }
+    return offset;
+}
+
+string RangeOpNode::toReadable() const {
+    string ret;
+    for (int i = 0; i < IterationType::NumIterationType; ++i) {
+        ret += (i == Loop) ? "L" : "Sum";
+        for (const auto &kv : vars[i]) {
+            ret += "<" + kv.first->getName() + ":" +
+                   std::to_string(kv.second.first) + ":" +
+                   std::to_string(kv.second.second) + ">";
+        }
+        if (i == Loop && hasPaddings()) {
+            ret += "<pad=";
+            for (const auto &i : paddings) {
+                ret += to_string(i) + ",";
+            }
+            ret += ">";
+        }
+    }
+    if (auto sub = as<SubscriptNode>(getSummand()); sub) {
+        ret += "  ...  " + serializeVec(sub->getIndex()) + "\n    {" +
+               sub->getObject()->toReadable() + "}";
+    } else {
+        ret += "\n    {" + subExprs[Summand]->toReadable() + "}";
+    }
+    return ret;
+};
+int RangeOpNode::getNumOutputDims() const { return vars[Loop].size(); }
+bool RangeOpNode::hasVar(int index, Var name) const {
+    for (const auto &kv : vars[index])
+        if (kv.first->equal(name))
+            return true;
+    return false;
+}
+int RangeOpNode::getVarIndex(int type, string name) {
+    for (size_t i = 0; i < vars[type].size(); ++i)
+        if (vars[type][i].first->equal(name))
+            return i;
+    assert(0);
+    return 0;
+}
+Range RangeOpNode::getRange(const Var &var) const {
+    for (const auto &varRanges : vars) {
+        for (const auto &varRange : varRanges) {
+            if (varRange.first->equal(var))
+                return varRange.second;
+        }
+    }
+    nnet_assert(0, "Var is not a iterator.");
+    return Range();
+}
+VarRangePair RangeOpNode::getVarRange(const Var &var) const {
+    for (const auto &varRanges : vars) {
+        for (const auto &varRange : varRanges) {
+            if (varRange.first->equal(var))
+                return varRange;
+        }
+    }
+    nnet_assert(0, "Var is not a iterator.");
+    return VarRangePair();
+}
+
+void SubscriptNode::setObject(Expr e) {
+    nnet_assert(as<TensorNode>(e) || as<RangeOpNode>(e),
+                "Illegal subscripted object");
+    indexed = e;
+}
+
+bool SubscriptNode::isRangeOpSubscripted() const {
+    return as<RangeOpNode>(indexed) != nullptr;
+}
+
+vector<Range> SubscriptNode::getObjectRangesWithoutPaddings() const {
+    if (isRangeOpSubscripted()) {
+        return as<RangeOpNode>(indexed)->getOutputRanges();
+    } else {
+        vector<Range> ret;
+        for (const auto &len : as<TensorNode>(indexed)->getShape())
+            ret.emplace_back(0, len);
+        return ret;
+    }
+}
+
+vector<Range> SubscriptNode::getObjectRangesWithPaddings() const {
+    vector<Range> ret;
+    if (isRangeOpSubscripted()) {
+        auto rangeOp = as<RangeOpNode>(indexed);
+        ret = rangeOp->getOutputRanges();
+        for (size_t i = 0; i < ret.size(); ++i) {
+            if (int pad = rangeOp->getPaddings(i)) {
+                ret[i].first -= pad;
+                ret[i].second += pad;
+            }
+        }
+    } else {
+        auto tensor = as<TensorNode>(indexed);
+        for (const auto &len : tensor->getShape())
+            ret.emplace_back(0, len);
+        for (int i = 0; i < tensor->getDims(); ++i) {
+            if (int pad = tensor->getPadding(i)) {
+                ret[i].first -= pad;
+                ret[i].second += pad;
+            }
+        }
+    }
+    return ret;
+}
+
+optional<pair<Iterator, int>> BinaryOpNode::getModDivParameter() const {
+    auto lhs = as<VarNode>(getLhs());
+    auto rhs = as<ConstantNode>(getRhs());
+    if (lhs == nullptr) {
+        return {};
+    }
+    if (lhs->getType() != NodeType::VarNodeType) {
+        nnet_unimplemented_halt();
+    }
+    if (rhs->getType() != NodeType::ConstantNodeType) {
+        nnet_unimplemented_halt();
+    }
+    assert(rhs != nullptr);
+    return pair(lhs, rhs->getValue());
+}
+
+pair<Expr, int> BinaryOpNode::getModDivExpr() const {
+    auto constant = as<ConstantNode>(getRhs());
+    assert(constant != nullptr);
+    return pair(getLhs(), constant->getValue());
+}
+
+string BinaryOpNode::toReadable() const {
+    string ret = "(";
+    ret += subExprs[LHS]->toReadable();
+    ret += " ";
+    ret += opSymbols[static_cast<std::underlying_type_t<OpType>>(opType)];
+    ret += " ";
+    ret += subExprs[RHS]->toReadable();
+    return ret + ")";
+};
+
+bool BinaryOpNode::isSwapable() const {
+    switch (getOpType()) {
+    case OpType::Add:
+    case OpType::Mul:
+        return true;
+    case OpType::Sub:
+    case OpType::Div:
+    case OpType::Mod:
+        return false;
+    default:
+        nnet_unimplemented_halt();
+        return false;
+    }
+}
+
+string SubscriptNode::toReadable() const {
+    string ret;
+    ret += "{";
+    ret += indexed->toReadable();
+    ret += "}[";
+    for (size_t i = 0; i < subExprs.size(); ++i) {
+        ret += subExprs[i]->toReadable();
+        if (i != subExprs.size() - 1)
+            ret += ", ";
+        else
+            ret += "]";
+    }
+    return ret;
+};
+
+string FuncNode::toReadable() const {
+    string ret;
+    if (funcType == FuncType::Relu)
+        ret += "Relu";
+    else if (funcType == FuncType::Tanh)
+        ret += "Tanh";
+    else
+        nnet_unimplemented_halt();
+    ret += "(  ...  " + serializeVec(object->getIndex()) + ")\n    {" +
+           object->getObject()->toReadable() + "}";
+    return ret;
+}
+
+Expr operator+(const Expr &lhs, const Expr &rhs) {
+    if (lhs == nullptr && rhs == nullptr)
+        return nullptr;
+    else if (lhs == nullptr)
+        return rhs;
+    else if (rhs == nullptr)
+        return lhs;
+    else
+        return make_ref<BinaryOpNode>(OpType::Add, lhs, rhs);
+}
+
+BinaryOp operator-(const Expr &lhs, const Expr &rhs) {
+    return make_ref<BinaryOpNode>(OpType::Sub, lhs, rhs);
+}
+
+BinaryOp operator*(const Expr &lhs, const Expr &rhs) {
+    return make_ref<BinaryOpNode>(OpType::Mul, lhs, rhs);
+}
+
+BinaryOp operator/(const Expr &lhs, const Expr &rhs) {
+    return make_ref<BinaryOpNode>(OpType::Div, lhs, rhs);
+}
+
+BinaryOp operator%(const Expr &lhs, const Expr &rhs) {
+    return make_ref<BinaryOpNode>(OpType::Mod, lhs, rhs);
+}
+
+Expr operator+(const Expr &lhs, const int &rhs) {
+    if (lhs != nullptr && rhs != 0)
+        return make_ref<BinaryOpNode>(OpType::Add, lhs,
+                                      make_ref<ConstantNode>(rhs));
+    else if (lhs == nullptr)
+        return make_ref<ConstantNode>(rhs);
+    else
+        return lhs;
+}
+
+Expr operator+(const int &lhs, const Expr &rhs) { return rhs + lhs; }
+
+Expr operator-(const Expr &lhs, const int &rhs) { return lhs + (-rhs); }
+
+Expr operator-(const int &lhs, const Expr &rhs) {
+    if (rhs != nullptr)
+        return make_ref<BinaryOpNode>(OpType::Sub, make_ref<ConstantNode>(lhs),
+                                      rhs);
+    else
+        return make_ref<ConstantNode>(lhs);
+}
+
+Expr operator*(const Expr &lhs, const int &rhs) {
+    if (rhs == 1)
+        return lhs;
+    else
+        return make_ref<BinaryOpNode>(OpType::Mul, lhs,
+                                      make_ref<ConstantNode>(rhs));
+}
+
+Expr operator*(const int &lhs, const Expr &rhs) {
+    if (lhs == 1)
+        return rhs;
+    else
+        return make_ref<BinaryOpNode>(OpType::Mul, make_ref<ConstantNode>(lhs),
+                                      rhs);
+}
+
+bool operator==(const Var &lhs, const string &rhs) {
+    return lhs->getName() == rhs;
+}
+
+bool operator==(const string &lhs, const Var &rhs) { return rhs == lhs; }
+Expr operator%(const Expr &lhs, const int rhs) {
+    return make_ref<BinaryOpNode>(OpType::Mod, lhs,
+                                  make_ref<ConstantNode>(rhs));
+}
+Expr operator/(const Expr &lhs, const int rhs) {
+    if (rhs == 1)
+        return lhs;
+    else
+        return make_ref<BinaryOpNode>(OpType::Div, lhs,
+                                      make_ref<ConstantNode>(rhs));
+}
+
+// Wrappers for type deduction
+Subscript makeSubscript(const Expr &tensor, const VecExpr &subscripts) {
+    return make_ref<SubscriptNode>(tensor, subscripts);
+}
+
+RangeOp makeRangeOperator(const vector<VarRangePair> &_loopIters,
+                          const vector<VarRangePair> &_sumIters, Expr _summand,
+                          const vector<int> &paddings) {
+    return make_ref<RangeOpNode>(_loopIters, _sumIters, _summand, paddings);
+}
+
+// Wrappers for type deduction
+Tensor makeTensor(const string &name, const vector<int> &shape,
+                  const vector<int> &paddings, const Routine &source) {
+    if (paddings.size() == 0)
+        return make_ref<TensorNode>(name, shape,
+                                    vector<int>((int)shape.size(), 0), source);
+    else
+        return make_ref<TensorNode>(name, shape, paddings, source);
+}
+
+int64_t TensorNode::getSize() const {
+    int64_t size = 1;
+    for (auto len : shape)
+        size *= len;
+    return size;
+}
+int RangeOpNode::getPaddings(int dim) const {
+    return dim < (int)paddings.size() ? paddings[dim] : 0;
+}
+
+vector<int> RangeOpNode::getPaddings() const {
+    if (paddings.size() > 0)
+        return paddings;
+    else
+        return vector<int>(getNumOutputDims(), 0);
+}
+
+void RangeOpNode::setPaddings(vector<int> _paddings) { paddings = _paddings; }
+
+bool RangeOpNode::hasPaddings() const {
+    for (const auto &p : paddings)
+        if (p > 0)
+            return true;
+    return false;
+}
+
+int64_t RangeOpNode::getFlops() const {
+    int64_t sumCnt = getOutputSize();
+    if (vars[Sum].empty())
+        sumCnt = 0;
+    else
+        for (const auto &[var, range] : getSumVarRanges())
+            sumCnt *= range.second - range.first;
+    return sumCnt;
+}
+
+int64_t RangeOpNode::getInputSize(const RangeOp &self) const {
+    int64_t ret = 0;
+    auto tensors = GetTensorsVisitor().get(self);
+    for (const auto &[name, tensor] : tensors)
+        ret += tensor->getSize();
+    return ret;
+}
+
+int64_t RangeOpNode::getOutputSize() const {
+    int64_t loopCnt = 1;
+    for (const auto &[var, range] : getLoopVarRanges())
+        loopCnt *= range.second - range.first;
+    return loopCnt;
+}
+
+vector<int> RangeOpNode::getOutputShape() const {
+    vector<int> ret;
+    for (const auto &[var, range] : getLoopVarRanges())
+        ret.emplace_back(range.second - range.first);
+    return ret;
+}
+
+vector<Range> RangeOpNode::getOutputRanges() const {
+    vector<Range> ret;
+    for (const auto &[var, range] : getLoopVarRanges())
+        ret.emplace_back(range);
+    for (size_t i = 0; i < paddings.size(); ++i)
+        if (paddings[i] > 0) {
+            ret[i].first -= paddings[i];
+            ret[i].second += paddings[i];
+        }
+    return ret;
+}
+
+void FuncNode::setObject(Expr e) {
+    object = as<SubscriptNode>(e);
+    nnet_assert(object, "Illegal subscripted object");
+}
+
+} // namespace nnet
diff --git a/src/nnet/iterator_table.cc b/src/nnet/iterator_table.cc
new file mode 100644
index 00000000..b89769cd
--- /dev/null
+++ b/src/nnet/iterator_table.cc
@@ -0,0 +1,653 @@
+#include "nnet/iterator_table.h"
+#include "nnet/Visitor/MatchTableVisitor.h"
+#include "nnet/Visitor/SimplifyExprVisitor.h"
+#include "nnet/permutation.h"
+#include <iostream>
+
+namespace nnet {
+
+bool IteratorTable::analyzeExpr(const RangeOp &_rangeOp) {
+    rangeOp = _rangeOp;
+    MatchTableVisitor mtVisitor;
+    if (!mtVisitor(rangeOp))
+        return false;
+    tie(appearance, tensors, strideTable, subscripts) = mtVisitor.getResult();
+    // dbg(appearance, tensors, strideTable);
+    return true;
+}
+
+// mapTensors
+void IteratorTable::buildTable(const vector<int> &_tensorMap) {
+    tensorMap = _tensorMap;
+    tensorMap_r.clear();
+    tensorMap_r.resize(getNumInputs());
+    tensorIDMap_r.clear();
+    tensorIDMap_r.resize(getNumInputs());
+    posTable.clear();
+    posTable.resize(getNumRows());
+    strideInTensor.clear();
+
+    // build reversed index (from tensorID to tensor/original tensor index)
+    for (size_t i = 0; i < tensorMap.size(); ++i) {
+        tensorMap_r[tensorMap[i]] = tensors[i];
+        tensorIDMap_r[tensorMap[i]] = i;
+    }
+    strideInDim.clear();
+    strideInDim.resize(getNumInputs());
+    for (int i = 0; i < getNumInputs(); ++i)
+        strideInDim[i].resize(getTensor(i)->getDims());
+
+    // auxiliary array for calculate in-dim stride
+    vector<vector<int>> ldaInTensors(getNumInputs());
+    for (int i = 0; i < getNumInputs(); ++i) {
+        ldaInTensors[i].resize(getTensor(i)->getDims());
+        ldaInTensors[i].back() = 1;
+        for (int j = getTensor(i)->getDims() - 2; j >= 0; --j)
+            ldaInTensors[i][j] =
+                ldaInTensors[i][j + 1] * getTensor(i)->getShape(j + 1);
+    }
+
+    map<TensorNode *, int> inputTensor2id;
+    for (int i = 0; i < getNumInputs(); ++i)
+        inputTensor2id[tensors[i].get()] = tensorMap[i];
+
+    iterInTensorDim.clear();
+    iterInTensorDim.resize(getNumInputs());
+    for (int i = 0; i < getNumInputs(); ++i) {
+        iterInTensorDim[tensorMap[i]].resize(tensors[i]->getDims());
+    }
+
+    for (const auto &[var, tds] : strideTable) {
+        int bitmap = 0;
+        strideInTensor.emplace(var, getNumInputs());
+        for (const auto &[tensorNode, dim, stride] : tds) {
+            int tensorID = inputTensor2id[tensorNode];
+            int bit = 1 << tensorID;
+            if (std::find_if(iterInTensorDim[tensorID][dim].begin(),
+                             iterInTensorDim[tensorID][dim].end(),
+                             [var = var](const Var &v) {
+                                 return v->equal(var);
+                             }) == iterInTensorDim[tensorID][dim].end())
+                iterInTensorDim[tensorID][dim].emplace_back(var);
+            bitmap |= bit;
+            if (strideInTensor[var][tensorID] < 0 || stride < 0)
+                strideInTensor[var][tensorID] = -1;
+            else
+                strideInTensor[var][tensorID] += stride;
+            // Update strideInDim
+            assert(stride % ldaInTensors[tensorID][dim] == 0);
+            strideInDim[tensorID][dim][var] =
+                stride / ldaInTensors[tensorID][dim];
+        }
+        if (rangeOp->hasLoopVar(var))
+            bitmap |= 1 << getNumInputs();
+        posTable[bitmap].emplace_back(var);
+    }
+}
+
+void IteratorTable::buildTableWithDefaultMap() {
+    vector<int> tensorMap;
+    for (int i = 0; i < getNumInputs(); ++i)
+        tensorMap.emplace_back(i);
+    buildTable(tensorMap);
+}
+int IteratorTable::getStridesInTensor(Iterator iter, int tensorID) const {
+    return strideInTensor.at(iter).at(tensorID);
+}
+
+vector<int> IteratorTable::getIterDimInTensor(int tensorID,
+                                              const Iterator &iter) const {
+    vector<int> ret;
+    for (size_t i = 0; i < iterInTensorDim[tensorID].size(); ++i) {
+        for (const auto &it : iterInTensorDim[tensorID][i])
+            if (iter->equal(it))
+                ret.emplace_back(i);
+    }
+    return ret;
+}
+
+vector<Mismatch> IteratorTable::matchPatternIT(const Pattern &patternIT) {
+    vector<Mismatch> ret;
+    iterMap.clear();
+    vector<vector<Iterator>> multiExprVar, multiPatternVar;
+    // match iterators in single iterator rows
+    for (int row = 0; row < getNumRows(); ++row) {
+        int nExprVars = posTable[row].size(),
+            nPatternVars = patternIT.posTable[row].size();
+        if (nExprVars < nPatternVars) {
+            ret.emplace_back(MismatchType::LessVar, row);
+            continue;
+        }
+        if (nExprVars > nPatternVars) {
+            ret.emplace_back(MismatchType::MoreVar, row);
+            continue;
+        }
+        if (posTable[row].empty())
+            continue;
+        // prepare for many-to-many iterator mapping
+        if (posTable[row].size() > 1) {
+            multiExprVar.emplace_back(posTable[row]);
+            multiPatternVar.emplace_back(patternIT.posTable[row]);
+        }
+        assert(!iterMap.count(posTable[row][0])); // check NO duplicate mapping
+        if (posTable[row].size() == 1)
+            iterMap[posTable[row][0]] = patternIT.posTable[row][0];
+    }
+    if (!ret.empty())
+        return ret;
+    PermutationGenerator permutationGenerator{multiPatternVar, multiExprVar};
+    bool checked = false;
+    // Permute iterator mappings to find a matched case
+    do {
+        auto mappingIter_r = permutationGenerator.get();
+        for (const auto &[exprIter, patternIter] : iterMap)
+            mappingIter_r[patternIter] = exprIter;
+        auto mismatches = matchPatternITCheckStrides(patternIT, mappingIter_r);
+        // if (mappingIter_r.count("_Conv_c"))
+        //     if (mappingIter_r["_Conv_n"] == "n" &&
+        //         mappingIter_r["_Conv_c"] == "c" &&
+        //         mappingIter_r["_Conv_h"] == "i22" &&
+        //         mappingIter_r["_Conv_r"] == "i4" &&
+        //         mappingIter_r["_Conv_w"] == "i17" &&
+        //         mappingIter_r["_Conv_s"] == "i14") {
+        //         dbg(ret.size());
+        //         if (mismatches.size() > 0)
+        //             dbg(mismatches.size(), mismatches[0].type);
+        //     }
+        if (mismatches.size() == 0) { // matched
+            ret = mismatches;
+            // Complete iterator mapping
+            for (const auto &[patternIter, exprIter] : mappingIter_r) {
+                if (iterMap.count(exprIter))
+                    assert(iterMap[exprIter]->equal(patternIter));
+                iterMap[exprIter] = patternIter;
+            }
+            break;
+        } else if (!checked) {
+            ret = mismatches;
+            checked = true;
+        } else if ((static_cast<int>(ret[0].type) <
+                    static_cast<int>(mismatches[0].type)) ||
+                   ((static_cast<int>(ret[0].type) ==
+                     static_cast<int>(mismatches[0].type)) &&
+                    (mismatches.size() < ret.size()))) {
+            ret = mismatches;
+        }
+    } while (permutationGenerator.next());
+    // Build reverse iterator mapping
+    if (ret.empty()) {
+        iterToRange_r.clear();
+        for (const auto &[exprIter, patternIter] : iterMap)
+            iterToRange_r[patternIter] = rangeOp->getVarRange(exprIter);
+    }
+    return ret;
+}
+
+vector<Mismatch> IteratorTable::matchPatternITCheckStrides(
+    const Pattern &patternIT, PtrMap<Iterator, Iterator> mappingIter_r) {
+    vector<Mismatch> ret;
+    // Check strides against each stride constraint
+    for (const auto &constraint : patternIT.getStrideConstraints()) {
+        // TODO: supprot PROPOTIONAL constraint
+        auto stride0 = strideInTensor.at(
+            mappingIter_r[constraint.v0])[constraint.tensorID];
+        auto stride1 = strideInTensor.at(
+            mappingIter_r[constraint.v1])[constraint.tensorID];
+        if (stride0 != stride1) {
+            ret.emplace_back(Mismatch(MismatchType::StrideMismatch, -1));
+        }
+    }
+    if (!ret.empty())
+        return ret;
+    // check the appearance of iterators inside tensors.
+    // If mismatch, this can be repaired by guided DLT.
+    for (int tensorID = 0; tensorID < getNumInputs(); ++tensorID) {
+        int exprTensorDim = tensorMap_r[tensorID]->getDims();
+        int patternTensorDim = patternIT.tensorMap_r[tensorID]->getDims();
+        if (exprTensorDim != patternTensorDim) {
+            ret.emplace_back(MismatchType::DLMismatch, tensorID);
+            continue;
+        }
+        [&] {
+            for (int dim = 0; dim < exprTensorDim; ++dim) {
+                // If #iters is differnt, than DLT is required
+                if (strideInDim[tensorID][dim].size() !=
+                    patternIT.strideInDim[tensorID][dim].size()) {
+                    ret.emplace_back(MismatchType::DLMismatch, tensorID);
+                    return;
+                }
+                for (const auto &[patternIter, patternStride] :
+                     patternIT.strideInDim[tensorID][dim]) {
+                    auto exprIter = mappingIter_r[patternIter];
+                    // If iters are differnt
+                    if (!strideInDim[tensorID][dim].count(exprIter)) {
+                        ret.emplace_back(MismatchType::DLMismatch, tensorID);
+                        return;
+                    }
+                    auto exprStride = strideInDim[tensorID][dim].at(exprIter);
+                    // TODO: for stride and dilation
+                    if (exprStride != patternStride) {
+                        ret.emplace_back(MismatchType::DLMismatch, tensorID);
+                        return;
+                    }
+                }
+            }
+        }();
+    }
+    if (!ret.empty())
+        return ret;
+    // check output data layout
+    // Output dim mismatch is not implemented.
+    if (patternIT.rangeOp->getNumOutputDims() != rangeOp->getNumOutputDims()) {
+        ret.emplace_back(Mismatch{MismatchType::OutputDimismatch, 0});
+        return ret;
+    }
+    for (size_t i = 0; i < rangeOp->getLoopVarRanges().size(); ++i) {
+        if (!mappingIter_r[patternIT.rangeOp->getLoopVar(i)]->equal(
+                rangeOp->getLoopVar(i))) {
+            ret.emplace_back(MismatchType::OutputDLMismatch, getNumInputs(),
+                             mappingIter_r);
+            break;
+        }
+    }
+    return ret;
+}
+
+pair<PatternTensorMap, PatternIterRangeMap>
+IteratorTable::getReverseMap() const {
+    return {tensorMap_r, iterToRange_r};
+}
+
+bool Pattern::isAllUniqueAccess(int tensorID) const {
+    for (const auto &iterInDim : iterInTensorDim[tensorID]) {
+        if (iterInDim.size() != 1)
+            return false;
+    }
+    return true;
+}
+
+Expr MatmulPattern::buildExpr(
+    const Expr &expr, const vector<Tensor> &tensors,
+    [[maybe_unused]] const PatternIterRangeMap &varRanges, string outputName,
+    [[maybe_unused]] const IteratorTable &exprIT) const {
+    // TODO support b
+    assert(tensors.size() == 2);
+    int b = 1;
+    int m = tensors[0]->getShape(0), n = tensors[1]->getShape(0);
+    int k = tensors[0]->getShape(1);
+    // TODO: check strides
+    // TODO: DLT for output?
+    // FIXME: check the trans
+    auto matmul = make_ref<MatmulNode>(expr, tensors[0], tensors[1], b, m, n, k,
+                                       false, true);
+    auto output = make_ref<TensorNode>(outputName, vector<int>{m, n},
+                                       vector<int>{0, 0}, matmul);
+    return output;
+}
+
+const Pattern &MatmulPattern::getMatmulPattern() {
+    static class MatmulPattern exprIT;
+    static bool inited = false;
+    if (!inited) {
+        inited = true;
+        int M = 224, N = 8, K = 16;
+        auto m = make_ref<VarNode>("_Matmul_m");
+        auto n = make_ref<VarNode>("_Matmul_n");
+        auto k = make_ref<VarNode>("_Matmul_k");
+        auto A = make_ref<TensorNode>("_Matmul_A", vector<int>({M, K}));
+        auto B = make_ref<TensorNode>("_Matmul_B", vector<int>({N, K}));
+        auto subA = makeSubscript(A, {m, k});
+        auto subB = makeSubscript(B, {n, k});
+        auto range = makeRangeOperator({{m, {0, M}}, {n, {0, N}}},
+                                       {{k, {0, K}}}, subA * subB);
+        auto success = exprIT.analyzeExpr(range);
+        assert(success);
+        exprIT.buildTable({0, 1});
+    }
+    return exprIT;
+}
+
+const Pattern &ConvPattern::getPattern() {
+    static class ConvPattern exprIT;
+    static bool inited = false;
+    if (!inited) {
+        inited = true;
+        // The shape is meaningless but cannot be zero IT building
+        int N = 8, C = 16, H = 224, W = 224, F = 16, R = 3, S = 3;
+        // auto n = make_ref<VarNode>("_Matmul_n");
+        auto A = make_ref<TensorNode>("_Conv_A", vector<int>({N, C, H, W}));
+        auto B = make_ref<TensorNode>("_Conv_K", vector<int>({F, C, R, S}));
+        auto subA = makeSubscript(A, {n, c, h + r, w + s});
+        auto subB = makeSubscript(B, {f, c, r, s});
+        auto range = makeRangeOperator(
+            {{n, {0, 0}}, {f, {0, 0}}, {h, {0, 0}}, {w, {0, 0}}},
+            {{c, {0, 0}}, {r, {0, 0}}, {s, {0, 0}}}, subA * subB);
+        auto success = exprIT.analyzeExpr(range);
+        assert(success);
+        exprIT.buildTable({0, 1});
+    }
+    return exprIT;
+}
+
+Expr ConvPattern::buildExpr(
+    const Expr &expr, const vector<Tensor> &tensors,
+    const PatternIterRangeMap &varRanges, string outputName,
+    [[maybe_unused]] const IteratorTable &exprIT) const {
+    // calculate paddings
+    const auto &rangeH = varRanges.at(h).second;
+    const auto &rangeR = varRanges.at(r).second;
+    const auto &rangeW = varRanges.at(w).second;
+    const auto &rangeS = varRanges.at(s).second;
+    auto offsetH =
+        SimplifyExprVisitor().getConstant(exprIT.getSubscript(0)->getIndex(2));
+    auto offsetW =
+        SimplifyExprVisitor().getConstant(exprIT.getSubscript(0)->getIndex(3));
+    int ph = calcPadding(tensors[0], 2, rangeH, rangeR, offsetH);
+    int pw = calcPadding(tensors[0], 3, rangeW, rangeS, offsetW);
+
+    // TODO strided, dilated
+    auto conv = make_ref<ConvNode>(expr, tensors[0], tensors[1], ph, pw);
+    auto shape = conv->getShape();
+    auto rangeOpShape = as<RangeOpNode>(expr)->getOutputShape();
+    assert(shape.size() == rangeOpShape.size());
+    dbg(shape, rangeOpShape);
+    for (size_t i = 0; i < shape.size(); ++i) {
+        if (shape[i] != rangeOpShape[i]) {
+            dbg("Warning: unmatched Conv output", shape, rangeOpShape);
+            return nullptr;
+        }
+    }
+    auto output =
+        make_ref<TensorNode>(outputName, shape, vector<int>{0, 0, 0, 0}, conv);
+    return output;
+}
+
+RangeOp IteratorTable::getRangeOp() const { return rangeOp; }
+
+#define DEF_CONV_VAR(a)                                                        \
+    const Var ConvPattern::a = make_ref<VarNode>("_Conv_" #a)
+DEF_CONV_VAR(n);
+DEF_CONV_VAR(c);
+DEF_CONV_VAR(h);
+DEF_CONV_VAR(w);
+DEF_CONV_VAR(f);
+DEF_CONV_VAR(r);
+DEF_CONV_VAR(s);
+#undef DEF_CONV_VAR
+
+int Pattern::calcPadding(const Tensor &tensor, int dim, Range rangeH,
+                         Range rangeR, int offset) const {
+    int l = rangeH.first + rangeR.first + offset;
+    int r = rangeH.second + rangeR.second - 1 + offset;
+    int ret = max(0, max(0 - l, r - tensor->getShape(dim)));
+    // dbg(l, r, rangeH, rangeR, offset, ret, tensor->getPadding(dim));
+    // check OutOfBound
+    assert(ret <= tensor->getPadding(dim));
+    return ret;
+}
+
+#define DEF_SG2BMM_VAR(a)                                                      \
+    const Var Sg2bmmPattern::a = make_ref<VarNode>("_Sg2bmm_" #a)
+DEF_SG2BMM_VAR(b);
+DEF_SG2BMM_VAR(m);
+DEF_SG2BMM_VAR(w);
+DEF_SG2BMM_VAR(k);
+#undef DEF_SG2BMM_VAR
+
+const Pattern &Sg2bmmPattern::getPattern() {
+    static class Sg2bmmPattern exprIT;
+    static bool inited = false;
+    if (!inited) {
+        inited = true;
+        // The shape is meaningless but cannot be zero IT building
+        int Batch = 8, M = 32, K = 224, W = 2;
+        // auto n = make_ref<VarNode>("_Matmul_n");
+        auto A = make_ref<TensorNode>("_Sg2bmm_A", vector<int>{Batch, M, K});
+        auto B = make_ref<TensorNode>("_Sg2bmm_B", vector<int>{Batch, M, K});
+        auto subA = makeSubscript(A, {b, m, k});
+        auto subB = makeSubscript(B, {b, m + w, k});
+        auto range =
+            makeRangeOperator({{b, {0, Batch}}, {m, {0, M}}, {w, {-W, W + 1}}},
+                              {{k, {0, K}}}, subA * subB);
+        auto success = exprIT.analyzeExpr(range);
+        assert(success);
+        exprIT.buildTableWithDefaultMap();
+    }
+    return exprIT;
+}
+
+Expr Sg2bmmPattern::buildExpr(
+    const Expr &expr, const vector<Tensor> &tensors,
+    [[maybe_unused]] const PatternIterRangeMap &varRanges, string outputName,
+    [[maybe_unused]] const IteratorTable &exprIT) const {
+    // calculate paddings
+    assert(tensors.size() == 2);
+    assert(tensors[0]->getDims() == 3 && tensors[1]->getDims() == 3);
+    int Batch = tensors[0]->getShape(0);
+    int M = tensors[0]->getShape(1);
+    int K = tensors[0]->getShape(2);
+    int W = getLength(varRanges.at(w).second) / 2;
+
+    auto op = make_ref<G2bmmNode>(expr, tensors[0], tensors[1], Batch, M, W, K);
+    auto shape = op->getShape();
+    auto rangeOpShape = as<RangeOpNode>(expr)->getOutputShape();
+    assert(shape.size() == rangeOpShape.size());
+    for (size_t i = 0; i < shape.size(); ++i) {
+        assert(shape[i] == rangeOpShape[i]);
+    }
+    auto output =
+        make_ref<TensorNode>(outputName, shape, vector<int>{0, 0, 0}, op);
+    return output;
+}
+
+#define DEF_LongformerGBMM_VAR(a)                                              \
+    const Var LongformerGBMMPattern::a = make_ref<VarNode>("_lo_" #a)
+DEF_LongformerGBMM_VAR(b);
+DEF_LongformerGBMM_VAR(m);
+DEF_LongformerGBMM_VAR(w);
+DEF_LongformerGBMM_VAR(n);
+#undef DEF_LongformerGBMM_VAR
+
+const Pattern &LongformerGBMMPattern::getPattern() {
+    static class LongformerGBMMPattern exprIT;
+    static bool inited = false;
+    if (!inited) {
+        inited = true;
+        // The shape is meaningless but cannot be zero IT building
+        int Batch = 8, M = 32, N = 224, W = 2;
+        auto A =
+            make_ref<TensorNode>("_lo_A", vector<int>{Batch, M, 2 * W + 1});
+        auto B = make_ref<TensorNode>("_lo_B", vector<int>{Batch, M, N});
+        auto subA = makeSubscript(A, {b, m, w});
+        auto subB = makeSubscript(B, {b, m + w, n});
+        auto range =
+            makeRangeOperator({{b, {0, Batch}}, {m, {0, M}}, {n, {0, M}}},
+                              {{w, {-W, W + 1}}}, subA * subB);
+        auto success = exprIT.analyzeExpr(range);
+        assert(success);
+        exprIT.buildTableWithDefaultMap();
+    }
+    return exprIT;
+}
+
+Expr LongformerGBMMPattern::buildExpr(
+    const Expr &expr, const vector<Tensor> &tensors,
+    [[maybe_unused]] const PatternIterRangeMap &varRanges, string outputName,
+    [[maybe_unused]] const IteratorTable &exprIT) const {
+    // calculate paddings
+    assert(tensors.size() == 2);
+    assert(tensors[0]->getDims() == 3 && tensors[1]->getDims() == 3);
+    int Batch = tensors[0]->getShape(0);
+    int M = tensors[0]->getShape(1);
+    assert(tensors[0]->getShape(2) % 2 == 1);
+    int W = tensors[0]->getShape(2) / 2;
+    int N = tensors[1]->getShape(2);
+
+    auto op = make_ref<GbmmNode>(expr, tensors[0], tensors[1], Batch, M, W, N);
+    auto shape = op->getShape();
+    auto rangeOpShape = as<RangeOpNode>(expr)->getOutputShape();
+    assert(shape.size() == rangeOpShape.size());
+    for (size_t i = 0; i < shape.size(); ++i) {
+        assert(shape[i] == rangeOpShape[i]);
+    }
+    auto output = make_ref<TensorNode>(outputName, shape,
+                                       vector<int>(shape.size(), 0), op);
+    return output;
+}
+
+const Pattern &getPattern(RoutineType targetOp) {
+    switch (targetOp) {
+    case RoutineType::MatmulNodeType:
+        return MatmulPattern::getMatmulPattern();
+    case RoutineType::ConvNodeType:
+        return ConvPattern::getPattern();
+    case RoutineType::G2bmmNodeType:
+        return Sg2bmmPattern::getPattern();
+    case RoutineType::GbmmNodeType:
+        return LongformerGBMMPattern::getPattern();
+    default:
+        nnet_unimplemented_halt();
+    }
+}
+
+string getPatternName(RoutineType targetOp) {
+    switch (targetOp) {
+    case RoutineType::MatmulNodeType:
+        return "Matmul";
+    case RoutineType::ConvNodeType:
+        return "Conv";
+    case RoutineType::G2bmmNodeType:
+        return "G2bmm";
+    case RoutineType::GbmmNodeType:
+        return "Gbmm";
+    default:
+        nnet_unimplemented_halt();
+    }
+    return {};
+}
+
+#define DEFINE_VAR(name) auto name = make_ref<VarNode>(#name);
+Expr ConvPattern::getExpr(Tensor A, Tensor K, int N, int C, int H, int W, int F,
+                          int R, int S) {
+    DEFINE_VAR(n);
+    DEFINE_VAR(c);
+    DEFINE_VAR(h);
+    DEFINE_VAR(w);
+    DEFINE_VAR(f);
+    DEFINE_VAR(r);
+    DEFINE_VAR(s);
+    auto subA = makeSubscript(A, {n, c, h + r - R / 2, w + s - S / 2});
+    auto subB = makeSubscript(K, {f, c, r, s});
+    auto range =
+        makeRangeOperator({{n, {0, N}}, {f, {0, F}}, {h, {0, H}}, {w, {0, W}}},
+                          {{c, {0, C}}, {r, {0, R}}, {s, {0, S}}}, subA * subB);
+    return range;
+}
+
+// Warn: F is the number of input channels, which is inversed compared with
+// normal Conv.
+// Input / output layouts: NHWF -> NHWC, Kenrel: RSFC
+Expr ConvTransPattern::getExpr(Tensor A, Tensor K, int N, int C, int H, int W,
+                               int F, int R, int S) {
+    const int padding = 1 * (R - 1) - 1;
+    assert(A->getPadding(2) == padding);
+    assert(R == 4);
+    assert(S == 4);
+    const int OH = 2 * H, OW = 2 * W;
+    DEFINE_VAR(n);
+    DEFINE_VAR(c);
+    DEFINE_VAR(f);
+    DEFINE_VAR(r);
+    DEFINE_VAR(s);
+    DEFINE_VAR(x1);
+    DEFINE_VAR(x2);
+    DEFINE_VAR(y1);
+    DEFINE_VAR(y2);
+    DEFINE_VAR(i2);
+    DEFINE_VAR(i4);
+    DEFINE_VAR(h);
+    DEFINE_VAR(w);
+    // dilation * (kernel_size - 1) - padding
+    // auto A = make_ref<TensorNode>("A", vector<int>({N, H, W, F}),
+    //                               vector<int>{0, padding, padding, 0});
+    // auto K = make_ref<TensorNode>("K", vector<int>({R, S, F, C}));
+
+    auto subA = makeSubscript(A, {n, x1 + r - 1, y1 + s - 1, f});
+    auto subK =
+        makeSubscript(K, {(R - 2) - 2 * r + x2, (S - 2) - 2 * s + y2, f, c});
+    // x1=(h+1)//2, x2=(h+1)%2, y1=(w+1)//2
+
+    auto range1 = makeRangeOperator(
+        {
+            {n, {0, N}},
+            {c, {0, C}},
+            {x1, {0, OH / 2 + 1}},
+            {x2, {0, 2}},
+            {y1, {0, OW / 2 + 1}},
+            {y2, {0, 2}},
+        },
+        {{f, {0, F}}, {r, {0, R / 2}}, {s, {0, S / 2}}}, subA * subK);
+    auto sub0 = makeSubscript(
+        range1, {n, c, (h + 1) / 2, (h + 1) % 2, (w + 1) / 2, (w + 1) % 2});
+    auto range0 = makeRangeOperator(
+        {{n, {0, N}}, {h, {0, OH}}, {w, {0, OW}}, {c, {0, C}}}, {}, sub0);
+    return range0;
+}
+
+pair<Expr, pair<Tensor, Tensor>> Sg2bmmPattern::getExpr(int Batch, int M, int K,
+                                                        int W, int D) {
+    DEFINE_VAR(b);
+    DEFINE_VAR(m);
+    DEFINE_VAR(w);
+    DEFINE_VAR(k);
+    auto A = make_ref<TensorNode>("A", vector<int>({Batch, M, K}),
+                                  vector<int>{0, 0, 0});
+    auto B = make_ref<TensorNode>("B", vector<int>({Batch, M, K}),
+                                  vector<int>{0, D * W, 0});
+
+    auto subA = makeSubscript(A, {b, m, k});
+    auto subB = makeSubscript(B, {b, m + D * (w - W), k});
+    auto range =
+        makeRangeOperator({{b, {0, Batch}}, {m, {0, M}}, {w, {0, 2 * W + 1}}},
+                          {{k, {0, K}}}, subA * subB);
+    return {range, {A, B}};
+}
+
+pair<Expr, pair<Tensor, Tensor>>
+LongformerGBMMPattern::getExpr(int Batch, int M, int W, int K, int dilation) {
+    DEFINE_VAR(b);
+    DEFINE_VAR(m);
+    DEFINE_VAR(w);
+    DEFINE_VAR(k);
+    auto A = make_ref<TensorNode>("A", vector<int>({Batch, M, 2 * W + 1}),
+                                  vector<int>{0, 0, 0});
+    auto B = make_ref<TensorNode>("B", vector<int>({Batch, M, K}),
+                                  vector<int>{0, dilation * W, 0});
+    auto subA = makeSubscript(A, {b, m, w});
+    auto subB = makeSubscript(B, {b, m + dilation * w - dilation * W, n});
+    auto range = makeRangeOperator({{b, {0, Batch}}, {m, {0, M}}, {n, {0, K}}},
+                                   {{w, {0, 2 * W + 1}}}, subA * subB);
+    return {range, {A, B}};
+}
+
+pair<Expr, pair<Tensor, Tensor>> MatmulPattern::getExpr(bool transA,
+                                                        bool transB, int Batch,
+                                                        int M, int N, int K) {
+    DEFINE_VAR(b);
+    DEFINE_VAR(m);
+    DEFINE_VAR(n);
+    DEFINE_VAR(k);
+    auto A = make_ref<TensorNode>("A", vector<int>({Batch, M, K}),
+                                  vector<int>{0, 0, 0});
+    auto B = make_ref<TensorNode>("B", vector<int>({Batch, K, N}),
+                                  vector<int>{0, 0, 0});
+    auto subA = makeSubscript(A, {b, m, k});
+    auto subB = makeSubscript(B, {b, k, n});
+    auto range = makeRangeOperator({{b, {0, Batch}}, {m, {0, M}}, {n, {0, N}}},
+                                   {{k, {0, K}}}, subA * subB);
+    return {range, {A, B}};
+}
+
+#undef DEFINE_VAR
+
+} // namespace nnet
diff --git a/src/nnet/nmutator.cc b/src/nnet/nmutator.cc
new file mode 100644
index 00000000..8e030064
--- /dev/null
+++ b/src/nnet/nmutator.cc
@@ -0,0 +1,722 @@
+#include "nnet/nmutator.h"
+#include "nnet/Visitor/FullPrinterVisitor.h"
+#include "nnet/Visitor/GetTensorsVisitor.h"
+#include "nnet/Visitor/MatchReshapeVisitor.h"
+#include "nnet/derivator.h"
+
+#ifdef ABC
+namespace infini {
+
+NMutator::NMutator() {}
+
+NMutator::NMutator(const std::vector<int> &derivationRules)
+    : mode{Mode::RuleBased}, derivationRules{derivationRules} {}
+
+NMutator::~NMutator() {}
+
+void NMutator::setToNaiveMembound() { mode = Mode::ToNaiveMembound; }
+
+vector<Graph> NMutator::run(const Graph &in_graph) {
+    vector<Graph> out_graphs;
+    // Test helper: naively transform one Op to Membound
+    if (mode == Mode::ToNaiveMembound) {
+        runSingleOpToNaiveMembound(in_graph, out_graphs);
+        dbg(out_graphs.size());
+        return;
+    }
+    // // Hack for HetConv fusion
+    // if (statGraph(in_graph) == NMutator::SGType::HetConv) {
+    //     dbg("Start fuse HetConv");
+    //     out_graphs.emplace_back(fuseHetConv(nullptr, in_graph));
+    // }
+    // Clear input names maps with tensor
+    inputsNameNToTensorT.clear();
+    std::vector<Operator> computeOps;
+    in_graph->getComputeOps(computeOps);
+    // assert(computeOps.size() == 1);
+    if (computeOps.size() == 1)
+        runSingleOp(in_graph, out_graphs);
+    // FIXME: runMultipleOps results in segfault
+    // else
+    //     runMultipleOps(in_graph, out_graphs);
+}
+
+void NMutator::runSingleOpToNaiveMembound(Graph in_graph,
+                                          std::vector<Graph> &out_graphs) {
+    std::vector<Operator> computeOps;
+    in_graph->getComputeOps(computeOps);
+    assert(computeOps.size() == 1);
+    const auto &computeOp = computeOps[0];
+    auto g = std::make_shared<infini::Graph>();
+    auto expr = opToExpression(computeOp);
+    auto inputsN = nnet::GetTensorsVisitor().get(expr);
+    dbg(inputsN);
+    g->membound(computeOp->getInputs(), computeOp->getOutputs(),
+                {inputsN.at("A"), inputsN.at("K")}, expr, 0);
+    auto subgraph = new Graph(g->getOperators());
+    subgraph->print();
+    out_graphs.emplace_back(subgraph);
+}
+
+void NMutator::runSingleOp(Graph in_graph, std::vector<Graph> &out_graphs) {
+    std::vector<Operator> computeOps;
+    in_graph->getComputeOps(computeOps);
+    if (infini::Graph g = transformTConv1x1(computeOps[0])) {
+        Graph graph = new Graph(g->getOperators());
+        out_graphs.emplace_back(graph);
+        return;
+    }
+    // Commented for debug, not implemented yet
+    // if (infini::Graph g = transformTConv3x3(computeOps[0])) {
+    //     Graph graph = new Graph(g->getOperators());
+    //     out_graphs.emplace_back(graph);
+    //     return;
+    // }
+    if (infini::Graph g = transformDialtedConv(computeOps[0])) {
+        Graph graph = new Graph(g->getOperators());
+        out_graphs.emplace_back(graph);
+        return;
+    }
+    // if (infini::Graph g = transformConv1x1(computeOps[0])) {
+    //     Graph graph = new Graph(g->getOperators());
+    //     out_graphs.emplace_back(graph);
+    //     return;
+    // }
+    // if (infini::Graph g = transformConv1xk(computeOps[0])) {
+    //     Graph graph = new Graph(g->getOperators());
+    //     out_graphs.emplace_back(graph);
+    //     return;
+    // }
+
+    auto expr = opToExpression(computeOps[0]);
+    if (!expr)
+        return;
+
+    nnet::Derivator derivator(maxDepth);
+    nnet::Formula conv_9x9(expr, 0);
+    // const std::vector<int> rules{3, 2, 2, 2, 2, 5, 8, 8, 6, 91, 90}; // Tconv
+    // const std::vector<int> rules{1, 7, 7, 2, 8, 6, 6}; // G2BMM
+    if (mode == Mode::Normal) {
+        derivator.search(conv_9x9, 0);
+    } else if (mode == Mode::RuleBased) {
+        dbg(derivationRules);
+        derivator.ruleBasedDFS(conv_9x9, 0, derivationRules);
+    } else
+        nnet_assert(0, "Unknown mode");
+    const auto &candidates = derivator.getCandidates();
+    dbg(candidates.size());
+    // derivator.print();
+    for (const auto &candidate : candidates) {
+        // dbg(nnet::FullPrinterVisitor().print(candidate.root));
+        if (auto g = expressionToGraph(candidate.root, in_graph)) {
+            Graph graph = new Graph(g->getOperators());
+            out_graphs.emplace_back(graph);
+        }
+        // break; // HACK:Debug only for the first subgraph
+    }
+    // dbg(out_graphs);
+    // for (auto graph : out_graphs) {
+    //     graph->print();
+    // }
+    cntStates += derivator.getNumIntermediateStates();
+    cntCandidates += derivator.getNumCandidates();
+}
+
+void NMutator::runMultipleOps(Graph in_graph, std::vector<Graph> &out_graphs) {
+    std::cout << "run multiple ops" << std::endl;
+    in_graph->print();
+    std::cout << std::endl;
+
+    std::vector<Operator> computeOps;
+    dbg(computeOps);
+    in_graph->getComputeOps(computeOps);
+    nnet::VecExpr exprs;
+    for (const auto &op : computeOps)
+        exprs.emplace_back(opToExpression(op));
+    dbg(exprs);
+
+    nnet::Derivator derivator;
+    nnet::MultiFormulas origin(exprs, 0);
+    bool canCombine = derivator.stageCombination(origin, 0);
+    dbg(canCombine);
+    const auto matmul0 = dynamic_cast<MatmulOp *>(computeOps[0]);
+    assert(matmul0);
+    // Build merged graph
+    auto g = new infini::Graph();
+    std::vector<Tensor *> inputsT, weightsT, outputsT;
+    for (const auto &opT : computeOps) {
+        inputsT.emplace_back(opT->getInputs(0));
+        weightsT.emplace_back(opT->getInputs(1));
+        outputsT.emplace_back(opT->getOutput());
+    }
+    const auto concat1 = g->concat(inputsT, 0);
+    const auto concat2 = g->concat(weightsT, 0);
+    const auto matmul = g->matmul(concat1->getOutput(), concat2->getOutput(),
+                                  matmul0->getTransA(), matmul0->getTransB());
+    g->split(matmul->getOutput(), outputsT, 0, computeOps.size());
+    // Build computation graph in PET:
+    g->updateConnection();
+    Graph graph = new Graph(g->getOperators());
+    out_graphs.emplace_back(graph);
+    // DEBUG
+    dbg(out_graphs);
+    for (auto graph : out_graphs) {
+        graph->print();
+    }
+}
+
+NMutator::SGType NMutator::statGraph(Graph sg) {
+    auto ops = sg->getOperators();
+    switch (ops.size()) {
+    case 0: {
+        return Empty;
+        break;
+    }
+
+    case 1: {
+        if (ops[0]->getType() == Operator::Conv) {
+            auto weight = ops[0]->getInputs()[1];
+            auto r = weight->getDims()[2];
+            auto s = weight->getDims()[3];
+            if (((ConvOp *)sg->getOperators()[0])->getDh() == 1 &&
+                ((ConvOp *)sg->getOperators()[0])->getDw() == 1 && r == 1 &&
+                s == 1) {
+                return Conv1X1;
+            } else if (((ConvOp *)sg->getOperators()[0])->getDh() == 2 ||
+                       ((ConvOp *)sg->getOperators()[0])->getDw() == 2) {
+                return DilatedConv;
+            } else {
+                const Dim &inDim = ops[0]->getInputs()[0]->getDims();
+                const Dim &wDim = ops[0]->getInputs()[1]->getDims();
+                if (inDim[2] % 2 == 1 && inDim[3] % 2 == 1)
+                    return NormalOddConv;
+                else if (wDim[2] != wDim[3])
+                    return TransKernelConv;
+                else
+                    return NormalConv;
+            }
+        } else if (ops[0]->getType() == Operator::Matmul) {
+            return NormalMatmul;
+        }
+        break;
+    }
+
+    default:
+        auto ty = ops[0]->getType();
+        for (size_t i = 1, iEnd = ops.size(); i < iEnd; ++i) {
+            if (ops[i]->getType() != ty)
+                return Others;
+        }
+        if (ty == Operator::Conv) {
+            std::vector<ConvOp *> convs;
+            for (auto op : ops)
+                convs.emplace_back(dynamic_cast<ConvOp *>(op));
+            // TODO: 1x1 conv enlarge. 1x1 conv has 0 padding
+            for (size_t i = 1, iEnd = ops.size(); i < iEnd; ++i)
+                if (!convs[i]->same(*convs[0]))
+                    return Others;
+            auto inDim = ops[0]->getInputs(0)->getDims();
+            // TODO: enlarge input tensor?
+            for (size_t i = 1, iEnd = ops.size(); i < iEnd; ++i)
+                if (ops[i]->getInputs(0)->getDims() != inDim)
+                    return Others;
+            auto weightDim = ops[0]->getInputs(1)->getDims();
+            auto groupFlag = true;
+            // TODO: kernel enlarge to group?
+            for (size_t i = 1, iEnd = ops.size(); i < iEnd; ++i) {
+                auto wDim = ops[i]->getInputs(1)->getDims();
+                if (!(wDim[1] == weightDim[1] && wDim[2] == weightDim[2] &&
+                      wDim[3] == weightDim[3] && wDim[2] == wDim[3])) {
+                    groupFlag = false;
+                    break;
+                }
+            }
+            if (groupFlag)
+                return GroupConv;
+            // Hack for HetConv
+            if (ops.size() == 2) {
+                auto w1Dim = weightDim, w2Dim = ops[1]->getInputs(1)->getDims();
+                auto hConvFlag = false;
+                if (w1Dim[0] == w2Dim[0] && w1Dim[1] == w2Dim[1]) {
+                    if (w1Dim[2] == 3 && w1Dim[3] == 3 && w2Dim[2] == 1 &&
+                        w2Dim[3] == 1) {
+                        hConvFlag = true;
+                    }
+                    if (w1Dim[2] == 1 && w1Dim[3] == 1 && w2Dim[2] == 3 &&
+                        w2Dim[3] == 3) {
+                        hConvFlag = true;
+                    }
+                }
+                if (hConvFlag) {
+                    // std::cout << "[nmutator stat graph]Het Conv found!"
+                    //           << std::endl;
+                    // ops[0]->print();
+                    // std::cout << std::endl;
+                    // ops[1]->print();
+                    // std::cout << std::endl;
+                    return HetConv;
+                }
+            }
+            auto transGroupFlag = true;
+            // TODO: transpose group conv with different f dim?
+            for (size_t i = 1, iEnd = ops.size(); i < iEnd; ++i) {
+                auto wDim = ops[i]->getInputs(1)->getDims();
+                if (!(wDim[0] == weightDim[0] && wDim[1] == weightDim[1] &&
+                      ((wDim[2] == weightDim[2] && wDim[3] == weightDim[3]) ||
+                       (wDim[2] == weightDim[3] && wDim[3] == weightDim[2])))) {
+                    transGroupFlag = false;
+                    break;
+                }
+            }
+            if (transGroupFlag)
+                return TransposeGroupConv;
+        } else if (ty == Operator::Matmul) {
+            // check same input shape or not
+            for (int i = 0; i < (int)ops.size() - 1; ++i) {
+                assert(dynamic_cast<MatmulOp *>(ops[i])->getTransA() ==
+                       dynamic_cast<MatmulOp *>(ops[i + 1])->getTransA());
+                assert(dynamic_cast<MatmulOp *>(ops[i])->getTransB() ==
+                       dynamic_cast<MatmulOp *>(ops[i + 1])->getTransB());
+                if (ops[i]->getInputs()[0]->getDims() !=
+                    ops[i + 1]->getInputs()[0]->getDims()) {
+                    return Others;
+                }
+                if (ops[i]->getInputs()[1]->getDims() !=
+                    ops[i + 1]->getInputs()[1]->getDims()) {
+                    return Others;
+                }
+            }
+            return BatchMatmul;
+        }
+        // TODO: others?
+        break;
+    }
+
+    return Others;
+}
+
+uint64_t NMutator::computeHashForSingleComputeOp(const Operator op) {
+    if (op->getType() == Operator::Conv) {
+        auto conv = dynamic_cast<const ConvOp *>(op);
+        auto hash = conv->getHash();
+        auto inputDim = conv->getInputs()[0]->getDims();
+        auto weightDim = conv->getOutputs()[0]->getDims();
+        hash += inputDim[0] * 10000019 + inputDim[1] * 10000079 +
+                inputDim[2] * 10000103 + inputDim[3] * 10000121 +
+                weightDim[0] * 10000139 + weightDim[1] * 10000141 +
+                weightDim[2] * 10000169 + weightDim[3] * 10000189;
+        return hash;
+    } else if (op->getType() == Operator::ConvTrans) {
+        auto conv = dynamic_cast<const ConvTransOp *>(op);
+        auto hash = conv->getHash();
+        auto inputDim = conv->getInputs()[0]->getDims();
+        auto weightDim = conv->getOutputs()[0]->getDims();
+        hash += inputDim[0] * 10000019 + inputDim[1] * 10000079 +
+                inputDim[2] * 10000103 + inputDim[3] * 10000121 +
+                weightDim[0] * 10000139 + weightDim[1] * 10000141 +
+                weightDim[2] * 10000169 + weightDim[3] * 10000189;
+        return hash;
+    } else if (op->getType() == Operator::Matmul) {
+        static uint64_t matmulhash = 0;
+        return matmulhash++;
+    } else if (op->getType() == Operator::G2BMM) {
+        auto g2bmm = dynamic_cast<const G2BMMOp *>(op);
+        auto hash = g2bmm->getHash();
+        auto inputDim = g2bmm->getInputs()[0]->getDims();
+        auto weightDim = g2bmm->getOutputs()[0]->getDims();
+        hash += inputDim[0] * 10000019 + inputDim[1] * 10000079 +
+                inputDim[2] * 10000103 + inputDim[3] * 10000121 +
+                weightDim[0] * 10000139 + weightDim[1] * 10000141 +
+                weightDim[2] * 10000169 + weightDim[3] * 10000189;
+        return hash;
+    } else if (op->getType() == Operator::GBMML) {
+        auto gbmml = dynamic_cast<const GBMMLOp *>(op);
+        auto hash = gbmml->getHash();
+        auto inputDim = gbmml->getInputs()[0]->getDims();
+        auto weightDim = gbmml->getOutputs()[0]->getDims();
+        hash += inputDim[0] * 10000019 + inputDim[1] * 10000079 +
+                inputDim[2] * 10000103 + inputDim[3] * 10000121 +
+                weightDim[0] * 10000139 + weightDim[1] * 10000141 +
+                weightDim[2] * 10000169 + weightDim[3] * 10000189;
+        return hash;
+    } else {
+        // Not impl
+        assert(false);
+        return 0;
+    }
+}
+
+nnet::Expr NMutator::opToExpression(Operator op) {
+    if (auto convOp = dynamic_cast<ConvOp *>(op)) {
+        const auto &inputs = convOp->getInputs();
+        const auto &AT = inputs[0];
+        const auto &KT = inputs[1];
+        const auto &[n, c, h, w, f, r, s, ph, pw, sh, sw, dh, dw, g, bi, ac] =
+            convOp->getArgs(0);
+        dbg(n, c, h, w, f, r, s, ph, pw, sh, sw, dh, dw);
+        if (!(sh == 1 && sw == 1 && dh == 1 && dw == 1))
+            return nullptr;
+        assert(sh == 1 && sw == 1 && dh == 1 && dw == 1);
+        inputsNameNToTensorT["A"] = AT;
+        inputsNameNToTensorT["K"] = KT;
+        const auto A = nnet::makeTensor("A", AT->getDims(),
+                                        std::vector<int>{0, 0, ph, pw});
+        const auto K = nnet::makeTensor("K", KT->getDims());
+        return nnet::ConvPattern::getExpr(A, K, n, c, h, w, f, r, s);
+    } else if (auto convOp = dynamic_cast<ConvTransOp *>(op)) {
+        const auto &AT = convOp->getInputs()[0];
+        const auto &KT = convOp->getInputs()[1];
+        inputsNameNToTensorT["A"] = AT;
+        inputsNameNToTensorT["K"] = KT;
+        const auto &[n, c, h, w, f, r, s, ph, pw, sh, sw, dh, dw, g, bi, ac] =
+            convOp->getArgs(0);
+        if (r != 4) {
+            dbg("ConvTranspose R!=4. Skipped.", r);
+            return nullptr;
+        }
+        int padding = 1 * (r - 1) - 1;
+        const auto A = nnet::makeTensor(
+            "A", AT->getDims(), std::vector<int>{0, padding, padding, 0});
+        const auto K = nnet::makeTensor("K", KT->getDims());
+        return nnet::ConvTransPattern::getExpr(A, K, n, c, h, w, f, r, s);
+    } else if (auto g2bmmOp = dynamic_cast<G2BMMOp *>(op)) {
+        const auto &AT = g2bmmOp->getInputs()[0];
+        const auto &BT = g2bmmOp->getInputs()[1];
+        const auto [b, m, k, width, dilation] = g2bmmOp->getArgs();
+
+        const auto &[expr, inputsN] =
+            nnet::Sg2bmmPattern::getExpr(b, m, k, width, dilation);
+        inputsNameNToTensorT[inputsN.first->getName()] = AT;
+        inputsNameNToTensorT[inputsN.second->getName()] = BT;
+        return expr;
+    } else if (auto gbmmlOp = dynamic_cast<GBMMLOp *>(op)) {
+        const auto &AT = gbmmlOp->getInputs()[0];
+        const auto &BT = gbmmlOp->getInputs()[1];
+        const auto [b, m, w, k, dilation] = gbmmlOp->getArgs();
+        const auto &[expr, inputsN] =
+            nnet::LongformerGBMMPattern::getExpr(b, m, w, k, dilation);
+        inputsNameNToTensorT[inputsN.first->getName()] = AT;
+        inputsNameNToTensorT[inputsN.second->getName()] = BT;
+        dbg(b, m, w, k, dilation, expr);
+        return expr;
+    } else if (auto matmulOp = dynamic_cast<MatmulOp *>(op)) {
+        const auto &AT = matmulOp->getInputs()[0];
+        const auto &BT = matmulOp->getInputs()[1];
+        const auto [transA, transB, b, m, n, k] = matmulOp->getArgs();
+        const auto &[expr, inputsN] =
+            nnet::MatmulPattern::getExpr(transA, transB, b, m, n, k);
+        inputsNameNToTensorT[inputsN.first->getName()] = AT;
+        inputsNameNToTensorT[inputsN.second->getName()] = BT;
+        dbg(b, m, n, k, expr);
+        return expr;
+    }
+    // else if (auto transposeOp = dynamic_cast<TransposeOp *>(op)) {
+    //     return transposeOpToExpression(transposeOp);
+    // }
+    nnet_unimplemented_continue();
+    return nullptr;
+}
+
+infini::Graph NMutator::fuseHetConv(nnet::Expr expr, Graph in_graph) {
+    // Conv3x3+Conv1x1 => Gemm(nhw, f(rs+1), c) + Reduce
+    auto g = std::make_shared<infini::Graph>();
+    in_graph->print();
+    assert(in_graph->getInputs().size() == 3);
+    auto input = in_graph->getOperators()[0]->getInputs(0);
+    auto conv = dynamic_cast<ConvOp *>(in_graph->getOperators()[0]);
+    auto output = conv->getOutput();
+    // auto input = g->reshape(input);
+    auto inputTrans = g->transpose(input, 0, {-1, {0, 2, 3}, 1}, -1);
+    // dbg(inputTrans->getOutput()->getDims());
+    const auto &[n, c, h, w, f, r, s, ph, pw, sh, sw, dh, dw, G, bi, ac] =
+        conv->getArgs(0);
+    auto weight = g->tensor({1, c, f * (3 * 3 + 1)});
+    dbg(weight->getDims());
+    auto matmul = g->matmul(inputTrans->getOutput(), weight, false, false);
+    auto bias = g->tensor({f});
+    const double size = n * f * h * w * (3 * 3 + 1) * 4;
+    // FIXME: add NNET tensors for verfication
+    auto membound =
+        g->membound({matmul->getOutput(), bias}, {output}, {}, nullptr,
+                    memboundTime(size), "Reduce_conv3x3+1x1");
+    dbg(n, f, h, w);
+    dynamic_cast<MemBoundOp *>(membound)->setNFHW(n, f, h, w);
+
+    return new Graph(g->getOperators());
+}
+
+infini::Graph NMutator::expressionToGraph(nnet::Expr expr, Graph in_graph) {
+    auto g = new infini::Graph();
+    nnet::FullPrinterVisitor fullVisitor;
+    const auto &tensorQueueN = fullVisitor.traverse(expr);
+    // Build tensors: Skip the first one, which is output
+    auto nameNToTensorT = inputsNameNToTensorT;
+    for (size_t i = 1; i < tensorQueueN.size(); ++i) {
+        const auto &[nameN, routineN, tensorN] = tensorQueueN[i];
+        // dbg(nameN, routineN, tensorN);
+        if (!routineN) {
+            // This is an inputs
+            assert(nameNToTensorT.count(nameN));
+        } else {
+            assert(!nameNToTensorT.count(nameN));
+            nameNToTensorT[nameN] = g->tensor(tensorN->getShape());
+        }
+    }
+    const auto &outputsPET = in_graph->getOutputs();
+    if (outputsPET.size() != 1) {
+        nnet_unimplemented_continue();
+        return nullptr;
+    }
+    nameNToTensorT[std::get<0>(tensorQueueN.at(0))] = outputsPET[0];
+    // Build computation graph in PET:
+    for (int i = tensorQueueN.size() - 1; i >= 0; --i) {
+        const auto &[outputNameN, routineN, tensorN] = tensorQueueN[i];
+        if (!routineN)
+            continue;
+        // dbg(outputNameN, routineN, tensorN, routineN->getType());
+        if (auto op = nnet::as<nnet::ConvNode>(routineN)) {
+            // g->conv(i8, w9, 2, 2);
+            std::vector<nnet::Tensor> inputsN = op->getInputs();
+            auto A = nameNToTensorT.at(inputsN[0]->getName());
+            auto K = nameNToTensorT.at(inputsN[1]->getName());
+            auto output = nameNToTensorT.at(outputNameN);
+            const auto &[ph, pw, sh, sw, dh, dw] = op->getArgs();
+            g->conv(A, K, output, ph, pw, sh, sw, dh, dw);
+        } else if (auto op = nnet::as<nnet::ElementWiseNode>(routineN)) {
+            assert(op->getInputs().size() == 1);
+            nnet::MatchReshapeVisitor matchReshapeVisitor;
+            if (matchReshapeVisitor(op->getExpr())) {
+                auto input =
+                    nameNToTensorT.at(op->getInputs().at(0)->getName());
+                auto output = nameNToTensorT.at(outputNameN);
+                g->reshape(input, output);
+            } else {
+                TensorVec inputsPET;
+                TensorVec outputsPET = {nameNToTensorT.at(outputNameN)};
+                for (const auto &inputN : op->getInputs())
+                    inputsPET.emplace_back(
+                        nameNToTensorT.at(inputN->getName()));
+                // Re-estimate time here.
+                ssize_t cnt = 0;
+                for (const auto tensor : inputsPET)
+                    cnt += tensor->size();
+                for (const auto tensor : outputsPET)
+                    cnt += tensor->size();
+                g->membound(inputsPET, outputsPET, op->getInputs(),
+                            op->getExpr(), memboundTime(cnt));
+            }
+        } else if (auto op = nnet::as<nnet::MatmulNode>(routineN)) {
+            assert(op->getInputs().size() == 2);
+            nnet::Tensor AN = op->getInputs()[0];
+            nnet::Tensor BN = op->getInputs()[1];
+            TensorVec inputsPET = {nameNToTensorT.at(AN->getName()),
+                                   nameNToTensorT.at(BN->getName())};
+            TensorVec outputsPET = {nameNToTensorT.at(outputNameN)};
+            const auto &[b, m, n, k, transa, transb] = op->getArgs();
+            g->matmul(inputsPET[0], inputsPET[1], outputsPET[0], transa,
+                      transb);
+        } else if (auto op = nnet::as<nnet::G2bmmNode>(routineN)) {
+            assert(op->getInputs().size() == 2);
+            nnet::Tensor AN = op->getInputs()[0];
+            nnet::Tensor BN = op->getInputs()[1];
+            TensorVec inputsPET = {nameNToTensorT.at(AN->getName()),
+                                   nameNToTensorT.at(BN->getName())};
+            TensorVec outputsPET = {nameNToTensorT.at(outputNameN)};
+            const auto &[b, m, w, k, dilation] = op->getArgs();
+            g->g2bmm(inputsPET[0], inputsPET[1], outputsPET[0], w, dilation);
+        } else if (auto op = nnet::as<nnet::GbmmNode>(routineN)) {
+            assert(op->getInputs().size() == 2);
+            nnet::Tensor AN = op->getInputs()[0];
+            nnet::Tensor BN = op->getInputs()[1];
+            TensorVec inputsPET = {nameNToTensorT.at(AN->getName()),
+                                   nameNToTensorT.at(BN->getName())};
+            TensorVec outputsPET = {nameNToTensorT.at(outputNameN)};
+            const auto &[b, m, w, n, dilation] = op->getArgs();
+            g->gbmml(inputsPET[0], inputsPET[1], outputsPET[0], dilation);
+        }
+    }
+    g->updateConnection();
+    Graph graph = new Graph(g->getOperators());
+    return graph;
+}
+
+Graph NMutator::transformDialtedConv(Operator op) {
+    if (auto convOp = dynamic_cast<ConvOp *>(op)) {
+        if (convOp->getPh() == convOp->getDh() && convOp->getSh() == 1 &&
+            convOp->getDh() > 1 && convOp->getDh() == convOp->getDw()) {
+            const int d = convOp->getDh();
+            assert(convOp->getInputs()[0]->getDims()[2] % d == 0);
+            auto g = new infini::Graph();
+            auto inputDims = convOp->getInputs(0)->getDims();
+            auto weightDims = convOp->getInputs(1)->getDims();
+            auto outputDims = convOp->getOutput()->getDims();
+            auto newA = g->tensor({inputDims[0] * d * d, inputDims[1],
+                                   inputDims[2] / d, inputDims[3] / d});
+            // auto newW = g->tensor(
+            //     {weightDims[0] * weightDims[1] * weightDims[3],
+            //     weightDims[2]});
+            auto newO =
+                g->tensor({inputDims[0] * inputDims[1] * inputDims[2],
+                           weightDims[0] * weightDims[1] * weightDims[3]});
+            g->membound(
+                {convOp->getInputs(0)}, {newA}, {}, nullptr,
+                memboundTime(convOp->getInputs(0)->size() + newA->size()),
+                "DConv Tranpose Input");
+            // g->membound({convOp->getInputs(1)}, {newW}, {}, nullptr, 0,
+            //             "Tranpose Weight");
+            g->conv(newA, convOp->getInputs(1), newO, 1, 1, 1, 1);
+            g->reshape(newO, convOp->getOutput());
+            dbg(newO->size(), convOp->getOutput()->size());
+            assert(newO->size() == convOp->getOutput()->size());
+            g->membound(
+                {newO}, {convOp->getOutput()}, {}, nullptr,
+                memboundTime(newO->size() + convOp->getOutput()->size()),
+                "DConv Tranpose Output");
+            g->updateConnection();
+            Graph graph = new Graph(g->getOperators());
+            return graph;
+        }
+    }
+    return nullptr;
+}
+
+double NMutator::memboundTime(ssize_t cnt) {
+    return double(cnt) * 4 / bandwidth * 1000; // millisecond
+}
+
+double NMutator::memboundTime(const Dim &dims) {
+    return memboundTime(dims.size());
+}
+
+Graph NMutator::transformTConv3x3(Operator op) {
+    if (auto tconvOp = dynamic_cast<ConvTransOp *>(op)) {
+        dbg(tconvOp->getInputs()[1]->getDims());
+        if (tconvOp->getPh() == 1 && tconvOp->getSh() == 2 &&
+            tconvOp->getInputs()[1]->getDims()[0] == 3 &&
+            tconvOp->getInputs()[1]->getDims()[1] == 3) {
+            auto g = new infini::Graph();
+            auto inputDims = tconvOp->getInputs(0)->getDims();
+            auto weightDims = tconvOp->getInputs(1)->getDims();
+            auto outputDims = tconvOp->getOutput()->getDims();
+            // NHWF
+            auto newA = g->tensor(
+                {inputDims[0] * inputDims[1] * inputDims[2], inputDims[3]});
+            // RSFC
+            auto newW = g->tensor(
+                {weightDims[0] * weightDims[1] * weightDims[3], weightDims[2]});
+            auto newO =
+                g->tensor({inputDims[0] * inputDims[1] * inputDims[2],
+                           weightDims[0] * weightDims[1] * weightDims[3]});
+            g->reshape(tconvOp->getInputs(0), newA);
+            g->reshape(tconvOp->getInputs(1), newW);
+            g->matmul(newA, newW, newO, 0, 1);
+            // g->reshape(newO, tconvOp->getOutput());
+            tconvOp->print();
+            dbg(newO->size() * 4, tconvOp->getOutput()->size() * 9);
+            assert(newO->size() * 4 == tconvOp->getOutput()->size() * 9);
+            g->membound(
+                {newO}, {tconvOp->getOutput()}, {}, nullptr,
+                memboundTime(newO->size() + tconvOp->getOutput()->size()),
+                "TConv3x3 reduce");
+            g->updateConnection();
+            Graph graph = new Graph(g->getOperators());
+            return graph;
+        }
+    }
+    return nullptr;
+}
+
+Graph NMutator::transformTConv1x1(Operator op) {
+    if (auto tconvOp = dynamic_cast<ConvTransOp *>(op)) {
+        if (tconvOp->getPh() == 0 && tconvOp->getSh() == 1) {
+            auto g = new infini::Graph();
+            auto inputDims = tconvOp->getInputs(0)->getDims();
+            auto weightDims = tconvOp->getInputs(1)->getDims();
+            auto outputDims = tconvOp->getOutput()->getDims();
+            auto newA = g->tensor(
+                {inputDims[0] * inputDims[1] * inputDims[2], inputDims[3]});
+            auto newW = g->tensor(
+                {weightDims[0] * weightDims[1] * weightDims[3], weightDims[2]});
+            auto newO =
+                g->tensor({inputDims[0] * inputDims[1] * inputDims[2],
+                           weightDims[0] * weightDims[1] * weightDims[3]});
+            g->reshape(tconvOp->getInputs(0), newA);
+            g->reshape(tconvOp->getInputs(1), newW);
+            g->matmul(newA, newW, newO, 0, 1);
+            g->reshape(newO, tconvOp->getOutput());
+            g->updateConnection();
+            Graph graph = new Graph(g->getOperators());
+            return graph;
+        }
+    }
+    return nullptr;
+}
+
+Graph NMutator::transformConv1x1(Operator op) {
+    auto convOp = dynamic_cast<ConvOp *>(op);
+    if (!convOp)
+        return nullptr;
+    if (convOp->getPh() == 0 && convOp->getSh() == 1 &&
+        convOp->getInputs()[1]->getDims()[2] == 1 &&
+        convOp->getInputs()[1]->getDims()[3] == 1) {
+        // Transpose is requrired for BS>1
+        // if (convOp->getInputs()[0]->getDims()[0] == 1) {
+        auto g = new infini::Graph();
+        auto inputDims = convOp->getInputs(0)->getDims();
+        auto weightDims = convOp->getInputs(1)->getDims();
+        auto outputDims = convOp->getOutput()->getDims();
+        auto newA = g->tensor(
+            {inputDims[1], inputDims[0] * inputDims[2] * inputDims[3]});
+        auto newW = g->tensor({weightDims[0], weightDims[1]});
+        auto newO = g->tensor(
+            {weightDims[0], inputDims[0] * inputDims[2] * inputDims[3]});
+        g->reshape(convOp->getInputs(0), newA);
+        g->reshape(convOp->getInputs(1), newW);
+        g->matmul(newW, newA, newO, 0, 0);
+        g->reshape(newO, convOp->getOutput());
+        g->updateConnection();
+        Graph graph = new Graph(g->getOperators());
+        return graph;
+    }
+    return nullptr;
+}
+
+Graph NMutator::transformConv1xk(Operator op) {
+    auto convOp = dynamic_cast<ConvOp *>(op);
+    if (!convOp)
+        return nullptr;
+    if (convOp->getSh() != 1 || convOp->getSw() != 1)
+        return nullptr;
+    bool a = convOp->getInputs()[1]->getDims()[2] == 1;
+    bool b = convOp->getInputs()[1]->getDims()[3] == 1;
+    if (!(a ^ b))
+        return nullptr;
+    convOp->print();
+    auto g = new infini::Graph();
+    auto inputDims = convOp->getInputs(0)->getDims();
+    auto weightDims = convOp->getInputs(1)->getDims();
+    auto outputDims = convOp->getOutput()->getDims();
+    auto newA =
+        g->tensor({inputDims[0] * inputDims[2] * inputDims[3], inputDims[1]});
+    auto newW = g->tensor(
+        {weightDims[0] * weightDims[2] * weightDims[3], weightDims[1]});
+    auto newO = g->tensor({weightDims[0] * weightDims[2] * weightDims[3],
+                           inputDims[0] * inputDims[2] * inputDims[3]});
+    // g->reshape(convOp->getInputs(0), newA);
+    g->membound({convOp->getInputs(0)}, {newA}, {}, nullptr,
+                memboundTime(convOp->getInputs(0)->size() + newA->size()),
+                "1xk input reshape");
+    g->reshape(convOp->getInputs(1), newW);
+
+    g->matmul(newW, newA, newO, 0, 1);
+    g->membound({newO}, {convOp->getOutput()}, {}, nullptr,
+                memboundTime(newW->size() + convOp->getOutput()->size()),
+                "1xk reduce");
+    g->updateConnection();
+    Graph graph = new Graph(g->getOperators());
+    return graph;
+}
+
+} // namespace infini
+
+#endif
\ No newline at end of file
diff --git a/src/nnet/permutation.cc b/src/nnet/permutation.cc
new file mode 100644
index 00000000..26f9a650
--- /dev/null
+++ b/src/nnet/permutation.cc
@@ -0,0 +1,35 @@
+#include "nnet/permutation.h"
+#include <algorithm>
+
+namespace nnet {
+
+PermutationGenerator::PermutationGenerator(vector<vector<Iterator>> _from,
+                                           vector<vector<Iterator>> _to)
+    : from(_from), to(_to), mapping(from.size()) {
+    assert(from.size() == to.size());
+    for (size_t i = 0; i < from.size(); ++i)
+        for (size_t j = 0; j < from[i].size(); ++j)
+            mapping[i].emplace_back(j);
+}
+
+bool PermutationGenerator::next() {
+    if (mapping.empty())
+        return false;
+    for (int i = (int)mapping.size() - 1; i >= 0; --i) {
+        if (std::next_permutation(mapping[i].begin(), mapping[i].end()))
+            return true;
+    }
+    return false;
+}
+
+PtrMap<Iterator, Iterator> PermutationGenerator::get() const {
+    if (mapping.empty())
+        return {};
+    PtrMap<Iterator, Iterator> ret;
+    for (size_t i = 0; i < mapping.size(); ++i)
+        for (size_t j = 0; j < mapping[i].size(); ++j)
+            ret[from[i][j]] = to[i][mapping[i][j]];
+    return ret;
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/replace_kit.cc b/src/nnet/replace_kit.cc
new file mode 100644
index 00000000..abf18ce8
--- /dev/null
+++ b/src/nnet/replace_kit.cc
@@ -0,0 +1,92 @@
+#include "nnet/ReplaceKit.h"
+#include "nnet/Visitor/ReplaceVariable.h"
+#include "nnet/Visitor/SimplifyFormulaMutator.h"
+
+namespace nnet {
+
+RangeOp ReplaceKit::replaceRangeOpIterator(const RangeOp &rangeOp,
+                                           const Replace &replace,
+                                           const Expr &replacedSummand) {
+    vector<VarRangePair> newVarRangePairs(replace.newVarRanges);
+    if (replace.iteratorType == IterationType::Loop) {
+        for (const auto &[var, range] : rangeOp->getLoopVarRanges()) {
+            if (!replace.isReplaced(var))
+                newVarRangePairs.emplace_back(var, range);
+        }
+        assert(newVarRangePairs.size() == rangeOp->getLoopVarRanges().size() -
+                                              replace.oldIters.size() +
+                                              replace.newIters.size());
+        // Check the number of loop iterators
+        return makeRangeOperator(newVarRangePairs, rangeOp->getSumVarRanges(),
+                                 replacedSummand);
+    } else if (replace.iteratorType == IterationType::Sum) {
+        for (const auto &[var, range] : rangeOp->getSumVarRanges()) {
+            if (!replace.isReplaced(var))
+                newVarRangePairs.emplace_back(var, range);
+        }
+        assert(newVarRangePairs.size() == rangeOp->getSumVarRanges().size() -
+                                              replace.oldIters.size() +
+                                              replace.newIters.size());
+        return makeRangeOperator(rangeOp->getLoopVarRanges(), newVarRangePairs,
+                                 replacedSummand, rangeOp->getPaddings());
+    }
+    assert(false);
+    return nullptr;
+}
+
+Subscript ReplaceKit::buildSubscirptForLoopVarReplace(const RangeOp &inner,
+                                                      const Replace &replace) {
+    VecExpr subs(replace.phis);
+    for (size_t i = 0; i < replace.newVarRanges.size(); ++i) {
+        assert(replace.newIters[i]->equal(inner->getLoopVar(i)));
+    }
+    for (size_t i = replace.newVarRanges.size();
+         i < inner->getLoopVarRanges().size(); ++i) {
+        subs.emplace_back(inner->getLoopVar(i));
+    }
+    // The support of var reorder and replace at the same time
+    // VecExpr subs;
+    // for (size_t i = 0; i < inner->getLoopVarRanges().size(); ++i) {
+    //     if (auto it = std::find(replace.newIters.begin(),
+    //                             replace.newIters.end(),
+    //                             inner->getLoopVar(i));
+    //         it != replace.newIters.end()) {
+    //         subs.emplace_back(replace.phis[it - replace.newIters.begin()]);
+    //     } else
+    //         subs.emplace_back(inner->getLoopVar(i));
+    // }
+    return makeSubscript(inner, subs);
+}
+
+RangeOp
+ReplaceKit::buildDLTOuterRangeOp(const RangeOp &original,
+                                 const Subscript &subscriptedNewRangeOp) {
+    auto outer = make_ref<RangeOpNode>(*original);
+    outer->setSummand(subscriptedNewRangeOp);
+    outer->setSumIterator({});
+    return outer;
+}
+
+Expr ReplaceKit::replaceMultipleExprs(const Expr &cur,
+                                      const vector<Var> &patterns,
+                                      const VecExpr &replacements,
+                                      bool simplify) {
+    auto ret = cur;
+    for (size_t i = 0; i < patterns.size(); ++i) {
+        ret = replaceExpr(ret, patterns[i], replacements[i]);
+    }
+    if (simplify) {
+        SimplifyFormulaMutator simplifyFormulaMutator;
+        ret = simplifyFormulaMutator.simplify(ret);
+    }
+    return ret;
+}
+
+Expr ReplaceKit::replaceExpr(const Expr &cur, const Expr &pattern,
+                             const Expr &replacement) {
+    auto replace = ReplaceVariable(pattern, replacement);
+    auto ret = replace(cur);
+    return ret;
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/routine.cc b/src/nnet/routine.cc
new file mode 100644
index 00000000..22c539bb
--- /dev/null
+++ b/src/nnet/routine.cc
@@ -0,0 +1,136 @@
+#include "nnet/routine.h"
+#include "nnet/Visitor/CloneMutator.h"
+#include <algorithm>
+namespace nnet {
+
+RoutineNode::RoutineNode(Expr _expr, const vector<Tensor> &_inputs)
+    : inputs(_inputs) {
+    if (_expr)
+        expr = CloneMutator().clone(_expr);
+    else
+        expr = nullptr;
+}
+
+string MatmulNode::toReadable() const {
+    std::ostringstream oss;
+    assert(inputs.size() == 2);
+    oss << "Matmul{bmnk = " << b << ", " << m << ", " << n << ", " << k
+        << "; AB = " << inputs[0] << ", " << inputs[1]
+        << "; transAB = " << transa << ", " << transb << "}";
+    return oss.str();
+}
+
+bool operator==(const MatmulNode &lhs, const MatmulNode &rhs) {
+    if (!((lhs.b == rhs.b) && lhs.m == rhs.m && lhs.n == rhs.n &&
+          lhs.k == rhs.k && lhs.transa == rhs.transa &&
+          lhs.transb == rhs.transb))
+        return false;
+    if (lhs.inputs.size() != rhs.inputs.size())
+        return false;
+    for (size_t i = 0; i < lhs.inputs.size(); ++i) {
+        if (lhs.inputs[i]->getName() != rhs.inputs[i]->getName())
+            return false;
+    }
+    return true;
+}
+
+string ConvNode::toReadable() const {
+    std::ostringstream oss;
+    assert(inputs.size() == 2);
+    oss << "Conv{A =" << inputs[0]
+        << " shape=" << serializeVec(inputs[0]->getShape())
+        << ", K=" << inputs[1]
+        << " shape=" << serializeVec(inputs[1]->getShape()) << ", p = " << ph
+        << ", " << pw << ", s= " << sh << ", " << sw << ", d= " << dh << ", "
+        << dw << "}";
+    return oss.str();
+}
+
+bool operator==(const ConvNode &lhs, const ConvNode &rhs) {
+    if (!(lhs.ph == rhs.ph && lhs.pw == rhs.pw && lhs.sh == rhs.sh &&
+          lhs.sw == rhs.sw && lhs.dh == rhs.dh && lhs.dw == rhs.dw))
+        return false;
+    if (lhs.inputs.size() != rhs.inputs.size())
+        return false;
+    for (size_t i = 0; i < lhs.inputs.size(); ++i) {
+        if (lhs.inputs[i]->getName() != rhs.inputs[i]->getName())
+            return false;
+    }
+    return true;
+}
+
+vector<int> ConvNode::getShape() const {
+    auto input = inputs[0], weight = inputs[1];
+    auto n = input->getShape(0);
+    auto h = input->getShape(2);
+    auto w = input->getShape(3);
+    auto f = weight->getShape(0);
+    auto r = weight->getShape(2);
+    auto s = weight->getShape(3);
+    int on = n, oc = f;
+    int oh = 0, ow = 0;
+    // Set padding size
+    oh = (h - (r - sh) * dh + ph * 2) / sh;
+    ow = (w - (s - sw) * dw + pw * 2) / sw;
+    auto ret = {on, oc, oh, ow};
+    return ret;
+}
+
+ConvArgs ConvNode::getArgs() const { return tuple(ph, pw, sh, sw, dh, dw); }
+
+vector<int> G2bmmNode::getShape() const { return {b, m, 2 * w + 1}; }
+
+vector<int> GbmmNode::getShape() const { return {b, m, n}; }
+
+string ElementWiseNode::toReadable() const {
+    std::ostringstream oss;
+    oss << "EleWise{";
+    for (const auto &input : inputs)
+        oss << input << ", ";
+    oss << "}";
+    return oss.str();
+}
+
+double ElementWiseNode::getEstimatedTime() const {
+    int64_t cntElements = 0;
+    // For unimplemented transpose
+    assert(inputs.size() > 0);
+    if (!expr) {
+        assert(inputs.size() == 1);
+    }
+    for (const auto &input : inputs)
+        cntElements += input->getSize();
+    int64_t outputSize = 1;
+    for (const auto &len : outputShape)
+        outputSize *= len;
+    cntElements += outputSize;
+
+    const double bandwidth = 200 * 1000000;
+    // dbg(inputs, inputs[0]->getShape(), cntElements,
+    // (cntElements * 4) / bandwidth);
+    return double(cntElements * 4) / bandwidth; // ms
+}
+
+string G2bmmNode::toReadable() const {
+    std::ostringstream oss;
+    oss << "G2bmm{";
+    for (const auto &input : inputs)
+        oss << input << ", ";
+    oss << ", bmwk = " << b << " " << m << " " << w << " " << k << "}";
+    return oss.str();
+}
+
+string GbmmNode::toReadable() const {
+    std::ostringstream oss;
+    oss << "Gbmm{";
+    for (const auto &input : inputs)
+        oss << input << ", ";
+    oss << ", bmwn = " << b << " " << m << " " << w << " " << n << "}";
+    return oss.str();
+}
+
+G2bmmArgs G2bmmNode::getArgs() const { return {b, m, w, k, 1}; }
+
+GbmmArgs GbmmNode::getArgs() const { return {b, m, w, n, 1}; }
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/test.cc b/src/nnet/test.cc
new file mode 100644
index 00000000..6c9738f2
--- /dev/null
+++ b/src/nnet/test.cc
@@ -0,0 +1,70 @@
+#include "nnet/Visitor/FullPrinterVisitor.h"
+#include "nnet/Visitor/GetTensorsVisitor.h"
+#include "nnet/Visitor/HashVisitor.h"
+#include "nnet/Visitor/Interpreter.h"
+#include "nnet/Visitor/Serializer.h"
+namespace nnet {
+
+int matchExprResult(Derivator &derivator, string fn) {
+    auto ans = Serializer().deserialize(fn);
+    auto hashAns = HashVisitor()(ans);
+    int match = 0;
+    for (const auto &candidate : derivator.getCandidates()) {
+        auto hashCandidate = HashVisitor()(candidate.root);
+        match += (hashAns == hashCandidate);
+    }
+    return match;
+}
+
+bool checkExprLogSame(string fnPrefix, int start, int end) {
+    Serializer serializer;
+    string fn0 = fnPrefix + to_string(start) + ".expr";
+    Expr expr0 = serializer.deserialize(fn0);
+    RangeOp range0 = as<RangeOpNode>(expr0);
+    Interpreter interpreter(range0);
+    auto ans0 = interpreter.interpretUniformSample(range0);
+    dbg(expr0, ans0);
+    for (int i = start + 1; i < end; ++i) {
+        string fn1 = fnPrefix + to_string(i) + ".expr";
+        Expr expr1 = serializer.deserialize(fn1);
+        RangeOp range1 = as<RangeOpNode>(expr1);
+        dbg(fn1, expr1);
+        auto ans1 = interpreter.interpretUniformSample(range1);
+        dbg(ans1);
+        if (ans0.size() != ans1.size())
+            return false;
+        for (size_t i = 0; i < ans0.size(); ++i)
+            if (ans0[i] != ans1[i])
+                return false;
+    }
+    return true;
+}
+
+bool checkExprsEquvivalence(VecExpr exprs) {
+    if (exprs.size() < 2)
+        return true;
+    auto inputsMap0 = GetTensorsVisitor().get(exprs[0]);
+    RangeOp range0 = as<RangeOpNode>(exprs[0]);
+    Interpreter interpreter(range0);
+    auto ans0 = interpreter.interpretUniformSample(range0);
+    for (size_t i = 1; i + 1 < exprs.size(); ++i) {
+        RangeOp range1 = as<RangeOpNode>(exprs[i]);
+        auto inputsMap1 = GetTensorsVisitor().get(range1);
+        // if expr0 and expr1 have different inputs, skip and return true
+        if (inputsMap0.size() != inputsMap1.size())
+            return true;
+        for (const auto &[name, tensor] : inputsMap0) {
+            if (!inputsMap1.count(name))
+                return true;
+        }
+        auto ans1 = interpreter.interpretUniformSample(range1);
+        if (ans0.size() != ans1.size())
+            return false;
+        for (size_t i = 0; i < ans0.size(); ++i)
+            if (ans0[i] != ans1[i])
+                return false;
+    }
+    return true;
+}
+
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/utils.cc b/src/nnet/utils.cc
new file mode 100644
index 00000000..174299e5
--- /dev/null
+++ b/src/nnet/utils.cc
@@ -0,0 +1,12 @@
+#include "nnet/common.h"
+namespace nnet {
+
+std::string pointer_to_hex(void *i) {
+    std::stringstream stream;
+    // stream << "0x" << std::setfill('0') << std::setw(sizeof(void *) * 2) <<
+    // std::hex
+    //        << i;
+    stream << std::hex << i;
+    return stream.str();
+}
+} // namespace nnet
\ No newline at end of file
diff --git a/src/nnet/visitor.cc b/src/nnet/visitor.cc
new file mode 100644
index 00000000..15efea0f
--- /dev/null
+++ b/src/nnet/visitor.cc
@@ -0,0 +1,97 @@
+#include "nnet/visitor.h"
+namespace nnet {
+
+Expr Mutator::visit_([[maybe_unused]] const Constant &c) { return nullptr; }
+
+Expr Mutator::visit_(const BinaryOp &c) {
+    if (verbose)
+        dbg(*c);
+    bool modified = false;
+    auto ret = make_ref<BinaryOpNode>(*c);
+    if (auto e = this->dispatch(ret->getLhs()); e) {
+        modified = true;
+        ret->setLhs(e);
+    }
+    if (auto e = this->dispatch(ret->getRhs()); e) {
+        modified = true;
+        ret->setRhs(e);
+    }
+    return (modified) ? ret : nullptr;
+}
+
+Expr Mutator::visit_(const RangeOp &c) {
+    if (verbose)
+        dbg(*c);
+    bool modified = false;
+    auto ret = make_ref<RangeOpNode>(*c);
+    if (auto mutated = this->dispatch(ret->getSummand()); mutated) {
+        modified = true;
+        ret->setSummand(mutated);
+    }
+    // NOT visit iterators and its ranges
+    return (modified) ? ret : nullptr;
+}
+
+Expr Mutator::visit_(const Subscript &c) {
+    if (verbose)
+        dbg(*c);
+    bool modified = false;
+    auto ret = make_ref<SubscriptNode>(*c);
+    for (size_t i = 0; i < ret->getDims(); ++i) {
+        const auto &e = ret->getIndex(i);
+        if (const auto &mutated = this->dispatch(e); mutated) {
+            modified = true;
+            ret->setIndex(i, mutated);
+        }
+    }
+    if (auto mutated = this->dispatch(ret->getObject()); mutated) {
+        modified = true;
+        ret->setObject(mutated);
+    }
+    return (modified) ? ret : nullptr;
+}
+
+Expr Mutator::visit_([[maybe_unused]] const Var &c) { return nullptr; }
+
+Expr Mutator::visit_([[maybe_unused]] const Tensor &c) { return nullptr; }
+
+Expr Mutator::visit_(const Func &c) {
+    if (verbose)
+        dbg(c);
+    bool modified = false;
+    auto ret = make_ref<FuncNode>(*c);
+    if (auto mutated = dispatch(c->getObject())) {
+        modified = true;
+        ret->setObject(mutated);
+    }
+    return (modified) ? ret : nullptr;
+}
+
+void ExprTreeVisitor::visit_(const RangeOp &c) {
+    if (inRange)
+        dispatch(c->getSummand());
+}
+void ExprTreeVisitor::visit_(const BinaryOp &c) {
+    if (inBinary) {
+        dispatch(c->getLhs());
+        dispatch(c->getRhs());
+    }
+}
+void ExprTreeVisitor::visit_(const Subscript &c) {
+    if (inSub) {
+        dispatch(c->getObject());
+        for (const auto &index : c->getIndex())
+            dispatch(index);
+    }
+}
+void ExprTreeVisitor::visit_([[maybe_unused]] const Var &c) {}
+void ExprTreeVisitor::visit_([[maybe_unused]] const Constant &c) {}
+void ExprTreeVisitor::visit_(const Tensor &c) {
+    if (inTensor && c->getSource()) {
+        if (const auto &expr = c->getSource()->getExpr(); expr)
+            dispatch(expr);
+    }
+}
+void ExprTreeVisitor::visit_(const Func &c) { dispatch(c->getObject()); }
+
+} // namespace nnet
diff --git a/src/operators/matmul.cc b/src/operators/matmul.cc
new file mode 100644
index 00000000..9f15bc5c
--- /dev/null
+++ b/src/operators/matmul.cc
@@ -0,0 +1,56 @@
+#include "operators/matmul.h"
+
+namespace infini {
+
+vector<Shape> MatmulNode::computeShape() const { return {{b, m, n}}; }
+
+MatmulNode::MatmulNode(Tensor A, Tensor B, Tensor C, bool transA, bool transB,
+                       Tensor bias, ActType act)
+    : OperatorNode(OpType::Matmul, {A, B, bias}, {C}), transA(transA),
+      transB(transB), act(act), b(A->getDims()[0]),
+      m(transA ? A->getDims()[2] : A->getDims()[1]),
+      n(transB ? B->getDims()[1] : B->getDims()[2]),
+      k(transA ? A->getDims()[1] : A->getDims()[2]) {
+    IT_ASSERT(checkValid(inputs));
+}
+
+string MatmulNode::toString() const {
+    std::ostringstream os;
+    os << "Matmul([" << (transA ? "A^T" : "A") << "," << (transB ? "B^T" : "B")
+       << ",act=" << enum_to_underlying(act) << "],A=" << inputs[0]->getGuid()
+       << ",B=" << inputs[1]->getGuid() << ",C=" << outputs[0]->getGuid()
+       << ")";
+    return os.str();
+}
+
+bool MatmulNode::checkValid(const TensorVec &inputs) const {
+    auto A = inputs[0], B = inputs[1];
+    // if (A->getType() == Tensor::Weight && B->getType() == Tensor::Weight)
+    //     return false;
+    IT_ASSERT(A->getDims().size() == 3 && B->getDims().size() == 3);
+    IT_ASSERT(A->getDims()[0] == B->getDims()[0]);
+    IT_ASSERT((transA ? A->getDims()[1] : A->getDims()[2]) ==
+              (transB ? B->getDims()[2] : B->getDims()[1]));
+    // if (A->getDims().size() != 3 || B->getDims().size() != 3) {
+    //     return false;
+    // }
+    // if (A->getDims()[0] != B->getDims()[0]) {
+    //     return false;
+    // }
+    // if ((args.transA ? A->getDims()[1] : A->getDims()[2]) !=
+    //     (args.transB ? B->getDims()[2] : B->getDims()[1])) {
+    //     return false;
+    // }
+    return true;
+}
+
+HashType MatmulNode::hashWithShape() const {
+    // TODO: use a real hash
+    return b + m + n + k + transA + transB + enum_to_underlying(act);
+}
+
+OpPerfKey MatmulNode::getOpPerfKey() const {
+    return OpPerfKey(hashWithShape(), type,
+                     {b, m, n, k, transA, transB, enum_to_underlying(act)});
+}
+} // namespace infini
\ No newline at end of file
diff --git a/test/core/test_graph.cc b/test/core/test_graph.cc
new file mode 100644
index 00000000..b8a12333
--- /dev/null
+++ b/test/core/test_graph.cc
@@ -0,0 +1,46 @@
+#include "core/graph.h"
+#include "core/run_enigne.h"
+#include "operators/matmul.h"
+#include "test.h"
+
+namespace infini {
+
+TEST(Graph, build_and_run) {
+    Graph g = make_ref<GraphNode>();
+    Tensor i0 = g->addTensor({1, 2, 3}, DataType::Int32);
+    Tensor w0 = g->addTensor({1, 3, 4}, DataType::Int32);
+    Tensor o0 = g->addTensor({1, 2, 4}, DataType::Int32);
+    g->dataMalloc();
+    i0->copyData(vector<VType>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}.data());
+    w0->copyData(vector<VType>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}.data());
+    g->addOp(make_ref<MatmulNode>(i0, w0, o0));
+    RunEngine(Device::CPU).run(g);
+    // check answer
+    auto ans = make_ref<TensorNode>(Shape{1, 2, 4}, DataType::Int32);
+    ans->dataMalloc();
+    ans->copyData(vector<VType>{38, 44, 50, 56, 83, 98, 113, 128}.data());
+    EXPECT_TRUE(o0->equalData(ans));
+}
+
+TEST(Graph, perf_engine) {
+    Graph g = make_ref<GraphNode>();
+    Tensor i0 = g->addTensor({1, 2, 3}, DataType::Int32);
+    Tensor w0 = g->addTensor({1, 3, 4}, DataType::Int32);
+    Tensor o0 = g->addTensor({1, 2, 4}, DataType::Int32);
+    g->dataMalloc();
+    i0->copyData(vector<VType>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}.data());
+    w0->copyData(vector<VType>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}.data());
+    g->addOp(make_ref<MatmulNode>(i0, w0, o0));
+    RunEngine(Device::CPU).run(g, true, true);
+    double perfTime = RunEngine(Device::CPU).getPerfTime(g);
+    // The example matmul takes 0.0036ms with one core
+    EXPECT_GT(perfTime, 0);
+    EXPECT_LT(perfTime, 0.01);
+    // check answer
+    auto ans = make_ref<TensorNode>(Shape{1, 2, 4}, DataType::Int32);
+    ans->dataMalloc();
+    ans->copyData(vector<VType>{38, 44, 50, 56, 83, 98, 113, 128}.data());
+    EXPECT_TRUE(o0->equalData(ans));
+}
+
+} // namespace infini
\ No newline at end of file
diff --git a/test/nnet/failed/test_activation.cc b/test/nnet/failed/test_activation.cc
new file mode 100644
index 00000000..7b1400ec
--- /dev/null
+++ b/test/nnet/failed/test_activation.cc
@@ -0,0 +1,49 @@
+#include "code_engine.h"
+#include "nnet/expr.h"
+#include "nnet/nmutator.h"
+#include "operator.h"
+#include "search_engine.h"
+#include "tensor.h"
+#include "gtest/gtest.h"
+using namespace nnet;
+using namespace std;
+#define DEFINE_VAR(name) auto name = make_ref<VarNode>(#name);
+
+TEST(Activation, Relu) {
+    const int n_heads = 8, seq_len = 10000, feat_len = 512;
+    // dilation_heads = 2;
+    const int Batch = n_heads, M = seq_len, K = feat_len, W = 32;
+    DEFINE_VAR(b);
+    DEFINE_VAR(m);
+    DEFINE_VAR(w);
+    DEFINE_VAR(k);
+    auto A = make_ref<TensorNode>("A", vector<int>({Batch, M, K}),
+                                  vector<int>{0, 0, 0});
+
+    auto subA = makeSubscript(A, {b, m, k});
+    auto innerRange = makeRangeOperator(
+        {{b, {0, Batch}}, {m, {0, M}}, {k, {0, K}}}, {}, subA);
+    auto outerSub = makeSubscript(innerRange, {b, m, k});
+    // auto subB = makeSubscript(B, {b, m + dilation * (w - W), k});
+    auto relu = make_ref<FuncNode>(subA, FuncType::Relu);
+    auto range =
+        makeRangeOperator({{b, {0, Batch}}, {m, {0, M}}, {w, {0, 2 * W + 1}}},
+                          {{k, {0, K}}}, relu);
+    dbg(range);
+
+    auto g = new tpm::Graph();
+    auto i0 = g->tensor({Batch, M, K});
+    auto i1 = g->tensor({Batch, M, 2 * W + 1});
+
+    tpm::TensorVec inputsT{i0};
+    tpm::TensorVec outputsT{i1};
+    g->membound(inputsT, outputsT, {A}, range, 0);
+
+    g->updateConnection();
+    std::shared_ptr<tpm::SubGraph> graph, bestGraph;
+    graph = std::make_shared<tpm::SubGraph>(g->getOperators());
+    tpm::SearchEngine searchEngine(make_shared<tpm::NMutator>());
+    searchEngine.run(graph, bestGraph);
+    tpm::CodeEngine codeEngine;
+    codeEngine.genCode(bestGraph, "res.cu");
+}
diff --git a/test/nnet/failed/test_csrnet.cc b/test/nnet/failed/test_csrnet.cc
new file mode 100644
index 00000000..41b358bc
--- /dev/null
+++ b/test/nnet/failed/test_csrnet.cc
@@ -0,0 +1,81 @@
+#include "code_engine.h"
+#include "graph.h"
+#include "nnet/derivator.h"
+#include "nnet/dmutator.h"
+#include "nnet/expr.h"
+#include "nnet/visitor.h"
+#include "operator.h"
+#include "search_engine.h"
+#include "gtest/gtest.h"
+using namespace nnet;
+using namespace std;
+#define DEFINE_VAR(name) auto name = make_ref<VarNode>(#name);
+
+void runCsrnetOpt(int batchSize) {
+    const int N = 2 * batchSize, C = 512, H = 14, W = 14 / 2, R = 3, S = 3;
+    auto g = new tpm::Graph();
+
+    auto i0 = g->tensor({N, C, H, W});
+    vector<tpm::Tensor *> w{
+        g->tensor({512, 512, R, S}), g->tensor({512, 512, R, S}),
+        g->tensor({512, 512, R, S}), g->tensor({256, 512, R, S}),
+        g->tensor({128, 256, R, S}), g->tensor({64, 128, R, S})};
+
+    const int nLayers = 6;
+    i0 = g->transpose(i0, 2, {{0, -1}, 1, 2, 3}, 2)->getOutput();
+    for (int i = 0; i < nLayers; ++i) {
+        auto conv = g->conv(i0, w[i], 1, 1, 1, 1, 1, 1);
+        auto relu = g->relu(conv->getOutput());
+        i0 = relu->getOutput();
+    }
+    auto i1 = g->transpose(i0, 0, {0, 1, {2, -1}, 3}, 2)->getOutput();
+    auto outputShape = i1->getDims();
+    ASSERT_TRUE(outputShape[0] == N * 2);
+    ASSERT_TRUE(outputShape[1] == 64);
+    ASSERT_TRUE(outputShape[2] == H);
+    ASSERT_TRUE(outputShape[3] == W / 2);
+
+    g->updateConnection();
+
+    std::shared_ptr<tpm::SubGraph> graph, bestGraph;
+    graph = std::make_shared<tpm::SubGraph>(g->getOperators());
+    tpm::SearchEngine searchEngine(make_shared<tpm::DMutator>());
+    searchEngine.run(graph, bestGraph);
+    tpm::CodeEngine codeEngine;
+    codeEngine.genCode(bestGraph, "res.cu");
+}
+
+TEST(CSRNET, Original) {
+    const int N = 1, C = 512, H = 14, W = 14, R = 3, S = 3;
+    auto g = new tpm::Graph();
+
+    auto i0 = g->tensor({N, C, H, W});
+    vector<tpm::Tensor *> w{
+        g->tensor({512, 512, R, S}), g->tensor({512, 512, R, S}),
+        g->tensor({512, 512, R, S}), g->tensor({256, 512, R, S}),
+        g->tensor({128, 256, R, S}), g->tensor({64, 128, R, S})};
+
+    const int nLayers = 6;
+    for (int i = 0; i < nLayers; ++i) {
+        auto conv = g->conv(i0, w[i], 2, 2, 1, 1, 2, 2);
+        auto relu = g->relu(conv->getOutput());
+        i0 = relu->getOutput();
+    }
+    auto outputShape = i0->getDims();
+    ASSERT_TRUE(outputShape[0] == N);
+    ASSERT_TRUE(outputShape[1] == 64);
+    ASSERT_TRUE(outputShape[2] == H);
+    ASSERT_TRUE(outputShape[3] == W);
+
+    g->updateConnection();
+
+    std::shared_ptr<tpm::SubGraph> graph, bestGraph;
+    graph = std::make_shared<tpm::SubGraph>(g->getOperators());
+    tpm::SearchEngine searchEngine(make_shared<tpm::DMutator>());
+    searchEngine.run(graph, bestGraph);
+    tpm::CodeEngine codeEngine;
+    codeEngine.genCode(bestGraph, "res.cu");
+}
+
+TEST(CSRNET, Optimized_BS1) { runCsrnetOpt(1); }
+TEST(CSRNET, Optimized_BS16) { runCsrnetOpt(16); }
\ No newline at end of file
diff --git a/test/nnet/failed/test_longformer.cc b/test/nnet/failed/test_longformer.cc
new file mode 100644
index 00000000..c11f63b9
--- /dev/null
+++ b/test/nnet/failed/test_longformer.cc
@@ -0,0 +1,351 @@
+#include "code_engine.h"
+#include "nnet/nmutator.h"
+#include "operator.h"
+#include "search_engine.h"
+#include "tensor.h"
+#include "gtest/gtest.h"
+using namespace std;
+namespace ch {
+using namespace std::chrono;
+}
+
+TEST(Longformer, e2e_bs1_depth) {
+    const int bs = 1, seqlen = 10000, w = 1000, featlen = 512, heads = 8, d = 4;
+    const int hidden = featlen, hiddenPerHead = hidden / heads;
+    assert(hidden % heads == 0);
+    auto g = new tpm::Graph();
+
+    auto i0 = g->tensor({bs, seqlen, featlen});
+    auto w0 = g->tensor({featlen, hidden});
+    auto w1 = g->tensor({512, 512});
+    auto w2 = g->tensor({512, 512});
+    // Feed forward
+    auto w3 = g->tensor({512, 512});
+    auto bias3 = g->tensor({512});
+    auto w4 = g->tensor({512, 512});
+    auto bias4 = g->tensor({512});
+
+    auto q0 = g->tensor({bs, seqlen, hidden});
+    auto k0 = g->tensor({bs, seqlen, hidden});
+    auto v0 = g->tensor({bs, seqlen, hidden});
+
+    auto q1 = g->tensor({bs, seqlen, heads, hiddenPerHead});
+    auto k1 = g->tensor({bs, seqlen, heads, hiddenPerHead});
+    auto v1 = g->tensor({bs, seqlen, heads, hiddenPerHead});
+
+    auto q2 = g->tensor({bs, heads, seqlen, hiddenPerHead});
+    auto k2 = g->tensor({bs, heads, seqlen, hiddenPerHead});
+    auto v2 = g->tensor({bs, heads, seqlen, hiddenPerHead});
+
+    auto q3 = g->tensor({bs * heads, seqlen, hiddenPerHead});
+    auto k3 = g->tensor({bs * heads, seqlen, hiddenPerHead});
+    auto v3 = g->tensor({bs * heads, seqlen, hiddenPerHead});
+
+    auto prob = g->tensor({bs * heads, seqlen, 2 * w + 1});
+    auto probSoftmax = g->tensor({bs * heads, seqlen, 2 * w + 1});
+    auto attn = g->tensor({bs * heads, seqlen, hiddenPerHead});
+
+    auto t00 = g->tensor({bs, seqlen, hidden});
+    auto t01 = g->tensor({bs, seqlen, hidden});
+    auto t02 = g->tensor({bs, seqlen, hidden});
+    // auto t10 = g->tensor({bs, seqlen, hidden});
+    auto t11 = g->tensor({bs, seqlen, hidden});
+    auto t12 = g->tensor({bs, seqlen, hidden});
+    auto output = g->tensor({bs, seqlen, featlen});
+
+    g->matmul(i0, w0, q0, false, true);
+    g->matmul(i0, w1, k0, false, true);
+    g->matmul(i0, w2, v0, false, true);
+    g->reshape(q0, q1);
+    g->reshape(k0, k1);
+    g->reshape(v0, v1);
+    g->transpose(q1, q2, 0, {{0, -1}, 2, 1, 3}, 1);
+    g->transpose(k1, k2, 0, {{0, -1}, 2, 1, 3}, 1);
+    g->transpose(v1, v2, 0, {{0, -1}, 2, 1, 3}, 1);
+    g->reshape(q2, q3);
+    g->reshape(k2, k3);
+    g->reshape(v2, v3);
+    // Attention
+    g->g2bmm(q3, k3, prob, w, d);
+    g->softmax(prob, probSoftmax, 2);
+    g->gbmml(probSoftmax, v3, attn, d);
+    g->transpose(attn, t00, 0, {0, 1, {-1, 2}}, heads);
+
+    // Feed forward
+    g->matmul(t00, w3, t01, false, true, bias3);
+    g->relu(t01, t02);
+    g->matmul(t02, w4, t11, false, true, bias4);
+    g->relu(t11, t12);
+    g->add({t12, i0}, output);
+
+    g->updateConnection();
+
+    for (int i = 0; i <= 8; ++i) {
+        ch::time_point<ch::high_resolution_clock, ch::nanoseconds> beg, end;
+        beg = ch::high_resolution_clock::now();
+        std::shared_ptr<tpm::SubGraph> graph, bestGraph;
+        graph = std::make_shared<tpm::SubGraph>(g->getOperators());
+        auto mutationEngine = make_shared<tpm::NMutator>();
+        mutationEngine->setMaxDepth(i);
+        tpm::SearchEngine searchEngine(mutationEngine);
+        searchEngine.run(graph, bestGraph);
+        // dbg("bestGraph");
+        bestGraph->print();
+        tpm::CodeEngine codeEngine;
+        // codeEngine.importPerfEngine(perfEngine);
+        // codeEngine.genCode(bestGraph, "res.cu");
+
+        // const auto originalTime = searchEngine.getPerf(graph, true);
+        const auto bestTime = searchEngine.getPerf(bestGraph, true);
+        // dbg(originalTime, bestTime);
+        // EXPECT_GE(originalTime, 45);
+        // EXPECT_LE(bestTime, 25);
+        end = ch::high_resolution_clock::now();
+        double t = ch::duration_cast<ch::duration<double>>(end - beg).count();
+        // printf("====== maxdepth=%d \n", i);
+        printf("Statistics: maxdepth %d , time %.3lf s, states %lld , "
+               "candidate %lld , best time %lf\n",
+               i, t, mutationEngine->cntStates, mutationEngine->cntCandidates,
+               bestTime);
+    }
+}
+
+TEST(Longformer, e2e_bs1) {
+    const int bs = 1, seqlen = 10000, w = 1000, featlen = 512, heads = 8, d = 4;
+    const int hidden = featlen, hiddenPerHead = hidden / heads;
+    assert(hidden % heads == 0);
+    auto g = new tpm::Graph();
+
+    auto i0 = g->tensor({bs, seqlen, featlen});
+    auto w0 = g->tensor({featlen, hidden});
+    auto w1 = g->tensor({512, 512});
+    auto w2 = g->tensor({512, 512});
+    // Feed forward
+    auto w3 = g->tensor({512, 512});
+    auto bias3 = g->tensor({512});
+    auto w4 = g->tensor({512, 512});
+    auto bias4 = g->tensor({512});
+
+    auto q0 = g->tensor({bs, seqlen, hidden});
+    auto k0 = g->tensor({bs, seqlen, hidden});
+    auto v0 = g->tensor({bs, seqlen, hidden});
+
+    auto q1 = g->tensor({bs, seqlen, heads, hiddenPerHead});
+    auto k1 = g->tensor({bs, seqlen, heads, hiddenPerHead});
+    auto v1 = g->tensor({bs, seqlen, heads, hiddenPerHead});
+
+    auto q2 = g->tensor({bs, heads, seqlen, hiddenPerHead});
+    auto k2 = g->tensor({bs, heads, seqlen, hiddenPerHead});
+    auto v2 = g->tensor({bs, heads, seqlen, hiddenPerHead});
+
+    auto q3 = g->tensor({bs * heads, seqlen, hiddenPerHead});
+    auto k3 = g->tensor({bs * heads, seqlen, hiddenPerHead});
+    auto v3 = g->tensor({bs * heads, seqlen, hiddenPerHead});
+
+    auto prob = g->tensor({bs * heads, seqlen, 2 * w + 1});
+    auto probSoftmax = g->tensor({bs * heads, seqlen, 2 * w + 1});
+    auto attn = g->tensor({bs * heads, seqlen, hiddenPerHead});
+
+    auto t00 = g->tensor({bs, seqlen, hidden});
+    auto t01 = g->tensor({bs, seqlen, hidden});
+    auto t02 = g->tensor({bs, seqlen, hidden});
+    // auto t10 = g->tensor({bs, seqlen, hidden});
+    auto t11 = g->tensor({bs, seqlen, hidden});
+    auto t12 = g->tensor({bs, seqlen, hidden});
+    auto output = g->tensor({bs, seqlen, featlen});
+
+    g->matmul(i0, w0, q0, false, true);
+    g->matmul(i0, w1, k0, false, true);
+    g->matmul(i0, w2, v0, false, true);
+    g->reshape(q0, q1);
+    g->reshape(k0, k1);
+    g->reshape(v0, v1);
+    g->transpose(q1, q2, 0, {{0, -1}, 2, 1, 3}, 1);
+    g->transpose(k1, k2, 0, {{0, -1}, 2, 1, 3}, 1);
+    g->transpose(v1, v2, 0, {{0, -1}, 2, 1, 3}, 1);
+    g->reshape(q2, q3);
+    g->reshape(k2, k3);
+    g->reshape(v2, v3);
+    // Attention
+    g->g2bmm(q3, k3, prob, w, d);
+    g->softmax(prob, probSoftmax, 2);
+    g->gbmml(probSoftmax, v3, attn, d);
+    g->transpose(attn, t00, 0, {0, 1, {-1, 2}}, heads);
+
+    // Feed forward
+    g->matmul(t00, w3, t01, false, true, bias3);
+    g->relu(t01, t02);
+    g->matmul(t02, w4, t11, false, true, bias4);
+    g->relu(t11, t12);
+    g->add({t12, i0}, output);
+
+    g->updateConnection();
+
+    std::shared_ptr<tpm::SubGraph> graph, bestGraph;
+    graph = std::make_shared<tpm::SubGraph>(g->getOperators());
+    auto mutationEngine = make_shared<tpm::NMutator>();
+    mutationEngine->setMaxDepth(5);
+    tpm::SearchEngine searchEngine(mutationEngine);
+    searchEngine.run(graph, bestGraph);
+    dbg("bestGraph");
+    bestGraph->print();
+    tpm::CodeEngine codeEngine;
+    codeEngine.genCode(bestGraph, "res.cu");
+
+    const auto originalTime = searchEngine.getPerf(graph, true);
+    const auto bestTime = searchEngine.getPerf(bestGraph, true);
+    dbg(originalTime, bestTime);
+    EXPECT_GE(originalTime, 45);
+    EXPECT_LE(bestTime, 25);
+}
+
+TEST(Longformer, g2bmm_bs1_d1) {
+    const int bs = 1, seqlen = 10000, w = 1000, featlen = 512, heads = 8, d = 1;
+    const int hidden = featlen, hiddenPerHead = hidden / heads;
+    auto g = new tpm::Graph();
+    auto q3 = g->tensor({bs * heads, seqlen, hiddenPerHead});
+    auto k3 = g->tensor({bs * heads, seqlen, hiddenPerHead});
+    // auto v3 = g->tensor({bs * heads, seqlen, hiddenPerHead});
+    auto prob = g->tensor({bs * heads, seqlen, 2 * w + 1});
+    g->g2bmm(q3, k3, prob, w, d);
+    // g->softmax(prob, probSoftmax, 2);
+    // g->gbmml(probSoftmax, v3, attn, d);
+    g->updateConnection();
+
+    std::shared_ptr<tpm::SubGraph> graph, bestGraph;
+    graph = std::make_shared<tpm::SubGraph>(g->getOperators());
+    tpm::SearchEngine searchEngine(make_shared<tpm::NMutator>());
+    searchEngine.run(graph, bestGraph);
+    dbg("bestGraph");
+    bestGraph->print();
+
+    const auto originalTime = searchEngine.getPerf(graph, true);
+    EXPECT_GE(originalTime, 500);
+    const auto bestTime = searchEngine.getPerf(bestGraph, true);
+    EXPECT_LE(bestTime, 400);
+}
+
+TEST(Longformer, g2bmm_bs1_d4) {
+    const int bs = 1, seqlen = 10000, w = 1000, featlen = 512, heads = 8, d = 4;
+    const int hidden = featlen, hiddenPerHead = hidden / heads;
+    auto g = new tpm::Graph();
+    auto q3 = g->tensor({bs * heads, seqlen, hiddenPerHead});
+    auto k3 = g->tensor({bs * heads, seqlen, hiddenPerHead});
+    // auto v3 = g->tensor({bs * heads, seqlen, hiddenPerHead});
+    auto prob = g->tensor({bs * heads, seqlen, 2 * w + 1});
+    g->g2bmm(q3, k3, prob, w, d);
+    // g->softmax(prob, probSoftmax, 2);
+    // g->gbmml(probSoftmax, v3, attn, d);
+    g->updateConnection();
+
+    std::shared_ptr<tpm::SubGraph> graph, bestGraph;
+    graph = std::make_shared<tpm::SubGraph>(g->getOperators());
+    tpm::SearchEngine searchEngine(make_shared<tpm::NMutator>());
+    searchEngine.run(graph, bestGraph);
+    dbg("bestGraph");
+    bestGraph->print();
+
+    const auto originalTime = searchEngine.getPerf(graph, true);
+    EXPECT_GE(originalTime, 500);
+    const auto bestTime = searchEngine.getPerf(bestGraph, true);
+    EXPECT_LE(bestTime, 400);
+}
+
+TEST(Longformer, e2e_bs16) {
+    const int bs = 16, seqlen = 10000, w = 1000, featlen = 512, heads = 8,
+              d = 4;
+    const int hidden = featlen, hiddenPerHead = hidden / heads;
+    assert(hidden % heads == 0);
+    auto g = new tpm::Graph();
+
+    auto i0 = g->tensor({bs * seqlen, featlen});
+    auto w0 = g->tensor({featlen, hidden});
+    auto w1 = g->tensor({512, 512});
+    auto w2 = g->tensor({512, 512});
+    // Feed forward
+    auto w3 = g->tensor({512, 512});
+    auto bias3 = g->tensor({512});
+    auto w4 = g->tensor({512, 512});
+    auto bias4 = g->tensor({512});
+
+    auto q0 = g->tensor({bs * seqlen, hidden});
+    auto k0 = g->tensor({bs * seqlen, hidden});
+    auto v0 = g->tensor({bs * seqlen, hidden});
+
+    auto q1 = g->tensor({bs, seqlen, heads, hiddenPerHead});
+    auto k1 = g->tensor({bs, seqlen, heads, hiddenPerHead});
+    auto v1 = g->tensor({bs, seqlen, heads, hiddenPerHead});
+
+    auto q2 = g->tensor({bs, heads, seqlen, hiddenPerHead});
+    auto k2 = g->tensor({bs, heads, seqlen, hiddenPerHead});
+    auto v2 = g->tensor({bs, heads, seqlen, hiddenPerHead});
+
+    auto q3 = g->tensor({bs * heads, seqlen, hiddenPerHead});
+    auto k3 = g->tensor({bs * heads, seqlen, hiddenPerHead});
+    auto v3 = g->tensor({bs * heads, seqlen, hiddenPerHead});
+
+    // TODO check  membound time
+    auto prob = g->tensor({bs * heads, seqlen, 2 * w + 1});
+    auto probSoftmax = g->tensor({bs * heads, seqlen, 2 * w + 1});
+    auto attn = g->tensor({bs * heads, seqlen, hiddenPerHead});
+    // auto attnReshape = g->tensor({bs, heads, seqlen, hiddenPerHead});
+
+    auto t0 = g->tensor({bs * seqlen, hidden});
+    auto t00 = g->tensor({bs * seqlen, hidden});
+    auto t01 = g->tensor({bs * seqlen, hidden});
+    auto t02 = g->tensor({bs * seqlen, hidden});
+    auto t11 = g->tensor({bs * seqlen, hidden});
+    auto t12 = g->tensor({bs * seqlen, hidden});
+    auto t13 = g->tensor({bs * seqlen, hidden});
+    auto output = g->tensor({bs, seqlen, featlen});
+
+    g->matmul(i0, w0, q0, false, true);
+    g->matmul(i0, w1, k0, false, true);
+    g->matmul(i0, w2, v0, false, true);
+    g->reshape(q0, q1);
+    g->reshape(k0, k1);
+    g->reshape(v0, v1);
+    g->transpose(q1, q2, 0, {{0, -1}, 2, 1, 3}, 1);
+    g->transpose(k1, k2, 0, {{0, -1}, 2, 1, 3}, 1);
+    g->transpose(v1, v2, 0, {{0, -1}, 2, 1, 3}, 1);
+    g->reshape(q2, q3);
+    g->reshape(k2, k3);
+    g->reshape(v2, v3);
+    // Attention
+    g->g2bmm(q3, k3, prob, w, d);
+    g->softmax(prob, probSoftmax, 2);
+    g->gbmml(probSoftmax, v3, attn, d);
+    // g->reshape(attn, attnReshape);
+    // HOW TO DO IT
+    // auto attn = g->tensor({bs * heads, seqlen, hiddenPerHead});
+    // auto attnReshape = g->tensor({bs, heads, seqlen, hiddenPerHead});
+    // auto t00 = g->tensor({bs, seqlen, heads*hiddenPerHeadidden});
+    g->transpose(attn, t0, 0, {0, 1, {-1, 2}}, heads);
+    // g->transpose(attnReshape, t00, 0, {0, -1, 2, 1, 3}, 1);
+    g->reshape(t0, t00);
+
+    // Feed forward
+    g->matmul(t00, w3, t01, false, true, bias3);
+    g->relu(t01, t02);
+    g->matmul(t02, w4, t11, false, true, bias4);
+    g->relu(t11, t12);
+    g->add({t12, i0}, t13);
+    g->reshape(t13, output);
+
+    g->updateConnection();
+
+    std::shared_ptr<tpm::SubGraph> graph, bestGraph;
+    graph = std::make_shared<tpm::SubGraph>(g->getOperators());
+    tpm::SearchEngine searchEngine(make_shared<tpm::NMutator>());
+    searchEngine.run(graph, bestGraph);
+    dbg("bestGraph");
+    bestGraph->print();
+    tpm::CodeEngine codeEngine;
+    codeEngine.genCode(bestGraph, "res.cu");
+
+    const auto originalTime = searchEngine.getPerf(graph, true);
+    const auto bestTime = searchEngine.getPerf(bestGraph, true);
+    dbg(originalTime, bestTime);
+    EXPECT_GE(originalTime, 700);
+    EXPECT_LE(bestTime, 400);
+}
\ No newline at end of file
diff --git a/test/nnet/failed/test_multiple_op.cc b/test/nnet/failed/test_multiple_op.cc
new file mode 100644
index 00000000..78f64ea1
--- /dev/null
+++ b/test/nnet/failed/test_multiple_op.cc
@@ -0,0 +1,30 @@
+#include "nnet/nmutator.h"
+#include "operator.h"
+#include "search_engine.h"
+#include "tensor.h"
+#include "gtest/gtest.h"
+#include <cstdlib>
+#include <iostream>
+
+const int m = 8, n = 8, k = 4;
+
+TEST(MULTIPLE_OP, main) {
+    auto g = new tpm::Graph();
+    auto i0 = g->tensor({1, m, k});
+    auto w0 = g->tensor({1, k, n});
+    auto w1 = g->tensor({1, k, n});
+    auto i1 = g->tensor({1, m, n});
+    auto i2 = g->tensor({1, m, n});
+    // auto i3 = g->tensor({1, m * 2, n});
+
+    g->matmul(i0, w0, i1);
+    g->matmul(i0, w1, i2);
+    // auto op2 = g->concat({i1, i2}, i3, 1);
+
+    std::shared_ptr<tpm::SubGraph> graph, bestGraph;
+    graph = std::make_shared<tpm::SubGraph>(g->getOperators());
+    tpm::SearchEngine searchEngine(std::make_shared<tpm::NMutator>());
+    searchEngine.run(graph, bestGraph);
+
+    delete g;
+}
diff --git a/test/nnet/failed/test_mutator.cc b/test/nnet/failed/test_mutator.cc
new file mode 100644
index 00000000..81ebcd24
--- /dev/null
+++ b/test/nnet/failed/test_mutator.cc
@@ -0,0 +1,407 @@
+#include "code_engine.h"
+#include "nnet/nmutator.h"
+#include "operator.h"
+#include "search_engine.h"
+#include "tensor.h"
+#include "gtest/gtest.h"
+using namespace std;
+using namespace infini;
+
+// TEST(Mutator, Conv9x9) {
+//     auto g = new tpm::Graph();
+//     auto i0 = g->tensor({1, 1, 224, 224});
+
+//     auto w1 = g->tensor({64, 1, 9, 9});
+
+//     g->conv(i0, w1, 4, 4);
+
+//     g->updateConnection();
+
+//     std::shared_ptr<tpm::SubGraph> graph, bestGraph;
+//     graph = std::make_shared<tpm::SubGraph>(g->getOperators());
+//     tpm::SearchEngine<tpm::NMutator> searchEngine;
+//     searchEngine.run(graph, bestGraph);
+//     tpm::CodeEngine codeEngine;
+//     auto perfEngine = searchEngine.exportPerfEngine();
+//     codeEngine.importPerfEngine(perfEngine);
+//     codeEngine.genCode(bestGraph, "res.cu");
+// }
+
+// TEST(Mutator, TConv_1) {
+//     auto g = new tpm::Graph();
+
+//     auto i0 = g->tensor({1, 1, 1, 228});
+//     auto w1 = g->tensor({228, 2, 2, 448});
+
+//     // g->conv(i0, w1, 4, 4);
+//     g->convTrans(i0, w1, 0, 0, 1, 1);
+
+//     g->updateConnection();
+
+//     std::shared_ptr<tpm::SubGraph> graph, bestGraph;
+//     graph = std::make_shared<tpm::SubGraph>(g->getOperators());
+//     tpm::SearchEngine<tpm::NMutator> searchEngine;
+//     searchEngine.run(graph, bestGraph);
+//     tpm::CodeEngine codeEngine;
+//     auto perfEngine = searchEngine.exportPerfEngine();
+//     codeEngine.importPerfEngine(perfEngine);
+//     codeEngine.genCode(bestGraph, "res.cu");
+// }
+
+// TEST(Mutator, TConv_3) {
+//     auto g = new tpm::Graph();
+
+//     auto i0 = g->tensor({1, 2, 2, 448});
+//     auto w1 = g->tensor({448, 4, 4, 256});
+
+//     // g->conv(i0, w1, 4, 4);
+//     g->convTrans(i0, w1, 1, 1, 2, 2, 1, 1);
+
+//     g->updateConnection();
+
+//     std::shared_ptr<tpm::SubGraph> graph, bestGraph;
+//     graph = std::make_shared<tpm::SubGraph>(g->getOperators());
+//     tpm::SearchEngine<tpm::NMutator> searchEngine;
+//     searchEngine.run(graph, bestGraph);
+//     tpm::CodeEngine codeEngine;
+//     auto perfEngine = searchEngine.exportPerfEngine();
+//     codeEngine.importPerfEngine(perfEngine);
+//     codeEngine.genCode(bestGraph, "res.cu");
+// }
+
+// FIXME: failed since implicit transpose for DLT
+TEST(Mutator, DISABLED_InfoGAN_TConv_3_correctness) {
+    // verifyNaiveMembound True: subgraph after transformation
+    // verifyNaiveMembound False: subgraph of one single membound (eOP)
+    const bool verifyNaiveMembound = false;
+    auto g = new tpm::Graph();
+
+    // {n, h, w, f} * {r, s, f, c}
+    // {n, f, h, w} * {f, c, r, s}
+    auto i0 = g->tensor({1, 448, 2, 2});
+    auto w1 = g->tensor({448, 256, 4, 4});
+
+    g->convTrans(i0, w1, 1, 1, 2, 2, 1, 1);
+
+    g->updateConnection();
+
+    printf("--- Init Finished ---\n");
+
+    std::shared_ptr<tpm::SubGraph> graph, bestGraph;
+    vector<tpm::SubGraph *> outGraphs;
+    graph = std::make_shared<tpm::SubGraph>(g->getOperators());
+    graph->print();
+    printf("--- Graph Finished ---\n");
+
+    auto mutationEngine = make_shared<tpm::NMutator>();
+    if (verifyNaiveMembound)
+        mutationEngine->setToNaiveMembound();
+    tpm::SearchEngine searchEngine(mutationEngine);
+
+    printf("--- SearchEngine Finished ---\n");
+
+    tpm::NMutator mutator;
+    if (verifyNaiveMembound)
+        mutator.setToNaiveMembound();
+    mutator.run(graph.get(), outGraphs);
+
+    printf("--- Mutator Finished ---\n");
+
+    bestGraph = shared_ptr<tpm::SubGraph>(outGraphs.back());
+    bestGraph->print();
+
+    printf("--- BestGraph Finished ---\n");
+
+    EXPECT_TRUE(graph->verification(bestGraph.get(), true));
+
+    // // Codegen (independent from the above)
+    // searchEngine.run(graph, bestGraph);
+    // tpm::CodeEngine codeEngine;
+    // auto perfEngine = searchEngine.exportPerfEngine();
+    // codeEngine.importPerfEngine(perfEngine);
+    // codeEngine.genCode(bestGraph, "res.cu");
+}
+
+// TEST(Mutator, G2BMM) {
+//     auto g = new tpm::Graph();
+
+//     int nHeads = 8, seq_len = 10000, feat_len = 64, w = 1000, d = 4;
+//     auto i0 = g->tensor({nHeads, seq_len, feat_len});
+//     auto i1 = g->tensor({nHeads, seq_len, feat_len});
+
+//     g->g2bmm(i0, i1, w, d);
+
+//     g->updateConnection();
+
+//     std::shared_ptr<tpm::SubGraph> graph, bestGraph;
+//     graph = std::make_shared<tpm::SubGraph>(g->getOperators());
+//     tpm::SearchEngine<tpm::NMutator> searchEngine;
+//     searchEngine.run(graph, bestGraph);
+//     tpm::CodeEngine codeEngine;
+//     auto perfEngine = searchEngine.exportPerfEngine();
+//     codeEngine.importPerfEngine(perfEngine);
+//     codeEngine.genCode(graph, "res.cu");
+//     // codeEngine.genCode(bestGraph, "res.cu");
+// }
+
+// TEST(Mutator, GBMML) {
+//     auto g = new tpm::Graph();
+
+//     int nHeads = 8, seq_len = 10000, feat_len = 64, w = 1000, d = 4;
+//     auto i0 = g->tensor({nHeads, seq_len, 2 * w + 1});
+//     auto i1 = g->tensor({nHeads, seq_len, feat_len});
+
+//     g->gbmml(i0, i1, d);
+
+//     g->updateConnection();
+
+//     std::shared_ptr<tpm::SubGraph> graph, bestGraph;
+//     graph = std::make_shared<tpm::SubGraph>(g->getOperators());
+//     tpm::SearchEngine<tpm::NMutator> searchEngine;
+//     searchEngine.run(graph, bestGraph);
+//     tpm::CodeEngine codeEngine;
+//     auto perfEngine = searchEngine.exportPerfEngine();
+//     codeEngine.importPerfEngine(perfEngine);
+//     codeEngine.genCode(graph, "res.cu");
+//     // codeEngine.genCode(bestGraph, "res.cu");
+// }
+
+// TEST(Mutator, Conv5x5) {
+//     //
+//     conv7x7->relu->conv3x3->relu->conv3x3->relu->conv3x3->relu->conv3x3->relu
+//     auto g = new tpm::Graph();
+//     auto i0 = g->tensor({1, 32, 224, 224});
+
+//     auto w1 = g->tensor({1, 32, 5, 5});
+
+//     g->conv(i0, w1, tpm::ConvOp::PaddingMode::Same);
+
+//     g->updateConnection();
+
+//     std::shared_ptr<tpm::SubGraph> graph, bestGraph;
+//     graph = std::make_shared<tpm::SubGraph>(g->getOperators());
+//     tpm::SearchEngine<tpm::NMutator> searchEngine;
+//     searchEngine.run(graph, bestGraph);
+//     tpm::CodeEngine codeEngine;
+//     auto perfEngine = searchEngine.exportPerfEngine();
+//     codeEngine.importPerfEngine(perfEngine);
+//     codeEngine.genCode(bestGraph, "res.cu");
+// }
+
+// TEST(Mutator, BMM) {
+//     const int m = 16, n = 1024, k = 1024;
+//     auto g = new tpm::Graph();
+//     auto i0 = g->tensor({1, m, k});
+//     auto w0 = g->tensor({1, k, n});
+//     auto w1 = g->tensor({1, k, n});
+//     auto w2 = g->tensor({1, k, n});
+
+//     g->matmul(i0, w0);
+//     g->matmul(i0, w1);
+//     g->matmul(i0, w2);
+//     g->updateConnection();
+
+//     std::shared_ptr<tpm::SubGraph> graph, bestGraph;
+//     graph = std::make_shared<tpm::SubGraph>(g->getOperators());
+//     tpm::SearchEngine<tpm::NMutator> searchEngine;
+//     searchEngine.run(graph, bestGraph);
+//     tpm::CodeEngine codeEngine;
+//     auto perfEngine = searchEngine.exportPerfEngine();
+//     codeEngine.importPerfEngine(perfEngine);
+//     codeEngine.genCode(bestGraph, "res.cu");
+// }
+
+TEST(Mutator, Conv2gemm1x1_bs1_mutator) {
+    const int N = 1, H = 7, W = 7, C = 512, F = 512, R = 1, S = 1;
+    auto g = new tpm::Graph();
+    auto i0 = g->tensor({N, C, H, W});
+    auto w1 = g->tensor({F, C, R, S});
+    g->conv(i0, w1, R / 2, S / 2);
+
+    g->updateConnection();
+
+    std::shared_ptr<tpm::SubGraph> graph, bestGraph;
+    vector<tpm::SubGraph *> out_graphs;
+    graph = std::make_shared<tpm::SubGraph>(g->getOperators());
+    const vector<int> rules = {3, 2, 2, 8, 8, 6, 6};
+    auto mutator = make_shared<tpm::NMutator>(rules);
+    mutator->run(graph.get(), out_graphs);
+    tpm::SearchEngine searchEngine(mutator);
+    int maxNReshapes = 0;
+    for (const auto &graph : out_graphs) {
+        searchEngine.getPerf(make_shared<tpm::SubGraph>(*graph), true);
+        int nReshapes = 0, nTrans = 0;
+        for (auto op : graph->getOperators()) {
+            nReshapes += op->isReshapeOp();
+            if (auto matmul = dynamic_cast<MatmulOp *>(op))
+                nTrans = matmul->getTransA() + matmul->getTransB();
+        }
+        maxNReshapes = max(maxNReshapes, nReshapes);
+        // Number of Reshapes for KxA and AxK
+        EXPECT_TRUE((nReshapes == 3 - nTrans) || (nReshapes == nTrans));
+    }
+    // Matmul K^N A^N -> no Membound
+    EXPECT_EQ(maxNReshapes, 3);
+}
+
+TEST(Mutator, Conv2gemm1x1_searchEngine_ruleBased) {
+    const int N = 1, H = 7, W = 7, C = 512, F = 512, R = 1, S = 1;
+    auto g = new tpm::Graph();
+    auto i0 = g->tensor({N, C, H, W});
+    auto w1 = g->tensor({F, C, R, S});
+    g->conv(i0, w1, R / 2, S / 2);
+
+    g->updateConnection();
+
+    std::shared_ptr<tpm::SubGraph> graph, bestGraph;
+    graph = std::make_shared<tpm::SubGraph>(g->getOperators());
+    const vector<int> rules = {3, 2, 2, 8, 8, 6, 6};
+    tpm::SearchEngine searchEngine(make_shared<tpm::NMutator>(rules));
+    searchEngine.run(graph, bestGraph);
+
+    // clang-format off
+    // ========== PET graph getPerf ============
+    // Reshape(in=0,out=126)
+    //  op_time 0.000000
+    // Reshape(in=1,out=125)
+    //  op_time 0.000000
+    // Matmul([A,B,act=0],A=125,B=126,C=124, TTbmnk: 0, 0, 1, 512, 49, 512)
+    //  op_time 0.013799
+    // Reshape(in=124,out=3)
+    //  op_time 0.000000
+    //          Op Cnt   T_tot Percent  T_mean
+    //      Matmul   1   0.014   100.0   0.014
+    //     Reshape   3   0.000     0.0   0.000
+    // Origin Perf: 0.0553319
+    // Best Perf without correction: 0.0137989
+    // Best Perf with correction: 0.0137989
+    // clang-format on
+    EXPECT_EQ(bestGraph->getOperators().size(), 4u);
+    auto cntOps = bestGraph->countOps();
+    EXPECT_EQ(cntOps["Matmul"], 1);
+    EXPECT_EQ(cntOps["Reshape"], 3);
+    bestGraph->print();
+}
+
+TEST(Mutator, Conv2gemm1x1_searchEngine_search) {
+    const int N = 1, H = 7, W = 7, C = 512, F = 512, R = 1, S = 1;
+    auto g = new tpm::Graph();
+    auto i0 = g->tensor({N, C, H, W});
+    auto w1 = g->tensor({F, C, R, S});
+    g->conv(i0, w1, R / 2, S / 2);
+
+    g->updateConnection();
+
+    std::shared_ptr<tpm::SubGraph> graph, bestGraph;
+    graph = std::make_shared<tpm::SubGraph>(g->getOperators());
+    tpm::SearchEngine searchEngine(make_shared<tpm::NMutator>());
+    searchEngine.run(graph, bestGraph);
+
+    EXPECT_EQ(bestGraph->getOperators().size(), 4u);
+    auto cntOps = bestGraph->countOps();
+    EXPECT_EQ(cntOps["Matmul"], 1);
+    EXPECT_EQ(cntOps["Reshape"], 3);
+    bestGraph->print();
+}
+
+TEST(Mutator, Conv2gemm1x7_searchEngine_ruleBased) {
+    const int N = 1, C = 2048, H = 7, W = 7, F = 128, R = 1,
+              S = 7; // gcn_Conv_137
+    auto g = new tpm::Graph();
+    auto i0 = g->tensor({N, C, H, W});
+    auto w1 = g->tensor({F, C, R, S});
+    g->conv(i0, w1, R / 2, S / 2);
+
+    g->updateConnection();
+
+    std::shared_ptr<tpm::SubGraph> graph, bestGraph;
+    graph = std::make_shared<tpm::SubGraph>(g->getOperators());
+    const vector<int> rules = {3, 2, 2, 5, 8, 8, 6, 90};
+    tpm::SearchEngine searchEngine(make_shared<tpm::NMutator>(rules));
+    searchEngine.run(graph, bestGraph);
+
+    // clang-format off
+    //     ========== PET graph getPerf ============
+    // Reshape(in=0,out=309)
+    //  op_time 0.000000
+    // MemBound[124644277](i0=1, o0=308, exec_time=0.0683594, NNet Inputs=[K,])
+    // L<c:0:2048><i52:0:896>Sum  ...  [i52,c]
+    //     {L<i52:0:896><c:0:2048>Sum  ...  [(i52 / 7),c,((i52 / 7) % 1),(i52 % 7)]
+    //     {K}}
+
+    //  op_time 0.000000
+    // Matmul([A^T,B,act=0],A=308,B=309,C=307, TTbmnk: 1, 0, 1, 896, 49, 2048)
+    //  op_time 0.024471
+    // MemBound[124644277](i0=307, o0=3, exec_time=0.001, NNet Inputs=[T49,])
+    // L<n:0:1><f:0:128><h:0:7><w:0:7>Sum<r:0:1><s:0:7>  ...  [(h + r),r,(w + s),s,n,f]
+    //     {L<i45:0:7><i46:0:1><i26:3:10><i27:0:7><n:0:1><f:0:128><pad=0,0,3,0,0,0,>Sum  ...  [(((7 * f) + (7 * i46)) + i27),(((49 * n) + (7 * i45)) + (i26 + -3))]
+    //     {T49}}
+
+    //  op_time 0.001000
+    //          Op Cnt   T_tot Percent  T_mean
+    //      Matmul   1   0.024    96.1   0.024
+    //     Reshape   1   0.000     0.0   0.000
+    //    MemBound   2   0.001     3.9   0.001
+    // Origin Perf: 0.405595
+    // Best Perf without correction: 0.0254715
+    // Best Perf with correction: 0.0254715
+    // Transpose perf: 0
+    // clang-format on
+    EXPECT_EQ(bestGraph->getOperators().size(), 4u);
+    auto cntOps = bestGraph->countOps();
+    EXPECT_EQ(cntOps["Matmul"], 1);
+    EXPECT_EQ(cntOps["Reshape"], 1);
+    EXPECT_EQ(cntOps["MemBound"], 2);
+    bestGraph->print();
+    EXPECT_TRUE(graph->verification(bestGraph.get(), true));
+}
+
+TEST(Mutator, Conv2gemm7x1_searchEngine_ruleBased) {
+    const int N = 1, C = 2048, H = 7, W = 7, F = 128, R = 7,
+              S = 1; // gcn_Conv_137
+    auto g = new tpm::Graph();
+    auto i0 = g->tensor({N, C, H, W});
+    auto w1 = g->tensor({F, C, R, S});
+    g->conv(i0, w1, R / 2, S / 2);
+
+    g->updateConnection();
+
+    std::shared_ptr<tpm::SubGraph> graph, bestGraph;
+    graph = std::make_shared<tpm::SubGraph>(g->getOperators());
+    const vector<int> rules = {3, 2, 2, 5, 8, 8, 6, 90};
+    tpm::SearchEngine searchEngine(make_shared<tpm::NMutator>(rules));
+    searchEngine.run(graph, bestGraph);
+
+    EXPECT_EQ(bestGraph->getOperators().size(), 4u);
+    auto cntOps = bestGraph->countOps();
+    EXPECT_EQ(cntOps["Matmul"], 1);
+    EXPECT_EQ(cntOps["Reshape"], 1);
+    EXPECT_EQ(cntOps["MemBound"], 2);
+    bestGraph->print();
+    EXPECT_TRUE(graph->verification(bestGraph.get(), true));
+}
+
+TEST(Mutator, Conv2gemm7x1_searchEngine_search) {
+    const int N = 1, C = 2048, H = 7, W = 7, F = 128, R = 7,
+              S = 1; // gcn_Conv_137
+    auto g = new tpm::Graph();
+    auto i0 = g->tensor({N, C, H, W});
+    auto w1 = g->tensor({F, C, R, S});
+    g->conv(i0, w1, R / 2, S / 2);
+
+    g->updateConnection();
+
+    std::shared_ptr<tpm::SubGraph> graph, bestGraph;
+    graph = std::make_shared<tpm::SubGraph>(g->getOperators());
+    // const vector<int> rules = {3, 2, 2, 5, 8, 8, 6, 90};
+    tpm::SearchEngine searchEngine(make_shared<tpm::NMutator>());
+    searchEngine.run(graph, bestGraph);
+
+    EXPECT_EQ(bestGraph->getOperators().size(), 4u);
+    auto cntOps = bestGraph->countOps();
+    EXPECT_EQ(cntOps["Matmul"], 1);
+    EXPECT_EQ(cntOps["Reshape"], 1);
+    EXPECT_EQ(cntOps["MemBound"], 2);
+    bestGraph->print();
+    EXPECT_TRUE(graph->verification(bestGraph.get(), true));
+}
\ No newline at end of file
diff --git a/test/nnet/failed/test_operatorCompute.cc b/test/nnet/failed/test_operatorCompute.cc
new file mode 100644
index 00000000..82dba0be
--- /dev/null
+++ b/test/nnet/failed/test_operatorCompute.cc
@@ -0,0 +1,34 @@
+#include "graph.h"
+#include "operator.h"
+#include "tensor.h"
+#include "gtest/gtest.h"
+using namespace std;
+
+TEST(OperatorCompute, Conv) {
+    const int N = 1, C = 2, H = 2, W = 2, F = 2, R = 3, S = 3;
+    auto g = new tpm::Graph();
+    auto i = g->tensor({N, C, H, W});
+    auto w = g->tensor({F, C, R, S});
+    auto o = g->tensor({N, F, H, W});
+    auto conv = g->conv(i, w, o, tpm::ConvOp::PaddingMode::Same);
+    vector<tpm::VType> dataI{0, 1, 0, 0, 1, 0, 1, 0};
+    vector<tpm::VType> dataW{1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1,
+                             0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1,
+                             0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1};
+    vector<tpm::VType> dataO{2, 1, 1, 1, 2, 0, 1, 1};
+    EXPECT_EQ(i->size(), dataI.size());
+    EXPECT_EQ(w->size(), dataW.size());
+    EXPECT_EQ(o->size(), dataO.size());
+    i->dataMalloc();
+    w->dataMalloc();
+    o->dataMalloc();
+    i->setData(dataI.data());
+    w->setData(dataW.data());
+    conv->compute();
+    tpm::SubGraph s(g->getOperators());
+    s.print();
+    for (size_t i = 0; i < dataO.size(); ++i)
+        EXPECT_EQ(o->getData(i), dataO[i]);
+}
+// Conv[552052564]([1,2,2,2],[2,2,3,3],[0,0,0,0],[0,0,0,0],p=[1,1],s=[1,1],d=[1,1],act=0,input=0,weight=1,output=2)
+// Conv[552052564]([1,2,2,2],[2,2,3,3],[0,0,0,0],[0,0,0,0],p=[1,1],s=[1,1],d=[1,1],act=0,input=0,weight=1,output=2)
diff --git a/test/nnet/failed/test_transposeOpToExpr.cc b/test/nnet/failed/test_transposeOpToExpr.cc
new file mode 100644
index 00000000..b0eb1d83
--- /dev/null
+++ b/test/nnet/failed/test_transposeOpToExpr.cc
@@ -0,0 +1,26 @@
+#include "graph.h"
+#include "nnet/Visitor/HashVisitor.h"
+#include "nnet/nmutator.h"
+#include "operator.h"
+#include "search_engine.h"
+#include "gtest/gtest.h"
+using namespace infini;
+using namespace nnet;
+using namespace std;
+
+TEST(TransposeOp2Expr, Basic) {
+    auto g = new tpm::Graph();
+    auto AT = g->tensor({2, 4, 8, 16});
+    auto op = new TransposeOp(AT, -1, {3, 1, 2, 0});
+
+    auto i = make_ref<VarNode>("i");
+    auto j = make_ref<VarNode>("j");
+    auto k = make_ref<VarNode>("k");
+    auto l = make_ref<VarNode>("l");
+    auto AN = make_ref<TensorNode>("A", vector<int>({2, 4, 6, 8}));
+    auto subA = makeSubscript(AN, {l, j, k, i});
+    auto ans = makeRangeOperator(
+        {{i, {0, 16}}, {j, {0, 4}}, {k, {0, 8}}, {l, {0, 2}}}, {}, subA);
+    ASSERT_TRUE(HashVisitor().getHash(ans) ==
+                HashVisitor().getHash(tpm::transposeOpToExpression(op)));
+}
\ No newline at end of file
diff --git a/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_0.expr b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_0.expr
new file mode 100644
index 00000000..542d4b5f
--- /dev/null
+++ b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_0.expr
@@ -0,0 +1,395 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 0 Init: ruleBasedDFS: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "h": [
+                0,
+                4
+            ],
+            "w": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "36",
+            "37",
+            "38",
+            "43",
+            "48",
+            "53"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x1": [
+                0,
+                3
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y1": [
+                0,
+                3
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        },
+        "sumVarRanges": {
+            "f": [
+                0,
+                448
+            ],
+            "r": [
+                0,
+                2
+            ],
+            "s": [
+                0,
+                2
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "18"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "5",
+        "indexes": [
+            "6",
+            "7",
+            "12",
+            "17"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "6": {
+        "type": 5,
+        "name": "n"
+    },
+    "7": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "8",
+        "rhs": "11"
+    },
+    "8": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "9",
+        "rhs": "10"
+    },
+    "9": {
+        "type": 5,
+        "name": "x1"
+    },
+    "10": {
+        "type": 5,
+        "name": "r"
+    },
+    "11": {
+        "type": 0,
+        "val": -1
+    },
+    "12": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "13",
+        "rhs": "16"
+    },
+    "13": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "14",
+        "rhs": "15"
+    },
+    "14": {
+        "type": 5,
+        "name": "y1"
+    },
+    "15": {
+        "type": 5,
+        "name": "s"
+    },
+    "16": {
+        "type": 0,
+        "val": -1
+    },
+    "17": {
+        "type": 5,
+        "name": "f"
+    },
+    "18": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "19",
+        "indexes": [
+            "20",
+            "27",
+            "34",
+            "35"
+        ]
+    },
+    "19": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "20": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "21",
+        "rhs": "26"
+    },
+    "21": {
+        "type": 1,
+        "opType": 5,
+        "lhs": "22",
+        "rhs": "23"
+    },
+    "22": {
+        "type": 0,
+        "val": 2
+    },
+    "23": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "24",
+        "rhs": "25"
+    },
+    "24": {
+        "type": 0,
+        "val": 2
+    },
+    "25": {
+        "type": 5,
+        "name": "r"
+    },
+    "26": {
+        "type": 5,
+        "name": "x2"
+    },
+    "27": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "28",
+        "rhs": "33"
+    },
+    "28": {
+        "type": 1,
+        "opType": 5,
+        "lhs": "29",
+        "rhs": "30"
+    },
+    "29": {
+        "type": 0,
+        "val": 2
+    },
+    "30": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "31",
+        "rhs": "32"
+    },
+    "31": {
+        "type": 0,
+        "val": 2
+    },
+    "32": {
+        "type": 5,
+        "name": "s"
+    },
+    "33": {
+        "type": 5,
+        "name": "y2"
+    },
+    "34": {
+        "type": 5,
+        "name": "f"
+    },
+    "35": {
+        "type": 5,
+        "name": "c"
+    },
+    "36": {
+        "type": 5,
+        "name": "n"
+    },
+    "37": {
+        "type": 5,
+        "name": "c"
+    },
+    "38": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "39",
+        "rhs": "42"
+    },
+    "39": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "40",
+        "rhs": "41"
+    },
+    "40": {
+        "type": 5,
+        "name": "h"
+    },
+    "41": {
+        "type": 0,
+        "val": 1
+    },
+    "42": {
+        "type": 0,
+        "val": 2
+    },
+    "43": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "44",
+        "rhs": "47"
+    },
+    "44": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "45",
+        "rhs": "46"
+    },
+    "45": {
+        "type": 5,
+        "name": "h"
+    },
+    "46": {
+        "type": 0,
+        "val": 1
+    },
+    "47": {
+        "type": 0,
+        "val": 2
+    },
+    "48": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "49",
+        "rhs": "52"
+    },
+    "49": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "50",
+        "rhs": "51"
+    },
+    "50": {
+        "type": 5,
+        "name": "w"
+    },
+    "51": {
+        "type": 0,
+        "val": 1
+    },
+    "52": {
+        "type": 0,
+        "val": 2
+    },
+    "53": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "54",
+        "rhs": "57"
+    },
+    "54": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "55",
+        "rhs": "56"
+    },
+    "55": {
+        "type": 5,
+        "name": "w"
+    },
+    "56": {
+        "type": 0,
+        "val": 1
+    },
+    "57": {
+        "type": 0,
+        "val": 2
+    }
+}
diff --git a/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_1.expr b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_1.expr
new file mode 100644
index 00000000..24ca0544
--- /dev/null
+++ b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_1.expr
@@ -0,0 +1,492 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 1 Rule3StageSplit: Separate sum iters: [r,s]",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "h": [
+                0,
+                4
+            ],
+            "w": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "46",
+            "47",
+            "48",
+            "53",
+            "58",
+            "63"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x1": [
+                0,
+                3
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y1": [
+                0,
+                3
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                2
+            ],
+            "s": [
+                0,
+                2
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 8,
+        "object": "4",
+        "indexes": [
+            "38",
+            "39",
+            "40",
+            "41",
+            "42",
+            "43",
+            "44",
+            "45"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "r": [
+                0,
+                2
+            ],
+            "s": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x1": [
+                0,
+                3
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y1": [
+                0,
+                3
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        },
+        "sumVarRanges": {
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "20"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "7",
+        "indexes": [
+            "8",
+            "9",
+            "14",
+            "19"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 5,
+        "name": "n"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "13"
+    },
+    "10": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "11",
+        "rhs": "12"
+    },
+    "11": {
+        "type": 5,
+        "name": "x1"
+    },
+    "12": {
+        "type": 5,
+        "name": "r"
+    },
+    "13": {
+        "type": 0,
+        "val": -1
+    },
+    "14": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "15",
+        "rhs": "18"
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "17"
+    },
+    "16": {
+        "type": 5,
+        "name": "y1"
+    },
+    "17": {
+        "type": 5,
+        "name": "s"
+    },
+    "18": {
+        "type": 0,
+        "val": -1
+    },
+    "19": {
+        "type": 5,
+        "name": "f"
+    },
+    "20": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "21",
+        "indexes": [
+            "22",
+            "29",
+            "36",
+            "37"
+        ]
+    },
+    "21": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "22": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "23",
+        "rhs": "28"
+    },
+    "23": {
+        "type": 1,
+        "opType": 5,
+        "lhs": "24",
+        "rhs": "25"
+    },
+    "24": {
+        "type": 0,
+        "val": 2
+    },
+    "25": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "26",
+        "rhs": "27"
+    },
+    "26": {
+        "type": 0,
+        "val": 2
+    },
+    "27": {
+        "type": 5,
+        "name": "r"
+    },
+    "28": {
+        "type": 5,
+        "name": "x2"
+    },
+    "29": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "30",
+        "rhs": "35"
+    },
+    "30": {
+        "type": 1,
+        "opType": 5,
+        "lhs": "31",
+        "rhs": "32"
+    },
+    "31": {
+        "type": 0,
+        "val": 2
+    },
+    "32": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "33",
+        "rhs": "34"
+    },
+    "33": {
+        "type": 0,
+        "val": 2
+    },
+    "34": {
+        "type": 5,
+        "name": "s"
+    },
+    "35": {
+        "type": 5,
+        "name": "y2"
+    },
+    "36": {
+        "type": 5,
+        "name": "f"
+    },
+    "37": {
+        "type": 5,
+        "name": "c"
+    },
+    "38": {
+        "type": 5,
+        "name": "r"
+    },
+    "39": {
+        "type": 5,
+        "name": "s"
+    },
+    "40": {
+        "type": 5,
+        "name": "n"
+    },
+    "41": {
+        "type": 5,
+        "name": "c"
+    },
+    "42": {
+        "type": 5,
+        "name": "x1"
+    },
+    "43": {
+        "type": 5,
+        "name": "x2"
+    },
+    "44": {
+        "type": 5,
+        "name": "y1"
+    },
+    "45": {
+        "type": 5,
+        "name": "y2"
+    },
+    "46": {
+        "type": 5,
+        "name": "n"
+    },
+    "47": {
+        "type": 5,
+        "name": "c"
+    },
+    "48": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "49",
+        "rhs": "52"
+    },
+    "49": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "50",
+        "rhs": "51"
+    },
+    "50": {
+        "type": 5,
+        "name": "h"
+    },
+    "51": {
+        "type": 0,
+        "val": 1
+    },
+    "52": {
+        "type": 0,
+        "val": 2
+    },
+    "53": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "54",
+        "rhs": "57"
+    },
+    "54": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "55",
+        "rhs": "56"
+    },
+    "55": {
+        "type": 5,
+        "name": "h"
+    },
+    "56": {
+        "type": 0,
+        "val": 1
+    },
+    "57": {
+        "type": 0,
+        "val": 2
+    },
+    "58": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "59",
+        "rhs": "62"
+    },
+    "59": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "60",
+        "rhs": "61"
+    },
+    "60": {
+        "type": 5,
+        "name": "w"
+    },
+    "61": {
+        "type": 0,
+        "val": 1
+    },
+    "62": {
+        "type": 0,
+        "val": 2
+    },
+    "63": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "64",
+        "rhs": "67"
+    },
+    "64": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "65",
+        "rhs": "66"
+    },
+    "65": {
+        "type": 5,
+        "name": "w"
+    },
+    "66": {
+        "type": 0,
+        "val": 1
+    },
+    "67": {
+        "type": 0,
+        "val": 2
+    }
+}
diff --git a/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_10.expr b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_10.expr
new file mode 100644
index 00000000..94fec3ee
--- /dev/null
+++ b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_10.expr
@@ -0,0 +1,472 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 10 Rule5RangeRelaxation: i31 (0,4) to (1,3),i23 (0,4) to (1,3),",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "h": [
+                0,
+                4
+            ],
+            "w": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "44",
+            "45",
+            "46",
+            "51",
+            "56",
+            "61"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x1": [
+                0,
+                3
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y1": [
+                0,
+                3
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                2
+            ],
+            "s": [
+                0,
+                2
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "4",
+        "indexes": [
+            "22",
+            "29",
+            "36",
+            "39",
+            "42",
+            "43"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            1,
+            1,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i36": [
+                0,
+                4
+            ],
+            "i34": [
+                0,
+                4
+            ],
+            "i31": [
+                1,
+                3
+            ],
+            "i23": [
+                1,
+                3
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ]
+        },
+        "sumVarRanges": {
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "16"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "7",
+        "indexes": [
+            "8",
+            "9",
+            "12",
+            "15"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 5,
+        "name": "n"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "11"
+    },
+    "10": {
+        "type": 5,
+        "name": "i23"
+    },
+    "11": {
+        "type": 0,
+        "val": -1
+    },
+    "12": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "13",
+        "rhs": "14"
+    },
+    "13": {
+        "type": 5,
+        "name": "i31"
+    },
+    "14": {
+        "type": 0,
+        "val": -1
+    },
+    "15": {
+        "type": 5,
+        "name": "f"
+    },
+    "16": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "17",
+        "indexes": [
+            "18",
+            "19",
+            "20",
+            "21"
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "18": {
+        "type": 5,
+        "name": "i34"
+    },
+    "19": {
+        "type": 5,
+        "name": "i36"
+    },
+    "20": {
+        "type": 5,
+        "name": "f"
+    },
+    "21": {
+        "type": 5,
+        "name": "c"
+    },
+    "22": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "23",
+        "rhs": "28"
+    },
+    "23": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "24",
+        "rhs": "25"
+    },
+    "24": {
+        "type": 5,
+        "name": "y2"
+    },
+    "25": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "26",
+        "rhs": "27"
+    },
+    "26": {
+        "type": 0,
+        "val": -2
+    },
+    "27": {
+        "type": 5,
+        "name": "s"
+    },
+    "28": {
+        "type": 0,
+        "val": 2
+    },
+    "29": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "30",
+        "rhs": "35"
+    },
+    "30": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "31",
+        "rhs": "32"
+    },
+    "31": {
+        "type": 5,
+        "name": "x2"
+    },
+    "32": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "33",
+        "rhs": "34"
+    },
+    "33": {
+        "type": 0,
+        "val": -2
+    },
+    "34": {
+        "type": 5,
+        "name": "r"
+    },
+    "35": {
+        "type": 0,
+        "val": 2
+    },
+    "36": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "37",
+        "rhs": "38"
+    },
+    "37": {
+        "type": 5,
+        "name": "y1"
+    },
+    "38": {
+        "type": 5,
+        "name": "s"
+    },
+    "39": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "40",
+        "rhs": "41"
+    },
+    "40": {
+        "type": 5,
+        "name": "x1"
+    },
+    "41": {
+        "type": 5,
+        "name": "r"
+    },
+    "42": {
+        "type": 5,
+        "name": "n"
+    },
+    "43": {
+        "type": 5,
+        "name": "c"
+    },
+    "44": {
+        "type": 5,
+        "name": "n"
+    },
+    "45": {
+        "type": 5,
+        "name": "c"
+    },
+    "46": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "47",
+        "rhs": "50"
+    },
+    "47": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "48",
+        "rhs": "49"
+    },
+    "48": {
+        "type": 5,
+        "name": "h"
+    },
+    "49": {
+        "type": 0,
+        "val": 1
+    },
+    "50": {
+        "type": 0,
+        "val": 2
+    },
+    "51": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "52",
+        "rhs": "55"
+    },
+    "52": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "53",
+        "rhs": "54"
+    },
+    "53": {
+        "type": 5,
+        "name": "h"
+    },
+    "54": {
+        "type": 0,
+        "val": 1
+    },
+    "55": {
+        "type": 0,
+        "val": 2
+    },
+    "56": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "57",
+        "rhs": "60"
+    },
+    "57": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "58",
+        "rhs": "59"
+    },
+    "58": {
+        "type": 5,
+        "name": "w"
+    },
+    "59": {
+        "type": 0,
+        "val": 1
+    },
+    "60": {
+        "type": 0,
+        "val": 2
+    },
+    "61": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "62",
+        "rhs": "65"
+    },
+    "62": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "63",
+        "rhs": "64"
+    },
+    "63": {
+        "type": 5,
+        "name": "w"
+    },
+    "64": {
+        "type": 0,
+        "val": 1
+    },
+    "65": {
+        "type": 0,
+        "val": 2
+    }
+}
diff --git a/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_11.expr b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_11.expr
new file mode 100644
index 00000000..6d6c8b7b
--- /dev/null
+++ b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_11.expr
@@ -0,0 +1,683 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 11 Rule8GuidedDLT: Toward Matmul. guidedDLTMoreVar2 ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "h": [
+                0,
+                4
+            ],
+            "w": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "73",
+            "74",
+            "75",
+            "80",
+            "85",
+            "90"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x1": [
+                0,
+                3
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y1": [
+                0,
+                3
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                2
+            ],
+            "s": [
+                0,
+                2
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "4",
+        "indexes": [
+            "51",
+            "58",
+            "65",
+            "68",
+            "71",
+            "72"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            1,
+            1,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i36": [
+                0,
+                4
+            ],
+            "i34": [
+                0,
+                4
+            ],
+            "i31": [
+                1,
+                3
+            ],
+            "i23": [
+                1,
+                3
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "5": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "6",
+        "indexes": [
+            "35",
+            "48",
+            "49",
+            "50"
+        ]
+    },
+    "6": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "7",
+        "loopVarRanges": {
+            "i37": [
+                0,
+                64
+            ],
+            "i36": [
+                0,
+                4
+            ],
+            "i34": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                256
+            ]
+        },
+        "sumVarRanges": {
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "7": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "8",
+        "rhs": "29"
+    },
+    "8": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "9",
+        "indexes": [
+            "27",
+            "28"
+        ]
+    },
+    "9": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            64,
+            448
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "10"
+    },
+    "10": {
+        "type": 105,
+        "inputs": [
+            "11"
+        ],
+        "expr": "12",
+        "outputShape": [
+            64,
+            448
+        ]
+    },
+    "11": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "12": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "13",
+        "loopVarRanges": {
+            "i37": [
+                0,
+                64
+            ],
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "13": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "14",
+        "indexes": [
+            "15",
+            "18",
+            "23",
+            "26"
+        ]
+    },
+    "14": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "15": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "16",
+        "rhs": "17"
+    },
+    "16": {
+        "type": 5,
+        "name": "i37"
+    },
+    "17": {
+        "type": 0,
+        "val": 4
+    },
+    "18": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "19",
+        "rhs": "22"
+    },
+    "19": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "20",
+        "rhs": "21"
+    },
+    "20": {
+        "type": 5,
+        "name": "i37"
+    },
+    "21": {
+        "type": 0,
+        "val": 2
+    },
+    "22": {
+        "type": 0,
+        "val": 2
+    },
+    "23": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "24",
+        "rhs": "25"
+    },
+    "24": {
+        "type": 5,
+        "name": "i37"
+    },
+    "25": {
+        "type": 0,
+        "val": 2
+    },
+    "26": {
+        "type": 5,
+        "name": "f"
+    },
+    "27": {
+        "type": 5,
+        "name": "i37"
+    },
+    "28": {
+        "type": 5,
+        "name": "f"
+    },
+    "29": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "30",
+        "indexes": [
+            "31",
+            "32",
+            "33",
+            "34"
+        ]
+    },
+    "30": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "31": {
+        "type": 5,
+        "name": "i34"
+    },
+    "32": {
+        "type": 5,
+        "name": "i36"
+    },
+    "33": {
+        "type": 5,
+        "name": "f"
+    },
+    "34": {
+        "type": 5,
+        "name": "c"
+    },
+    "35": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "36",
+        "rhs": "45"
+    },
+    "36": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "37",
+        "rhs": "40"
+    },
+    "37": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "38",
+        "rhs": "39"
+    },
+    "38": {
+        "type": 0,
+        "val": 4
+    },
+    "39": {
+        "type": 5,
+        "name": "n"
+    },
+    "40": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "41",
+        "rhs": "42"
+    },
+    "41": {
+        "type": 0,
+        "val": 2
+    },
+    "42": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "43",
+        "rhs": "44"
+    },
+    "43": {
+        "type": 5,
+        "name": "i23"
+    },
+    "44": {
+        "type": 0,
+        "val": -1
+    },
+    "45": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "46",
+        "rhs": "47"
+    },
+    "46": {
+        "type": 5,
+        "name": "i31"
+    },
+    "47": {
+        "type": 0,
+        "val": -1
+    },
+    "48": {
+        "type": 5,
+        "name": "i36"
+    },
+    "49": {
+        "type": 5,
+        "name": "i34"
+    },
+    "50": {
+        "type": 5,
+        "name": "c"
+    },
+    "51": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "52",
+        "rhs": "57"
+    },
+    "52": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "53",
+        "rhs": "54"
+    },
+    "53": {
+        "type": 5,
+        "name": "y2"
+    },
+    "54": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "55",
+        "rhs": "56"
+    },
+    "55": {
+        "type": 0,
+        "val": -2
+    },
+    "56": {
+        "type": 5,
+        "name": "s"
+    },
+    "57": {
+        "type": 0,
+        "val": 2
+    },
+    "58": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "59",
+        "rhs": "64"
+    },
+    "59": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "60",
+        "rhs": "61"
+    },
+    "60": {
+        "type": 5,
+        "name": "x2"
+    },
+    "61": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "62",
+        "rhs": "63"
+    },
+    "62": {
+        "type": 0,
+        "val": -2
+    },
+    "63": {
+        "type": 5,
+        "name": "r"
+    },
+    "64": {
+        "type": 0,
+        "val": 2
+    },
+    "65": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "66",
+        "rhs": "67"
+    },
+    "66": {
+        "type": 5,
+        "name": "y1"
+    },
+    "67": {
+        "type": 5,
+        "name": "s"
+    },
+    "68": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "69",
+        "rhs": "70"
+    },
+    "69": {
+        "type": 5,
+        "name": "x1"
+    },
+    "70": {
+        "type": 5,
+        "name": "r"
+    },
+    "71": {
+        "type": 5,
+        "name": "n"
+    },
+    "72": {
+        "type": 5,
+        "name": "c"
+    },
+    "73": {
+        "type": 5,
+        "name": "n"
+    },
+    "74": {
+        "type": 5,
+        "name": "c"
+    },
+    "75": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "76",
+        "rhs": "79"
+    },
+    "76": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "77",
+        "rhs": "78"
+    },
+    "77": {
+        "type": 5,
+        "name": "h"
+    },
+    "78": {
+        "type": 0,
+        "val": 1
+    },
+    "79": {
+        "type": 0,
+        "val": 2
+    },
+    "80": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "81",
+        "rhs": "84"
+    },
+    "81": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "82",
+        "rhs": "83"
+    },
+    "82": {
+        "type": 5,
+        "name": "h"
+    },
+    "83": {
+        "type": 0,
+        "val": 1
+    },
+    "84": {
+        "type": 0,
+        "val": 2
+    },
+    "85": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "86",
+        "rhs": "89"
+    },
+    "86": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "87",
+        "rhs": "88"
+    },
+    "87": {
+        "type": 5,
+        "name": "w"
+    },
+    "88": {
+        "type": 0,
+        "val": 1
+    },
+    "89": {
+        "type": 0,
+        "val": 2
+    },
+    "90": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "91",
+        "rhs": "94"
+    },
+    "91": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "92",
+        "rhs": "93"
+    },
+    "92": {
+        "type": 5,
+        "name": "w"
+    },
+    "93": {
+        "type": 0,
+        "val": 1
+    },
+    "94": {
+        "type": 0,
+        "val": 2
+    }
+}
diff --git a/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_12.expr b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_12.expr
new file mode 100644
index 00000000..5e25b287
--- /dev/null
+++ b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_12.expr
@@ -0,0 +1,874 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 12 Rule8GuidedDLT: Toward Matmul. guidedDLTMoreVar2 ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "h": [
+                0,
+                4
+            ],
+            "w": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "100",
+            "101",
+            "102",
+            "107",
+            "112",
+            "117"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x1": [
+                0,
+                3
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y1": [
+                0,
+                3
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                2
+            ],
+            "s": [
+                0,
+                2
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "4",
+        "indexes": [
+            "78",
+            "85",
+            "92",
+            "95",
+            "98",
+            "99"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            1,
+            1,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i36": [
+                0,
+                4
+            ],
+            "i34": [
+                0,
+                4
+            ],
+            "i31": [
+                1,
+                3
+            ],
+            "i23": [
+                1,
+                3
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "5": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "6",
+        "indexes": [
+            "62",
+            "75",
+            "76",
+            "77"
+        ]
+    },
+    "6": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "7",
+        "loopVarRanges": {
+            "i37": [
+                0,
+                64
+            ],
+            "i36": [
+                0,
+                4
+            ],
+            "i34": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "7": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "8",
+        "indexes": [
+            "52",
+            "61"
+        ]
+    },
+    "8": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "9",
+        "loopVarRanges": {
+            "i38": [
+                0,
+                4096
+            ],
+            "i37": [
+                0,
+                64
+            ]
+        },
+        "sumVarRanges": {
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "9": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "10",
+        "rhs": "31"
+    },
+    "10": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "11",
+        "indexes": [
+            "29",
+            "30"
+        ]
+    },
+    "11": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            64,
+            448
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "12"
+    },
+    "12": {
+        "type": 105,
+        "inputs": [
+            "13"
+        ],
+        "expr": "14",
+        "outputShape": [
+            64,
+            448
+        ]
+    },
+    "13": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "14": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "15",
+        "loopVarRanges": {
+            "i37": [
+                0,
+                64
+            ],
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "15": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "16",
+        "indexes": [
+            "17",
+            "20",
+            "25",
+            "28"
+        ]
+    },
+    "16": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "17": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "18",
+        "rhs": "19"
+    },
+    "18": {
+        "type": 5,
+        "name": "i37"
+    },
+    "19": {
+        "type": 0,
+        "val": 4
+    },
+    "20": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "21",
+        "rhs": "24"
+    },
+    "21": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "22",
+        "rhs": "23"
+    },
+    "22": {
+        "type": 5,
+        "name": "i37"
+    },
+    "23": {
+        "type": 0,
+        "val": 2
+    },
+    "24": {
+        "type": 0,
+        "val": 2
+    },
+    "25": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "26",
+        "rhs": "27"
+    },
+    "26": {
+        "type": 5,
+        "name": "i37"
+    },
+    "27": {
+        "type": 0,
+        "val": 2
+    },
+    "28": {
+        "type": 5,
+        "name": "f"
+    },
+    "29": {
+        "type": 5,
+        "name": "i37"
+    },
+    "30": {
+        "type": 5,
+        "name": "f"
+    },
+    "31": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "32",
+        "indexes": [
+            "50",
+            "51"
+        ]
+    },
+    "32": {
+        "type": 4,
+        "name": "T2",
+        "shape": [
+            4096,
+            448
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "33"
+    },
+    "33": {
+        "type": 105,
+        "inputs": [
+            "34"
+        ],
+        "expr": "35",
+        "outputShape": [
+            4096,
+            448
+        ]
+    },
+    "34": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "35": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "36",
+        "loopVarRanges": {
+            "i38": [
+                0,
+                4096
+            ],
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "36": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "37",
+        "indexes": [
+            "38",
+            "41",
+            "46",
+            "47"
+        ]
+    },
+    "37": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "38": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "39",
+        "rhs": "40"
+    },
+    "39": {
+        "type": 5,
+        "name": "i38"
+    },
+    "40": {
+        "type": 0,
+        "val": 1024
+    },
+    "41": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "42",
+        "rhs": "45"
+    },
+    "42": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "43",
+        "rhs": "44"
+    },
+    "43": {
+        "type": 5,
+        "name": "i38"
+    },
+    "44": {
+        "type": 0,
+        "val": 256
+    },
+    "45": {
+        "type": 0,
+        "val": 4
+    },
+    "46": {
+        "type": 5,
+        "name": "f"
+    },
+    "47": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "48",
+        "rhs": "49"
+    },
+    "48": {
+        "type": 5,
+        "name": "i38"
+    },
+    "49": {
+        "type": 0,
+        "val": 256
+    },
+    "50": {
+        "type": 5,
+        "name": "i38"
+    },
+    "51": {
+        "type": 5,
+        "name": "f"
+    },
+    "52": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "53",
+        "rhs": "60"
+    },
+    "53": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "54",
+        "rhs": "57"
+    },
+    "54": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "55",
+        "rhs": "56"
+    },
+    "55": {
+        "type": 0,
+        "val": 1024
+    },
+    "56": {
+        "type": 5,
+        "name": "i34"
+    },
+    "57": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "58",
+        "rhs": "59"
+    },
+    "58": {
+        "type": 0,
+        "val": 256
+    },
+    "59": {
+        "type": 5,
+        "name": "i36"
+    },
+    "60": {
+        "type": 5,
+        "name": "c"
+    },
+    "61": {
+        "type": 5,
+        "name": "i37"
+    },
+    "62": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "63",
+        "rhs": "72"
+    },
+    "63": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "64",
+        "rhs": "67"
+    },
+    "64": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "65",
+        "rhs": "66"
+    },
+    "65": {
+        "type": 0,
+        "val": 4
+    },
+    "66": {
+        "type": 5,
+        "name": "n"
+    },
+    "67": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "68",
+        "rhs": "69"
+    },
+    "68": {
+        "type": 0,
+        "val": 2
+    },
+    "69": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "70",
+        "rhs": "71"
+    },
+    "70": {
+        "type": 5,
+        "name": "i23"
+    },
+    "71": {
+        "type": 0,
+        "val": -1
+    },
+    "72": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "73",
+        "rhs": "74"
+    },
+    "73": {
+        "type": 5,
+        "name": "i31"
+    },
+    "74": {
+        "type": 0,
+        "val": -1
+    },
+    "75": {
+        "type": 5,
+        "name": "i36"
+    },
+    "76": {
+        "type": 5,
+        "name": "i34"
+    },
+    "77": {
+        "type": 5,
+        "name": "c"
+    },
+    "78": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "79",
+        "rhs": "84"
+    },
+    "79": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "80",
+        "rhs": "81"
+    },
+    "80": {
+        "type": 5,
+        "name": "y2"
+    },
+    "81": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "82",
+        "rhs": "83"
+    },
+    "82": {
+        "type": 0,
+        "val": -2
+    },
+    "83": {
+        "type": 5,
+        "name": "s"
+    },
+    "84": {
+        "type": 0,
+        "val": 2
+    },
+    "85": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "86",
+        "rhs": "91"
+    },
+    "86": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "87",
+        "rhs": "88"
+    },
+    "87": {
+        "type": 5,
+        "name": "x2"
+    },
+    "88": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "89",
+        "rhs": "90"
+    },
+    "89": {
+        "type": 0,
+        "val": -2
+    },
+    "90": {
+        "type": 5,
+        "name": "r"
+    },
+    "91": {
+        "type": 0,
+        "val": 2
+    },
+    "92": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "93",
+        "rhs": "94"
+    },
+    "93": {
+        "type": 5,
+        "name": "y1"
+    },
+    "94": {
+        "type": 5,
+        "name": "s"
+    },
+    "95": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "96",
+        "rhs": "97"
+    },
+    "96": {
+        "type": 5,
+        "name": "x1"
+    },
+    "97": {
+        "type": 5,
+        "name": "r"
+    },
+    "98": {
+        "type": 5,
+        "name": "n"
+    },
+    "99": {
+        "type": 5,
+        "name": "c"
+    },
+    "100": {
+        "type": 5,
+        "name": "n"
+    },
+    "101": {
+        "type": 5,
+        "name": "c"
+    },
+    "102": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "103",
+        "rhs": "106"
+    },
+    "103": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "104",
+        "rhs": "105"
+    },
+    "104": {
+        "type": 5,
+        "name": "h"
+    },
+    "105": {
+        "type": 0,
+        "val": 1
+    },
+    "106": {
+        "type": 0,
+        "val": 2
+    },
+    "107": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "108",
+        "rhs": "111"
+    },
+    "108": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "109",
+        "rhs": "110"
+    },
+    "109": {
+        "type": 5,
+        "name": "h"
+    },
+    "110": {
+        "type": 0,
+        "val": 1
+    },
+    "111": {
+        "type": 0,
+        "val": 2
+    },
+    "112": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "113",
+        "rhs": "116"
+    },
+    "113": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "114",
+        "rhs": "115"
+    },
+    "114": {
+        "type": 5,
+        "name": "w"
+    },
+    "115": {
+        "type": 0,
+        "val": 1
+    },
+    "116": {
+        "type": 0,
+        "val": 2
+    },
+    "117": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "118",
+        "rhs": "121"
+    },
+    "118": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "119",
+        "rhs": "120"
+    },
+    "119": {
+        "type": 5,
+        "name": "w"
+    },
+    "120": {
+        "type": 0,
+        "val": 1
+    },
+    "121": {
+        "type": 0,
+        "val": 2
+    }
+}
diff --git a/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_13.expr b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_13.expr
new file mode 100644
index 00000000..55a3e459
--- /dev/null
+++ b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_13.expr
@@ -0,0 +1,819 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 13 Rule4StageMerging: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "h": [
+                0,
+                4
+            ],
+            "w": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "94",
+            "95",
+            "96",
+            "101",
+            "106",
+            "111"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x1": [
+                0,
+                3
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y1": [
+                0,
+                3
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                2
+            ],
+            "s": [
+                0,
+                2
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "4",
+        "indexes": [
+            "72",
+            "79",
+            "86",
+            "89",
+            "92",
+            "93"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            1,
+            1,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i36": [
+                0,
+                4
+            ],
+            "i34": [
+                0,
+                4
+            ],
+            "i31": [
+                1,
+                3
+            ],
+            "i23": [
+                1,
+                3
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "5": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "6",
+        "indexes": [
+            "50",
+            "59"
+        ]
+    },
+    "6": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "7",
+        "loopVarRanges": {
+            "i38": [
+                0,
+                4096
+            ],
+            "i37": [
+                0,
+                64
+            ]
+        },
+        "sumVarRanges": {
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "7": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "8",
+        "rhs": "29"
+    },
+    "8": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "9",
+        "indexes": [
+            "27",
+            "28"
+        ]
+    },
+    "9": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            64,
+            448
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "10"
+    },
+    "10": {
+        "type": 105,
+        "inputs": [
+            "11"
+        ],
+        "expr": "12",
+        "outputShape": [
+            64,
+            448
+        ]
+    },
+    "11": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "12": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "13",
+        "loopVarRanges": {
+            "i37": [
+                0,
+                64
+            ],
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "13": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "14",
+        "indexes": [
+            "15",
+            "18",
+            "23",
+            "26"
+        ]
+    },
+    "14": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "15": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "16",
+        "rhs": "17"
+    },
+    "16": {
+        "type": 5,
+        "name": "i37"
+    },
+    "17": {
+        "type": 0,
+        "val": 4
+    },
+    "18": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "19",
+        "rhs": "22"
+    },
+    "19": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "20",
+        "rhs": "21"
+    },
+    "20": {
+        "type": 5,
+        "name": "i37"
+    },
+    "21": {
+        "type": 0,
+        "val": 2
+    },
+    "22": {
+        "type": 0,
+        "val": 2
+    },
+    "23": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "24",
+        "rhs": "25"
+    },
+    "24": {
+        "type": 5,
+        "name": "i37"
+    },
+    "25": {
+        "type": 0,
+        "val": 2
+    },
+    "26": {
+        "type": 5,
+        "name": "f"
+    },
+    "27": {
+        "type": 5,
+        "name": "i37"
+    },
+    "28": {
+        "type": 5,
+        "name": "f"
+    },
+    "29": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "30",
+        "indexes": [
+            "48",
+            "49"
+        ]
+    },
+    "30": {
+        "type": 4,
+        "name": "T2",
+        "shape": [
+            4096,
+            448
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "31"
+    },
+    "31": {
+        "type": 105,
+        "inputs": [
+            "32"
+        ],
+        "expr": "33",
+        "outputShape": [
+            4096,
+            448
+        ]
+    },
+    "32": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "33": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "34",
+        "loopVarRanges": {
+            "i38": [
+                0,
+                4096
+            ],
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "34": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "35",
+        "indexes": [
+            "36",
+            "39",
+            "44",
+            "45"
+        ]
+    },
+    "35": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "36": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "37",
+        "rhs": "38"
+    },
+    "37": {
+        "type": 5,
+        "name": "i38"
+    },
+    "38": {
+        "type": 0,
+        "val": 1024
+    },
+    "39": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "40",
+        "rhs": "43"
+    },
+    "40": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "41",
+        "rhs": "42"
+    },
+    "41": {
+        "type": 5,
+        "name": "i38"
+    },
+    "42": {
+        "type": 0,
+        "val": 256
+    },
+    "43": {
+        "type": 0,
+        "val": 4
+    },
+    "44": {
+        "type": 5,
+        "name": "f"
+    },
+    "45": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "46",
+        "rhs": "47"
+    },
+    "46": {
+        "type": 5,
+        "name": "i38"
+    },
+    "47": {
+        "type": 0,
+        "val": 256
+    },
+    "48": {
+        "type": 5,
+        "name": "i38"
+    },
+    "49": {
+        "type": 5,
+        "name": "f"
+    },
+    "50": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "51",
+        "rhs": "58"
+    },
+    "51": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "52",
+        "rhs": "55"
+    },
+    "52": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "53",
+        "rhs": "54"
+    },
+    "53": {
+        "type": 0,
+        "val": 1024
+    },
+    "54": {
+        "type": 5,
+        "name": "i34"
+    },
+    "55": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "56",
+        "rhs": "57"
+    },
+    "56": {
+        "type": 0,
+        "val": 256
+    },
+    "57": {
+        "type": 5,
+        "name": "i36"
+    },
+    "58": {
+        "type": 5,
+        "name": "c"
+    },
+    "59": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "60",
+        "rhs": "69"
+    },
+    "60": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "61",
+        "rhs": "64"
+    },
+    "61": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "62",
+        "rhs": "63"
+    },
+    "62": {
+        "type": 0,
+        "val": 4
+    },
+    "63": {
+        "type": 5,
+        "name": "n"
+    },
+    "64": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "65",
+        "rhs": "66"
+    },
+    "65": {
+        "type": 0,
+        "val": 2
+    },
+    "66": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "67",
+        "rhs": "68"
+    },
+    "67": {
+        "type": 5,
+        "name": "i23"
+    },
+    "68": {
+        "type": 0,
+        "val": -1
+    },
+    "69": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "70",
+        "rhs": "71"
+    },
+    "70": {
+        "type": 5,
+        "name": "i31"
+    },
+    "71": {
+        "type": 0,
+        "val": -1
+    },
+    "72": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "73",
+        "rhs": "78"
+    },
+    "73": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "74",
+        "rhs": "75"
+    },
+    "74": {
+        "type": 5,
+        "name": "y2"
+    },
+    "75": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "76",
+        "rhs": "77"
+    },
+    "76": {
+        "type": 0,
+        "val": -2
+    },
+    "77": {
+        "type": 5,
+        "name": "s"
+    },
+    "78": {
+        "type": 0,
+        "val": 2
+    },
+    "79": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "80",
+        "rhs": "85"
+    },
+    "80": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "81",
+        "rhs": "82"
+    },
+    "81": {
+        "type": 5,
+        "name": "x2"
+    },
+    "82": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "83",
+        "rhs": "84"
+    },
+    "83": {
+        "type": 0,
+        "val": -2
+    },
+    "84": {
+        "type": 5,
+        "name": "r"
+    },
+    "85": {
+        "type": 0,
+        "val": 2
+    },
+    "86": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "87",
+        "rhs": "88"
+    },
+    "87": {
+        "type": 5,
+        "name": "y1"
+    },
+    "88": {
+        "type": 5,
+        "name": "s"
+    },
+    "89": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "90",
+        "rhs": "91"
+    },
+    "90": {
+        "type": 5,
+        "name": "x1"
+    },
+    "91": {
+        "type": 5,
+        "name": "r"
+    },
+    "92": {
+        "type": 5,
+        "name": "n"
+    },
+    "93": {
+        "type": 5,
+        "name": "c"
+    },
+    "94": {
+        "type": 5,
+        "name": "n"
+    },
+    "95": {
+        "type": 5,
+        "name": "c"
+    },
+    "96": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "97",
+        "rhs": "100"
+    },
+    "97": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "98",
+        "rhs": "99"
+    },
+    "98": {
+        "type": 5,
+        "name": "h"
+    },
+    "99": {
+        "type": 0,
+        "val": 1
+    },
+    "100": {
+        "type": 0,
+        "val": 2
+    },
+    "101": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "102",
+        "rhs": "105"
+    },
+    "102": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "103",
+        "rhs": "104"
+    },
+    "103": {
+        "type": 5,
+        "name": "h"
+    },
+    "104": {
+        "type": 0,
+        "val": 1
+    },
+    "105": {
+        "type": 0,
+        "val": 2
+    },
+    "106": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "107",
+        "rhs": "110"
+    },
+    "107": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "108",
+        "rhs": "109"
+    },
+    "108": {
+        "type": 5,
+        "name": "w"
+    },
+    "109": {
+        "type": 0,
+        "val": 1
+    },
+    "110": {
+        "type": 0,
+        "val": 2
+    },
+    "111": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "112",
+        "rhs": "115"
+    },
+    "112": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "113",
+        "rhs": "114"
+    },
+    "113": {
+        "type": 5,
+        "name": "w"
+    },
+    "114": {
+        "type": 0,
+        "val": 1
+    },
+    "115": {
+        "type": 0,
+        "val": 2
+    }
+}
diff --git a/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_14.expr b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_14.expr
new file mode 100644
index 00000000..9b160649
--- /dev/null
+++ b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_14.expr
@@ -0,0 +1,854 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 14 Rule8GuidedDLT: Toward Matmul. guidedDLTDLMismatch ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "h": [
+                0,
+                4
+            ],
+            "w": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "98",
+            "99",
+            "100",
+            "105",
+            "110",
+            "115"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x1": [
+                0,
+                3
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y1": [
+                0,
+                3
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                2
+            ],
+            "s": [
+                0,
+                2
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "4",
+        "indexes": [
+            "76",
+            "83",
+            "90",
+            "93",
+            "96",
+            "97"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            1,
+            1,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i36": [
+                0,
+                4
+            ],
+            "i34": [
+                0,
+                4
+            ],
+            "i31": [
+                1,
+                3
+            ],
+            "i23": [
+                1,
+                3
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "5": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "6",
+        "indexes": [
+            "54",
+            "63"
+        ]
+    },
+    "6": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "7",
+        "loopVarRanges": {
+            "i38": [
+                0,
+                4096
+            ],
+            "i37": [
+                0,
+                64
+            ]
+        }
+    },
+    "7": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "8",
+        "indexes": [
+            "52",
+            "53"
+        ]
+    },
+    "8": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "9",
+        "loopVarRanges": {
+            "i37": [
+                0,
+                64
+            ],
+            "i38": [
+                0,
+                4096
+            ]
+        },
+        "sumVarRanges": {
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "9": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "10",
+        "rhs": "31"
+    },
+    "10": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "11",
+        "indexes": [
+            "29",
+            "30"
+        ]
+    },
+    "11": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            64,
+            448
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "12"
+    },
+    "12": {
+        "type": 105,
+        "inputs": [
+            "13"
+        ],
+        "expr": "14",
+        "outputShape": [
+            64,
+            448
+        ]
+    },
+    "13": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "14": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "15",
+        "loopVarRanges": {
+            "i37": [
+                0,
+                64
+            ],
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "15": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "16",
+        "indexes": [
+            "17",
+            "20",
+            "25",
+            "28"
+        ]
+    },
+    "16": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "17": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "18",
+        "rhs": "19"
+    },
+    "18": {
+        "type": 5,
+        "name": "i37"
+    },
+    "19": {
+        "type": 0,
+        "val": 4
+    },
+    "20": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "21",
+        "rhs": "24"
+    },
+    "21": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "22",
+        "rhs": "23"
+    },
+    "22": {
+        "type": 5,
+        "name": "i37"
+    },
+    "23": {
+        "type": 0,
+        "val": 2
+    },
+    "24": {
+        "type": 0,
+        "val": 2
+    },
+    "25": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "26",
+        "rhs": "27"
+    },
+    "26": {
+        "type": 5,
+        "name": "i37"
+    },
+    "27": {
+        "type": 0,
+        "val": 2
+    },
+    "28": {
+        "type": 5,
+        "name": "f"
+    },
+    "29": {
+        "type": 5,
+        "name": "i37"
+    },
+    "30": {
+        "type": 5,
+        "name": "f"
+    },
+    "31": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "32",
+        "indexes": [
+            "50",
+            "51"
+        ]
+    },
+    "32": {
+        "type": 4,
+        "name": "T2",
+        "shape": [
+            4096,
+            448
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "33"
+    },
+    "33": {
+        "type": 105,
+        "inputs": [
+            "34"
+        ],
+        "expr": "35",
+        "outputShape": [
+            4096,
+            448
+        ]
+    },
+    "34": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "35": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "36",
+        "loopVarRanges": {
+            "i38": [
+                0,
+                4096
+            ],
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "36": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "37",
+        "indexes": [
+            "38",
+            "41",
+            "46",
+            "47"
+        ]
+    },
+    "37": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "38": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "39",
+        "rhs": "40"
+    },
+    "39": {
+        "type": 5,
+        "name": "i38"
+    },
+    "40": {
+        "type": 0,
+        "val": 1024
+    },
+    "41": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "42",
+        "rhs": "45"
+    },
+    "42": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "43",
+        "rhs": "44"
+    },
+    "43": {
+        "type": 5,
+        "name": "i38"
+    },
+    "44": {
+        "type": 0,
+        "val": 256
+    },
+    "45": {
+        "type": 0,
+        "val": 4
+    },
+    "46": {
+        "type": 5,
+        "name": "f"
+    },
+    "47": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "48",
+        "rhs": "49"
+    },
+    "48": {
+        "type": 5,
+        "name": "i38"
+    },
+    "49": {
+        "type": 0,
+        "val": 256
+    },
+    "50": {
+        "type": 5,
+        "name": "i38"
+    },
+    "51": {
+        "type": 5,
+        "name": "f"
+    },
+    "52": {
+        "type": 5,
+        "name": "i37"
+    },
+    "53": {
+        "type": 5,
+        "name": "i38"
+    },
+    "54": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "55",
+        "rhs": "62"
+    },
+    "55": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "56",
+        "rhs": "59"
+    },
+    "56": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "57",
+        "rhs": "58"
+    },
+    "57": {
+        "type": 0,
+        "val": 1024
+    },
+    "58": {
+        "type": 5,
+        "name": "i34"
+    },
+    "59": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "60",
+        "rhs": "61"
+    },
+    "60": {
+        "type": 0,
+        "val": 256
+    },
+    "61": {
+        "type": 5,
+        "name": "i36"
+    },
+    "62": {
+        "type": 5,
+        "name": "c"
+    },
+    "63": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "64",
+        "rhs": "73"
+    },
+    "64": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "65",
+        "rhs": "68"
+    },
+    "65": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "66",
+        "rhs": "67"
+    },
+    "66": {
+        "type": 0,
+        "val": 4
+    },
+    "67": {
+        "type": 5,
+        "name": "n"
+    },
+    "68": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "69",
+        "rhs": "70"
+    },
+    "69": {
+        "type": 0,
+        "val": 2
+    },
+    "70": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "71",
+        "rhs": "72"
+    },
+    "71": {
+        "type": 5,
+        "name": "i23"
+    },
+    "72": {
+        "type": 0,
+        "val": -1
+    },
+    "73": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "74",
+        "rhs": "75"
+    },
+    "74": {
+        "type": 5,
+        "name": "i31"
+    },
+    "75": {
+        "type": 0,
+        "val": -1
+    },
+    "76": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "77",
+        "rhs": "82"
+    },
+    "77": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "78",
+        "rhs": "79"
+    },
+    "78": {
+        "type": 5,
+        "name": "y2"
+    },
+    "79": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "80",
+        "rhs": "81"
+    },
+    "80": {
+        "type": 0,
+        "val": -2
+    },
+    "81": {
+        "type": 5,
+        "name": "s"
+    },
+    "82": {
+        "type": 0,
+        "val": 2
+    },
+    "83": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "84",
+        "rhs": "89"
+    },
+    "84": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "85",
+        "rhs": "86"
+    },
+    "85": {
+        "type": 5,
+        "name": "x2"
+    },
+    "86": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "87",
+        "rhs": "88"
+    },
+    "87": {
+        "type": 0,
+        "val": -2
+    },
+    "88": {
+        "type": 5,
+        "name": "r"
+    },
+    "89": {
+        "type": 0,
+        "val": 2
+    },
+    "90": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "91",
+        "rhs": "92"
+    },
+    "91": {
+        "type": 5,
+        "name": "y1"
+    },
+    "92": {
+        "type": 5,
+        "name": "s"
+    },
+    "93": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "94",
+        "rhs": "95"
+    },
+    "94": {
+        "type": 5,
+        "name": "x1"
+    },
+    "95": {
+        "type": 5,
+        "name": "r"
+    },
+    "96": {
+        "type": 5,
+        "name": "n"
+    },
+    "97": {
+        "type": 5,
+        "name": "c"
+    },
+    "98": {
+        "type": 5,
+        "name": "n"
+    },
+    "99": {
+        "type": 5,
+        "name": "c"
+    },
+    "100": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "101",
+        "rhs": "104"
+    },
+    "101": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "102",
+        "rhs": "103"
+    },
+    "102": {
+        "type": 5,
+        "name": "h"
+    },
+    "103": {
+        "type": 0,
+        "val": 1
+    },
+    "104": {
+        "type": 0,
+        "val": 2
+    },
+    "105": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "106",
+        "rhs": "109"
+    },
+    "106": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "107",
+        "rhs": "108"
+    },
+    "107": {
+        "type": 5,
+        "name": "h"
+    },
+    "108": {
+        "type": 0,
+        "val": 1
+    },
+    "109": {
+        "type": 0,
+        "val": 2
+    },
+    "110": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "111",
+        "rhs": "114"
+    },
+    "111": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "112",
+        "rhs": "113"
+    },
+    "112": {
+        "type": 5,
+        "name": "w"
+    },
+    "113": {
+        "type": 0,
+        "val": 1
+    },
+    "114": {
+        "type": 0,
+        "val": 2
+    },
+    "115": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "116",
+        "rhs": "119"
+    },
+    "116": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "117",
+        "rhs": "118"
+    },
+    "117": {
+        "type": 5,
+        "name": "w"
+    },
+    "118": {
+        "type": 0,
+        "val": 1
+    },
+    "119": {
+        "type": 0,
+        "val": 2
+    }
+}
diff --git a/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_15.expr b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_15.expr
new file mode 100644
index 00000000..59926f6c
--- /dev/null
+++ b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_15.expr
@@ -0,0 +1,819 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 15 Rule4StageMerging: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "h": [
+                0,
+                4
+            ],
+            "w": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "94",
+            "95",
+            "96",
+            "101",
+            "106",
+            "111"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x1": [
+                0,
+                3
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y1": [
+                0,
+                3
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                2
+            ],
+            "s": [
+                0,
+                2
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "4",
+        "indexes": [
+            "72",
+            "79",
+            "86",
+            "89",
+            "92",
+            "93"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            1,
+            1,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i36": [
+                0,
+                4
+            ],
+            "i34": [
+                0,
+                4
+            ],
+            "i31": [
+                1,
+                3
+            ],
+            "i23": [
+                1,
+                3
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "5": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "6",
+        "indexes": [
+            "50",
+            "63"
+        ]
+    },
+    "6": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "7",
+        "loopVarRanges": {
+            "i37": [
+                0,
+                64
+            ],
+            "i38": [
+                0,
+                4096
+            ]
+        },
+        "sumVarRanges": {
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "7": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "8",
+        "rhs": "29"
+    },
+    "8": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "9",
+        "indexes": [
+            "27",
+            "28"
+        ]
+    },
+    "9": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            64,
+            448
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "10"
+    },
+    "10": {
+        "type": 105,
+        "inputs": [
+            "11"
+        ],
+        "expr": "12",
+        "outputShape": [
+            64,
+            448
+        ]
+    },
+    "11": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "12": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "13",
+        "loopVarRanges": {
+            "i37": [
+                0,
+                64
+            ],
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "13": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "14",
+        "indexes": [
+            "15",
+            "18",
+            "23",
+            "26"
+        ]
+    },
+    "14": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "15": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "16",
+        "rhs": "17"
+    },
+    "16": {
+        "type": 5,
+        "name": "i37"
+    },
+    "17": {
+        "type": 0,
+        "val": 4
+    },
+    "18": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "19",
+        "rhs": "22"
+    },
+    "19": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "20",
+        "rhs": "21"
+    },
+    "20": {
+        "type": 5,
+        "name": "i37"
+    },
+    "21": {
+        "type": 0,
+        "val": 2
+    },
+    "22": {
+        "type": 0,
+        "val": 2
+    },
+    "23": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "24",
+        "rhs": "25"
+    },
+    "24": {
+        "type": 5,
+        "name": "i37"
+    },
+    "25": {
+        "type": 0,
+        "val": 2
+    },
+    "26": {
+        "type": 5,
+        "name": "f"
+    },
+    "27": {
+        "type": 5,
+        "name": "i37"
+    },
+    "28": {
+        "type": 5,
+        "name": "f"
+    },
+    "29": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "30",
+        "indexes": [
+            "48",
+            "49"
+        ]
+    },
+    "30": {
+        "type": 4,
+        "name": "T2",
+        "shape": [
+            4096,
+            448
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "31"
+    },
+    "31": {
+        "type": 105,
+        "inputs": [
+            "32"
+        ],
+        "expr": "33",
+        "outputShape": [
+            4096,
+            448
+        ]
+    },
+    "32": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "33": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "34",
+        "loopVarRanges": {
+            "i38": [
+                0,
+                4096
+            ],
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "34": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "35",
+        "indexes": [
+            "36",
+            "39",
+            "44",
+            "45"
+        ]
+    },
+    "35": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "36": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "37",
+        "rhs": "38"
+    },
+    "37": {
+        "type": 5,
+        "name": "i38"
+    },
+    "38": {
+        "type": 0,
+        "val": 1024
+    },
+    "39": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "40",
+        "rhs": "43"
+    },
+    "40": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "41",
+        "rhs": "42"
+    },
+    "41": {
+        "type": 5,
+        "name": "i38"
+    },
+    "42": {
+        "type": 0,
+        "val": 256
+    },
+    "43": {
+        "type": 0,
+        "val": 4
+    },
+    "44": {
+        "type": 5,
+        "name": "f"
+    },
+    "45": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "46",
+        "rhs": "47"
+    },
+    "46": {
+        "type": 5,
+        "name": "i38"
+    },
+    "47": {
+        "type": 0,
+        "val": 256
+    },
+    "48": {
+        "type": 5,
+        "name": "i38"
+    },
+    "49": {
+        "type": 5,
+        "name": "f"
+    },
+    "50": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "51",
+        "rhs": "60"
+    },
+    "51": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "52",
+        "rhs": "55"
+    },
+    "52": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "53",
+        "rhs": "54"
+    },
+    "53": {
+        "type": 0,
+        "val": 4
+    },
+    "54": {
+        "type": 5,
+        "name": "n"
+    },
+    "55": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "56",
+        "rhs": "57"
+    },
+    "56": {
+        "type": 0,
+        "val": 2
+    },
+    "57": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "58",
+        "rhs": "59"
+    },
+    "58": {
+        "type": 5,
+        "name": "i23"
+    },
+    "59": {
+        "type": 0,
+        "val": -1
+    },
+    "60": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "61",
+        "rhs": "62"
+    },
+    "61": {
+        "type": 5,
+        "name": "i31"
+    },
+    "62": {
+        "type": 0,
+        "val": -1
+    },
+    "63": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "64",
+        "rhs": "71"
+    },
+    "64": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "65",
+        "rhs": "68"
+    },
+    "65": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "66",
+        "rhs": "67"
+    },
+    "66": {
+        "type": 0,
+        "val": 1024
+    },
+    "67": {
+        "type": 5,
+        "name": "i34"
+    },
+    "68": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "69",
+        "rhs": "70"
+    },
+    "69": {
+        "type": 0,
+        "val": 256
+    },
+    "70": {
+        "type": 5,
+        "name": "i36"
+    },
+    "71": {
+        "type": 5,
+        "name": "c"
+    },
+    "72": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "73",
+        "rhs": "78"
+    },
+    "73": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "74",
+        "rhs": "75"
+    },
+    "74": {
+        "type": 5,
+        "name": "y2"
+    },
+    "75": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "76",
+        "rhs": "77"
+    },
+    "76": {
+        "type": 0,
+        "val": -2
+    },
+    "77": {
+        "type": 5,
+        "name": "s"
+    },
+    "78": {
+        "type": 0,
+        "val": 2
+    },
+    "79": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "80",
+        "rhs": "85"
+    },
+    "80": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "81",
+        "rhs": "82"
+    },
+    "81": {
+        "type": 5,
+        "name": "x2"
+    },
+    "82": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "83",
+        "rhs": "84"
+    },
+    "83": {
+        "type": 0,
+        "val": -2
+    },
+    "84": {
+        "type": 5,
+        "name": "r"
+    },
+    "85": {
+        "type": 0,
+        "val": 2
+    },
+    "86": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "87",
+        "rhs": "88"
+    },
+    "87": {
+        "type": 5,
+        "name": "y1"
+    },
+    "88": {
+        "type": 5,
+        "name": "s"
+    },
+    "89": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "90",
+        "rhs": "91"
+    },
+    "90": {
+        "type": 5,
+        "name": "x1"
+    },
+    "91": {
+        "type": 5,
+        "name": "r"
+    },
+    "92": {
+        "type": 5,
+        "name": "n"
+    },
+    "93": {
+        "type": 5,
+        "name": "c"
+    },
+    "94": {
+        "type": 5,
+        "name": "n"
+    },
+    "95": {
+        "type": 5,
+        "name": "c"
+    },
+    "96": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "97",
+        "rhs": "100"
+    },
+    "97": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "98",
+        "rhs": "99"
+    },
+    "98": {
+        "type": 5,
+        "name": "h"
+    },
+    "99": {
+        "type": 0,
+        "val": 1
+    },
+    "100": {
+        "type": 0,
+        "val": 2
+    },
+    "101": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "102",
+        "rhs": "105"
+    },
+    "102": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "103",
+        "rhs": "104"
+    },
+    "103": {
+        "type": 5,
+        "name": "h"
+    },
+    "104": {
+        "type": 0,
+        "val": 1
+    },
+    "105": {
+        "type": 0,
+        "val": 2
+    },
+    "106": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "107",
+        "rhs": "110"
+    },
+    "107": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "108",
+        "rhs": "109"
+    },
+    "108": {
+        "type": 5,
+        "name": "w"
+    },
+    "109": {
+        "type": 0,
+        "val": 1
+    },
+    "110": {
+        "type": 0,
+        "val": 2
+    },
+    "111": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "112",
+        "rhs": "115"
+    },
+    "112": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "113",
+        "rhs": "114"
+    },
+    "113": {
+        "type": 5,
+        "name": "w"
+    },
+    "114": {
+        "type": 0,
+        "val": 1
+    },
+    "115": {
+        "type": 0,
+        "val": 2
+    }
+}
diff --git a/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_16.expr b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_16.expr
new file mode 100644
index 00000000..1939af93
--- /dev/null
+++ b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_16.expr
@@ -0,0 +1,819 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 16 Rule8GuidedDLT: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "h": [
+                0,
+                4
+            ],
+            "w": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "94",
+            "95",
+            "96",
+            "101",
+            "106",
+            "111"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x1": [
+                0,
+                3
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y1": [
+                0,
+                3
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                2
+            ],
+            "s": [
+                0,
+                2
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "4",
+        "indexes": [
+            "72",
+            "79",
+            "86",
+            "89",
+            "92",
+            "93"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            1,
+            1,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i36": [
+                0,
+                4
+            ],
+            "i34": [
+                0,
+                4
+            ],
+            "i31": [
+                1,
+                3
+            ],
+            "i23": [
+                1,
+                3
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "5": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "6",
+        "indexes": [
+            "50",
+            "63"
+        ]
+    },
+    "6": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "7",
+        "loopVarRanges": {
+            "i37": [
+                0,
+                64
+            ],
+            "i38": [
+                0,
+                4096
+            ]
+        },
+        "sumVarRanges": {
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "7": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "8",
+        "rhs": "29"
+    },
+    "8": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "9",
+        "indexes": [
+            "27",
+            "28"
+        ]
+    },
+    "9": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            64,
+            448
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "10"
+    },
+    "10": {
+        "type": 105,
+        "inputs": [
+            "11"
+        ],
+        "expr": "12",
+        "outputShape": [
+            64,
+            448
+        ]
+    },
+    "11": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "12": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "13",
+        "loopVarRanges": {
+            "i37": [
+                0,
+                64
+            ],
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "13": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "14",
+        "indexes": [
+            "15",
+            "18",
+            "23",
+            "26"
+        ]
+    },
+    "14": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "15": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "16",
+        "rhs": "17"
+    },
+    "16": {
+        "type": 5,
+        "name": "i37"
+    },
+    "17": {
+        "type": 0,
+        "val": 4
+    },
+    "18": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "19",
+        "rhs": "22"
+    },
+    "19": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "20",
+        "rhs": "21"
+    },
+    "20": {
+        "type": 5,
+        "name": "i37"
+    },
+    "21": {
+        "type": 0,
+        "val": 2
+    },
+    "22": {
+        "type": 0,
+        "val": 2
+    },
+    "23": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "24",
+        "rhs": "25"
+    },
+    "24": {
+        "type": 5,
+        "name": "i37"
+    },
+    "25": {
+        "type": 0,
+        "val": 2
+    },
+    "26": {
+        "type": 5,
+        "name": "f"
+    },
+    "27": {
+        "type": 5,
+        "name": "i37"
+    },
+    "28": {
+        "type": 5,
+        "name": "f"
+    },
+    "29": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "30",
+        "indexes": [
+            "48",
+            "49"
+        ]
+    },
+    "30": {
+        "type": 4,
+        "name": "T2",
+        "shape": [
+            4096,
+            448
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "31"
+    },
+    "31": {
+        "type": 105,
+        "inputs": [
+            "32"
+        ],
+        "expr": "33",
+        "outputShape": [
+            4096,
+            448
+        ]
+    },
+    "32": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "33": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "34",
+        "loopVarRanges": {
+            "i38": [
+                0,
+                4096
+            ],
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "34": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "35",
+        "indexes": [
+            "36",
+            "39",
+            "44",
+            "45"
+        ]
+    },
+    "35": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "36": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "37",
+        "rhs": "38"
+    },
+    "37": {
+        "type": 5,
+        "name": "i38"
+    },
+    "38": {
+        "type": 0,
+        "val": 1024
+    },
+    "39": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "40",
+        "rhs": "43"
+    },
+    "40": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "41",
+        "rhs": "42"
+    },
+    "41": {
+        "type": 5,
+        "name": "i38"
+    },
+    "42": {
+        "type": 0,
+        "val": 256
+    },
+    "43": {
+        "type": 0,
+        "val": 4
+    },
+    "44": {
+        "type": 5,
+        "name": "f"
+    },
+    "45": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "46",
+        "rhs": "47"
+    },
+    "46": {
+        "type": 5,
+        "name": "i38"
+    },
+    "47": {
+        "type": 0,
+        "val": 256
+    },
+    "48": {
+        "type": 5,
+        "name": "i38"
+    },
+    "49": {
+        "type": 5,
+        "name": "f"
+    },
+    "50": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "51",
+        "rhs": "60"
+    },
+    "51": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "52",
+        "rhs": "55"
+    },
+    "52": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "53",
+        "rhs": "54"
+    },
+    "53": {
+        "type": 0,
+        "val": 4
+    },
+    "54": {
+        "type": 5,
+        "name": "n"
+    },
+    "55": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "56",
+        "rhs": "57"
+    },
+    "56": {
+        "type": 0,
+        "val": 2
+    },
+    "57": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "58",
+        "rhs": "59"
+    },
+    "58": {
+        "type": 5,
+        "name": "i23"
+    },
+    "59": {
+        "type": 0,
+        "val": -1
+    },
+    "60": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "61",
+        "rhs": "62"
+    },
+    "61": {
+        "type": 5,
+        "name": "i31"
+    },
+    "62": {
+        "type": 0,
+        "val": -1
+    },
+    "63": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "64",
+        "rhs": "71"
+    },
+    "64": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "65",
+        "rhs": "68"
+    },
+    "65": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "66",
+        "rhs": "67"
+    },
+    "66": {
+        "type": 0,
+        "val": 1024
+    },
+    "67": {
+        "type": 5,
+        "name": "i34"
+    },
+    "68": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "69",
+        "rhs": "70"
+    },
+    "69": {
+        "type": 0,
+        "val": 256
+    },
+    "70": {
+        "type": 5,
+        "name": "i36"
+    },
+    "71": {
+        "type": 5,
+        "name": "c"
+    },
+    "72": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "73",
+        "rhs": "78"
+    },
+    "73": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "74",
+        "rhs": "75"
+    },
+    "74": {
+        "type": 5,
+        "name": "y2"
+    },
+    "75": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "76",
+        "rhs": "77"
+    },
+    "76": {
+        "type": 0,
+        "val": -2
+    },
+    "77": {
+        "type": 5,
+        "name": "s"
+    },
+    "78": {
+        "type": 0,
+        "val": 2
+    },
+    "79": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "80",
+        "rhs": "85"
+    },
+    "80": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "81",
+        "rhs": "82"
+    },
+    "81": {
+        "type": 5,
+        "name": "x2"
+    },
+    "82": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "83",
+        "rhs": "84"
+    },
+    "83": {
+        "type": 0,
+        "val": -2
+    },
+    "84": {
+        "type": 5,
+        "name": "r"
+    },
+    "85": {
+        "type": 0,
+        "val": 2
+    },
+    "86": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "87",
+        "rhs": "88"
+    },
+    "87": {
+        "type": 5,
+        "name": "y1"
+    },
+    "88": {
+        "type": 5,
+        "name": "s"
+    },
+    "89": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "90",
+        "rhs": "91"
+    },
+    "90": {
+        "type": 5,
+        "name": "x1"
+    },
+    "91": {
+        "type": 5,
+        "name": "r"
+    },
+    "92": {
+        "type": 5,
+        "name": "n"
+    },
+    "93": {
+        "type": 5,
+        "name": "c"
+    },
+    "94": {
+        "type": 5,
+        "name": "n"
+    },
+    "95": {
+        "type": 5,
+        "name": "c"
+    },
+    "96": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "97",
+        "rhs": "100"
+    },
+    "97": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "98",
+        "rhs": "99"
+    },
+    "98": {
+        "type": 5,
+        "name": "h"
+    },
+    "99": {
+        "type": 0,
+        "val": 1
+    },
+    "100": {
+        "type": 0,
+        "val": 2
+    },
+    "101": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "102",
+        "rhs": "105"
+    },
+    "102": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "103",
+        "rhs": "104"
+    },
+    "103": {
+        "type": 5,
+        "name": "h"
+    },
+    "104": {
+        "type": 0,
+        "val": 1
+    },
+    "105": {
+        "type": 0,
+        "val": 2
+    },
+    "106": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "107",
+        "rhs": "110"
+    },
+    "107": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "108",
+        "rhs": "109"
+    },
+    "108": {
+        "type": 5,
+        "name": "w"
+    },
+    "109": {
+        "type": 0,
+        "val": 1
+    },
+    "110": {
+        "type": 0,
+        "val": 2
+    },
+    "111": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "112",
+        "rhs": "115"
+    },
+    "112": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "113",
+        "rhs": "114"
+    },
+    "113": {
+        "type": 5,
+        "name": "w"
+    },
+    "114": {
+        "type": 0,
+        "val": 1
+    },
+    "115": {
+        "type": 0,
+        "val": 2
+    }
+}
diff --git a/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_17.expr b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_17.expr
new file mode 100644
index 00000000..2a04494e
--- /dev/null
+++ b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_17.expr
@@ -0,0 +1,1134 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 17 MatchComputationKernel: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "h": [
+                0,
+                4
+            ],
+            "w": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "132",
+            "133",
+            "134",
+            "139",
+            "144",
+            "149"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x1": [
+                0,
+                3
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y1": [
+                0,
+                3
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                2
+            ],
+            "s": [
+                0,
+                2
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "4",
+        "indexes": [
+            "110",
+            "117",
+            "124",
+            "127",
+            "130",
+            "131"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            1,
+            1,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i36": [
+                0,
+                4
+            ],
+            "i34": [
+                0,
+                4
+            ],
+            "i31": [
+                1,
+                3
+            ],
+            "i23": [
+                1,
+                3
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "5": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "6",
+        "indexes": [
+            "88",
+            "101"
+        ]
+    },
+    "6": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            64,
+            4096
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "7"
+    },
+    "7": {
+        "type": 101,
+        "inputs": [
+            "8",
+            "26"
+        ],
+        "expr": "44",
+        "args": [
+            1,
+            64,
+            4096,
+            448,
+            false,
+            true
+        ]
+    },
+    "8": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            64,
+            448
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "9"
+    },
+    "9": {
+        "type": 105,
+        "inputs": [
+            "10"
+        ],
+        "expr": "11",
+        "outputShape": [
+            64,
+            448
+        ]
+    },
+    "10": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "11": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "12",
+        "loopVarRanges": {
+            "i37": [
+                0,
+                64
+            ],
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "12": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "13",
+        "indexes": [
+            "14",
+            "17",
+            "22",
+            "25"
+        ]
+    },
+    "13": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "14": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "15",
+        "rhs": "16"
+    },
+    "15": {
+        "type": 5,
+        "name": "i37"
+    },
+    "16": {
+        "type": 0,
+        "val": 4
+    },
+    "17": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "18",
+        "rhs": "21"
+    },
+    "18": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "19",
+        "rhs": "20"
+    },
+    "19": {
+        "type": 5,
+        "name": "i37"
+    },
+    "20": {
+        "type": 0,
+        "val": 2
+    },
+    "21": {
+        "type": 0,
+        "val": 2
+    },
+    "22": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "23",
+        "rhs": "24"
+    },
+    "23": {
+        "type": 5,
+        "name": "i37"
+    },
+    "24": {
+        "type": 0,
+        "val": 2
+    },
+    "25": {
+        "type": 5,
+        "name": "f"
+    },
+    "26": {
+        "type": 4,
+        "name": "T2",
+        "shape": [
+            4096,
+            448
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "27"
+    },
+    "27": {
+        "type": 105,
+        "inputs": [
+            "28"
+        ],
+        "expr": "29",
+        "outputShape": [
+            4096,
+            448
+        ]
+    },
+    "28": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "29": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "30",
+        "loopVarRanges": {
+            "i38": [
+                0,
+                4096
+            ],
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "30": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "31",
+        "indexes": [
+            "32",
+            "35",
+            "40",
+            "41"
+        ]
+    },
+    "31": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "32": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "33",
+        "rhs": "34"
+    },
+    "33": {
+        "type": 5,
+        "name": "i38"
+    },
+    "34": {
+        "type": 0,
+        "val": 1024
+    },
+    "35": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "36",
+        "rhs": "39"
+    },
+    "36": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "37",
+        "rhs": "38"
+    },
+    "37": {
+        "type": 5,
+        "name": "i38"
+    },
+    "38": {
+        "type": 0,
+        "val": 256
+    },
+    "39": {
+        "type": 0,
+        "val": 4
+    },
+    "40": {
+        "type": 5,
+        "name": "f"
+    },
+    "41": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "42",
+        "rhs": "43"
+    },
+    "42": {
+        "type": 5,
+        "name": "i38"
+    },
+    "43": {
+        "type": 0,
+        "val": 256
+    },
+    "44": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "45",
+        "loopVarRanges": {
+            "i37": [
+                0,
+                64
+            ],
+            "i38": [
+                0,
+                4096
+            ]
+        },
+        "sumVarRanges": {
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "45": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "46",
+        "rhs": "67"
+    },
+    "46": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "47",
+        "indexes": [
+            "65",
+            "66"
+        ]
+    },
+    "47": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            64,
+            448
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "48"
+    },
+    "48": {
+        "type": 105,
+        "inputs": [
+            "49"
+        ],
+        "expr": "50",
+        "outputShape": [
+            64,
+            448
+        ]
+    },
+    "49": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "50": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "51",
+        "loopVarRanges": {
+            "i37": [
+                0,
+                64
+            ],
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "51": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "52",
+        "indexes": [
+            "53",
+            "56",
+            "61",
+            "64"
+        ]
+    },
+    "52": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "53": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "54",
+        "rhs": "55"
+    },
+    "54": {
+        "type": 5,
+        "name": "i37"
+    },
+    "55": {
+        "type": 0,
+        "val": 4
+    },
+    "56": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "57",
+        "rhs": "60"
+    },
+    "57": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "58",
+        "rhs": "59"
+    },
+    "58": {
+        "type": 5,
+        "name": "i37"
+    },
+    "59": {
+        "type": 0,
+        "val": 2
+    },
+    "60": {
+        "type": 0,
+        "val": 2
+    },
+    "61": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "62",
+        "rhs": "63"
+    },
+    "62": {
+        "type": 5,
+        "name": "i37"
+    },
+    "63": {
+        "type": 0,
+        "val": 2
+    },
+    "64": {
+        "type": 5,
+        "name": "f"
+    },
+    "65": {
+        "type": 5,
+        "name": "i37"
+    },
+    "66": {
+        "type": 5,
+        "name": "f"
+    },
+    "67": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "68",
+        "indexes": [
+            "86",
+            "87"
+        ]
+    },
+    "68": {
+        "type": 4,
+        "name": "T2",
+        "shape": [
+            4096,
+            448
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "69"
+    },
+    "69": {
+        "type": 105,
+        "inputs": [
+            "70"
+        ],
+        "expr": "71",
+        "outputShape": [
+            4096,
+            448
+        ]
+    },
+    "70": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "71": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "72",
+        "loopVarRanges": {
+            "i38": [
+                0,
+                4096
+            ],
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "72": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "73",
+        "indexes": [
+            "74",
+            "77",
+            "82",
+            "83"
+        ]
+    },
+    "73": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "74": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "75",
+        "rhs": "76"
+    },
+    "75": {
+        "type": 5,
+        "name": "i38"
+    },
+    "76": {
+        "type": 0,
+        "val": 1024
+    },
+    "77": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "78",
+        "rhs": "81"
+    },
+    "78": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "79",
+        "rhs": "80"
+    },
+    "79": {
+        "type": 5,
+        "name": "i38"
+    },
+    "80": {
+        "type": 0,
+        "val": 256
+    },
+    "81": {
+        "type": 0,
+        "val": 4
+    },
+    "82": {
+        "type": 5,
+        "name": "f"
+    },
+    "83": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "84",
+        "rhs": "85"
+    },
+    "84": {
+        "type": 5,
+        "name": "i38"
+    },
+    "85": {
+        "type": 0,
+        "val": 256
+    },
+    "86": {
+        "type": 5,
+        "name": "i38"
+    },
+    "87": {
+        "type": 5,
+        "name": "f"
+    },
+    "88": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "89",
+        "rhs": "98"
+    },
+    "89": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "90",
+        "rhs": "93"
+    },
+    "90": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "91",
+        "rhs": "92"
+    },
+    "91": {
+        "type": 0,
+        "val": 4
+    },
+    "92": {
+        "type": 5,
+        "name": "n"
+    },
+    "93": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "94",
+        "rhs": "95"
+    },
+    "94": {
+        "type": 0,
+        "val": 2
+    },
+    "95": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "96",
+        "rhs": "97"
+    },
+    "96": {
+        "type": 5,
+        "name": "i23"
+    },
+    "97": {
+        "type": 0,
+        "val": -1
+    },
+    "98": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "99",
+        "rhs": "100"
+    },
+    "99": {
+        "type": 5,
+        "name": "i31"
+    },
+    "100": {
+        "type": 0,
+        "val": -1
+    },
+    "101": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "102",
+        "rhs": "109"
+    },
+    "102": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "103",
+        "rhs": "106"
+    },
+    "103": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "104",
+        "rhs": "105"
+    },
+    "104": {
+        "type": 0,
+        "val": 1024
+    },
+    "105": {
+        "type": 5,
+        "name": "i34"
+    },
+    "106": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "107",
+        "rhs": "108"
+    },
+    "107": {
+        "type": 0,
+        "val": 256
+    },
+    "108": {
+        "type": 5,
+        "name": "i36"
+    },
+    "109": {
+        "type": 5,
+        "name": "c"
+    },
+    "110": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "111",
+        "rhs": "116"
+    },
+    "111": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "112",
+        "rhs": "113"
+    },
+    "112": {
+        "type": 5,
+        "name": "y2"
+    },
+    "113": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "114",
+        "rhs": "115"
+    },
+    "114": {
+        "type": 0,
+        "val": -2
+    },
+    "115": {
+        "type": 5,
+        "name": "s"
+    },
+    "116": {
+        "type": 0,
+        "val": 2
+    },
+    "117": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "118",
+        "rhs": "123"
+    },
+    "118": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "119",
+        "rhs": "120"
+    },
+    "119": {
+        "type": 5,
+        "name": "x2"
+    },
+    "120": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "121",
+        "rhs": "122"
+    },
+    "121": {
+        "type": 0,
+        "val": -2
+    },
+    "122": {
+        "type": 5,
+        "name": "r"
+    },
+    "123": {
+        "type": 0,
+        "val": 2
+    },
+    "124": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "125",
+        "rhs": "126"
+    },
+    "125": {
+        "type": 5,
+        "name": "y1"
+    },
+    "126": {
+        "type": 5,
+        "name": "s"
+    },
+    "127": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "128",
+        "rhs": "129"
+    },
+    "128": {
+        "type": 5,
+        "name": "x1"
+    },
+    "129": {
+        "type": 5,
+        "name": "r"
+    },
+    "130": {
+        "type": 5,
+        "name": "n"
+    },
+    "131": {
+        "type": 5,
+        "name": "c"
+    },
+    "132": {
+        "type": 5,
+        "name": "n"
+    },
+    "133": {
+        "type": 5,
+        "name": "c"
+    },
+    "134": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "135",
+        "rhs": "138"
+    },
+    "135": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "136",
+        "rhs": "137"
+    },
+    "136": {
+        "type": 5,
+        "name": "h"
+    },
+    "137": {
+        "type": 0,
+        "val": 1
+    },
+    "138": {
+        "type": 0,
+        "val": 2
+    },
+    "139": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "140",
+        "rhs": "143"
+    },
+    "140": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "141",
+        "rhs": "142"
+    },
+    "141": {
+        "type": 5,
+        "name": "h"
+    },
+    "142": {
+        "type": 0,
+        "val": 1
+    },
+    "143": {
+        "type": 0,
+        "val": 2
+    },
+    "144": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "145",
+        "rhs": "148"
+    },
+    "145": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "146",
+        "rhs": "147"
+    },
+    "146": {
+        "type": 5,
+        "name": "w"
+    },
+    "147": {
+        "type": 0,
+        "val": 1
+    },
+    "148": {
+        "type": 0,
+        "val": 2
+    },
+    "149": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "150",
+        "rhs": "153"
+    },
+    "150": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "151",
+        "rhs": "152"
+    },
+    "151": {
+        "type": 5,
+        "name": "w"
+    },
+    "152": {
+        "type": 0,
+        "val": 1
+    },
+    "153": {
+        "type": 0,
+        "val": 2
+    }
+}
diff --git a/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_18.expr b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_18.expr
new file mode 100644
index 00000000..8e72dd98
--- /dev/null
+++ b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_18.expr
@@ -0,0 +1,1829 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 18 MatchMemBoundKernel: ",
+    "0": {
+        "type": 4,
+        "name": "T19",
+        "shape": [
+            16,
+            4,
+            4,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "1"
+    },
+    "1": {
+        "type": 105,
+        "inputs": [
+            "2"
+        ],
+        "expr": "84",
+        "outputShape": [
+            16,
+            4,
+            4,
+            256
+        ]
+    },
+    "2": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            64,
+            4096
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "3"
+    },
+    "3": {
+        "type": 101,
+        "inputs": [
+            "4",
+            "22"
+        ],
+        "expr": "40",
+        "args": [
+            1,
+            64,
+            4096,
+            448,
+            false,
+            true
+        ]
+    },
+    "4": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            64,
+            448
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "5"
+    },
+    "5": {
+        "type": 105,
+        "inputs": [
+            "6"
+        ],
+        "expr": "7",
+        "outputShape": [
+            64,
+            448
+        ]
+    },
+    "6": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "7": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "8",
+        "loopVarRanges": {
+            "i37": [
+                0,
+                64
+            ],
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "8": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "9",
+        "indexes": [
+            "10",
+            "13",
+            "18",
+            "21"
+        ]
+    },
+    "9": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "10": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "11",
+        "rhs": "12"
+    },
+    "11": {
+        "type": 5,
+        "name": "i37"
+    },
+    "12": {
+        "type": 0,
+        "val": 4
+    },
+    "13": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "14",
+        "rhs": "17"
+    },
+    "14": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "15",
+        "rhs": "16"
+    },
+    "15": {
+        "type": 5,
+        "name": "i37"
+    },
+    "16": {
+        "type": 0,
+        "val": 2
+    },
+    "17": {
+        "type": 0,
+        "val": 2
+    },
+    "18": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "19",
+        "rhs": "20"
+    },
+    "19": {
+        "type": 5,
+        "name": "i37"
+    },
+    "20": {
+        "type": 0,
+        "val": 2
+    },
+    "21": {
+        "type": 5,
+        "name": "f"
+    },
+    "22": {
+        "type": 4,
+        "name": "T2",
+        "shape": [
+            4096,
+            448
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "23"
+    },
+    "23": {
+        "type": 105,
+        "inputs": [
+            "24"
+        ],
+        "expr": "25",
+        "outputShape": [
+            4096,
+            448
+        ]
+    },
+    "24": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "25": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "26",
+        "loopVarRanges": {
+            "i38": [
+                0,
+                4096
+            ],
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "26": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "27",
+        "indexes": [
+            "28",
+            "31",
+            "36",
+            "37"
+        ]
+    },
+    "27": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "28": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "29",
+        "rhs": "30"
+    },
+    "29": {
+        "type": 5,
+        "name": "i38"
+    },
+    "30": {
+        "type": 0,
+        "val": 1024
+    },
+    "31": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "32",
+        "rhs": "35"
+    },
+    "32": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "33",
+        "rhs": "34"
+    },
+    "33": {
+        "type": 5,
+        "name": "i38"
+    },
+    "34": {
+        "type": 0,
+        "val": 256
+    },
+    "35": {
+        "type": 0,
+        "val": 4
+    },
+    "36": {
+        "type": 5,
+        "name": "f"
+    },
+    "37": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "38",
+        "rhs": "39"
+    },
+    "38": {
+        "type": 5,
+        "name": "i38"
+    },
+    "39": {
+        "type": 0,
+        "val": 256
+    },
+    "40": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "41",
+        "loopVarRanges": {
+            "i37": [
+                0,
+                64
+            ],
+            "i38": [
+                0,
+                4096
+            ]
+        },
+        "sumVarRanges": {
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "41": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "42",
+        "rhs": "63"
+    },
+    "42": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "43",
+        "indexes": [
+            "61",
+            "62"
+        ]
+    },
+    "43": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            64,
+            448
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "44"
+    },
+    "44": {
+        "type": 105,
+        "inputs": [
+            "45"
+        ],
+        "expr": "46",
+        "outputShape": [
+            64,
+            448
+        ]
+    },
+    "45": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "46": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "47",
+        "loopVarRanges": {
+            "i37": [
+                0,
+                64
+            ],
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "47": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "48",
+        "indexes": [
+            "49",
+            "52",
+            "57",
+            "60"
+        ]
+    },
+    "48": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "49": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "50",
+        "rhs": "51"
+    },
+    "50": {
+        "type": 5,
+        "name": "i37"
+    },
+    "51": {
+        "type": 0,
+        "val": 4
+    },
+    "52": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "53",
+        "rhs": "56"
+    },
+    "53": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "54",
+        "rhs": "55"
+    },
+    "54": {
+        "type": 5,
+        "name": "i37"
+    },
+    "55": {
+        "type": 0,
+        "val": 2
+    },
+    "56": {
+        "type": 0,
+        "val": 2
+    },
+    "57": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "58",
+        "rhs": "59"
+    },
+    "58": {
+        "type": 5,
+        "name": "i37"
+    },
+    "59": {
+        "type": 0,
+        "val": 2
+    },
+    "60": {
+        "type": 5,
+        "name": "f"
+    },
+    "61": {
+        "type": 5,
+        "name": "i37"
+    },
+    "62": {
+        "type": 5,
+        "name": "f"
+    },
+    "63": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "64",
+        "indexes": [
+            "82",
+            "83"
+        ]
+    },
+    "64": {
+        "type": 4,
+        "name": "T2",
+        "shape": [
+            4096,
+            448
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "65"
+    },
+    "65": {
+        "type": 105,
+        "inputs": [
+            "66"
+        ],
+        "expr": "67",
+        "outputShape": [
+            4096,
+            448
+        ]
+    },
+    "66": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "67": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "68",
+        "loopVarRanges": {
+            "i38": [
+                0,
+                4096
+            ],
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "68": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "69",
+        "indexes": [
+            "70",
+            "73",
+            "78",
+            "79"
+        ]
+    },
+    "69": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "70": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "71",
+        "rhs": "72"
+    },
+    "71": {
+        "type": 5,
+        "name": "i38"
+    },
+    "72": {
+        "type": 0,
+        "val": 1024
+    },
+    "73": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "74",
+        "rhs": "77"
+    },
+    "74": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "75",
+        "rhs": "76"
+    },
+    "75": {
+        "type": 5,
+        "name": "i38"
+    },
+    "76": {
+        "type": 0,
+        "val": 256
+    },
+    "77": {
+        "type": 0,
+        "val": 4
+    },
+    "78": {
+        "type": 5,
+        "name": "f"
+    },
+    "79": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "80",
+        "rhs": "81"
+    },
+    "80": {
+        "type": 5,
+        "name": "i38"
+    },
+    "81": {
+        "type": 0,
+        "val": 256
+    },
+    "82": {
+        "type": 5,
+        "name": "i38"
+    },
+    "83": {
+        "type": 5,
+        "name": "f"
+    },
+    "84": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "85",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "h": [
+                0,
+                4
+            ],
+            "w": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "85": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "86",
+        "indexes": [
+            "216",
+            "217",
+            "218",
+            "223",
+            "228",
+            "233"
+        ]
+    },
+    "86": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "87",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x1": [
+                0,
+                3
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y1": [
+                0,
+                3
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                2
+            ],
+            "s": [
+                0,
+                2
+            ]
+        }
+    },
+    "87": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "88",
+        "indexes": [
+            "194",
+            "201",
+            "208",
+            "211",
+            "214",
+            "215"
+        ]
+    },
+    "88": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            1,
+            1,
+            0,
+            0
+        ],
+        "summand": "89",
+        "loopVarRanges": {
+            "i36": [
+                0,
+                4
+            ],
+            "i34": [
+                0,
+                4
+            ],
+            "i31": [
+                1,
+                3
+            ],
+            "i23": [
+                1,
+                3
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "89": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "90",
+        "indexes": [
+            "172",
+            "185"
+        ]
+    },
+    "90": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            64,
+            4096
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "91"
+    },
+    "91": {
+        "type": 101,
+        "inputs": [
+            "92",
+            "110"
+        ],
+        "expr": "128",
+        "args": [
+            1,
+            64,
+            4096,
+            448,
+            false,
+            true
+        ]
+    },
+    "92": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            64,
+            448
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "93"
+    },
+    "93": {
+        "type": 105,
+        "inputs": [
+            "94"
+        ],
+        "expr": "95",
+        "outputShape": [
+            64,
+            448
+        ]
+    },
+    "94": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "95": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "96",
+        "loopVarRanges": {
+            "i37": [
+                0,
+                64
+            ],
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "96": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "97",
+        "indexes": [
+            "98",
+            "101",
+            "106",
+            "109"
+        ]
+    },
+    "97": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "98": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "99",
+        "rhs": "100"
+    },
+    "99": {
+        "type": 5,
+        "name": "i37"
+    },
+    "100": {
+        "type": 0,
+        "val": 4
+    },
+    "101": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "102",
+        "rhs": "105"
+    },
+    "102": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "103",
+        "rhs": "104"
+    },
+    "103": {
+        "type": 5,
+        "name": "i37"
+    },
+    "104": {
+        "type": 0,
+        "val": 2
+    },
+    "105": {
+        "type": 0,
+        "val": 2
+    },
+    "106": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "107",
+        "rhs": "108"
+    },
+    "107": {
+        "type": 5,
+        "name": "i37"
+    },
+    "108": {
+        "type": 0,
+        "val": 2
+    },
+    "109": {
+        "type": 5,
+        "name": "f"
+    },
+    "110": {
+        "type": 4,
+        "name": "T2",
+        "shape": [
+            4096,
+            448
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "111"
+    },
+    "111": {
+        "type": 105,
+        "inputs": [
+            "112"
+        ],
+        "expr": "113",
+        "outputShape": [
+            4096,
+            448
+        ]
+    },
+    "112": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "113": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "114",
+        "loopVarRanges": {
+            "i38": [
+                0,
+                4096
+            ],
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "114": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "115",
+        "indexes": [
+            "116",
+            "119",
+            "124",
+            "125"
+        ]
+    },
+    "115": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "116": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "117",
+        "rhs": "118"
+    },
+    "117": {
+        "type": 5,
+        "name": "i38"
+    },
+    "118": {
+        "type": 0,
+        "val": 1024
+    },
+    "119": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "120",
+        "rhs": "123"
+    },
+    "120": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "121",
+        "rhs": "122"
+    },
+    "121": {
+        "type": 5,
+        "name": "i38"
+    },
+    "122": {
+        "type": 0,
+        "val": 256
+    },
+    "123": {
+        "type": 0,
+        "val": 4
+    },
+    "124": {
+        "type": 5,
+        "name": "f"
+    },
+    "125": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "126",
+        "rhs": "127"
+    },
+    "126": {
+        "type": 5,
+        "name": "i38"
+    },
+    "127": {
+        "type": 0,
+        "val": 256
+    },
+    "128": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "129",
+        "loopVarRanges": {
+            "i37": [
+                0,
+                64
+            ],
+            "i38": [
+                0,
+                4096
+            ]
+        },
+        "sumVarRanges": {
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "129": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "130",
+        "rhs": "151"
+    },
+    "130": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "131",
+        "indexes": [
+            "149",
+            "150"
+        ]
+    },
+    "131": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            64,
+            448
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "132"
+    },
+    "132": {
+        "type": 105,
+        "inputs": [
+            "133"
+        ],
+        "expr": "134",
+        "outputShape": [
+            64,
+            448
+        ]
+    },
+    "133": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "134": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "135",
+        "loopVarRanges": {
+            "i37": [
+                0,
+                64
+            ],
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "135": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "136",
+        "indexes": [
+            "137",
+            "140",
+            "145",
+            "148"
+        ]
+    },
+    "136": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "137": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "138",
+        "rhs": "139"
+    },
+    "138": {
+        "type": 5,
+        "name": "i37"
+    },
+    "139": {
+        "type": 0,
+        "val": 4
+    },
+    "140": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "141",
+        "rhs": "144"
+    },
+    "141": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "142",
+        "rhs": "143"
+    },
+    "142": {
+        "type": 5,
+        "name": "i37"
+    },
+    "143": {
+        "type": 0,
+        "val": 2
+    },
+    "144": {
+        "type": 0,
+        "val": 2
+    },
+    "145": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "146",
+        "rhs": "147"
+    },
+    "146": {
+        "type": 5,
+        "name": "i37"
+    },
+    "147": {
+        "type": 0,
+        "val": 2
+    },
+    "148": {
+        "type": 5,
+        "name": "f"
+    },
+    "149": {
+        "type": 5,
+        "name": "i37"
+    },
+    "150": {
+        "type": 5,
+        "name": "f"
+    },
+    "151": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "152",
+        "indexes": [
+            "170",
+            "171"
+        ]
+    },
+    "152": {
+        "type": 4,
+        "name": "T2",
+        "shape": [
+            4096,
+            448
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "153"
+    },
+    "153": {
+        "type": 105,
+        "inputs": [
+            "154"
+        ],
+        "expr": "155",
+        "outputShape": [
+            4096,
+            448
+        ]
+    },
+    "154": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "155": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "156",
+        "loopVarRanges": {
+            "i38": [
+                0,
+                4096
+            ],
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "156": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "157",
+        "indexes": [
+            "158",
+            "161",
+            "166",
+            "167"
+        ]
+    },
+    "157": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "158": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "159",
+        "rhs": "160"
+    },
+    "159": {
+        "type": 5,
+        "name": "i38"
+    },
+    "160": {
+        "type": 0,
+        "val": 1024
+    },
+    "161": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "162",
+        "rhs": "165"
+    },
+    "162": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "163",
+        "rhs": "164"
+    },
+    "163": {
+        "type": 5,
+        "name": "i38"
+    },
+    "164": {
+        "type": 0,
+        "val": 256
+    },
+    "165": {
+        "type": 0,
+        "val": 4
+    },
+    "166": {
+        "type": 5,
+        "name": "f"
+    },
+    "167": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "168",
+        "rhs": "169"
+    },
+    "168": {
+        "type": 5,
+        "name": "i38"
+    },
+    "169": {
+        "type": 0,
+        "val": 256
+    },
+    "170": {
+        "type": 5,
+        "name": "i38"
+    },
+    "171": {
+        "type": 5,
+        "name": "f"
+    },
+    "172": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "173",
+        "rhs": "182"
+    },
+    "173": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "174",
+        "rhs": "177"
+    },
+    "174": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "175",
+        "rhs": "176"
+    },
+    "175": {
+        "type": 0,
+        "val": 4
+    },
+    "176": {
+        "type": 5,
+        "name": "n"
+    },
+    "177": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "178",
+        "rhs": "179"
+    },
+    "178": {
+        "type": 0,
+        "val": 2
+    },
+    "179": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "180",
+        "rhs": "181"
+    },
+    "180": {
+        "type": 5,
+        "name": "i23"
+    },
+    "181": {
+        "type": 0,
+        "val": -1
+    },
+    "182": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "183",
+        "rhs": "184"
+    },
+    "183": {
+        "type": 5,
+        "name": "i31"
+    },
+    "184": {
+        "type": 0,
+        "val": -1
+    },
+    "185": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "186",
+        "rhs": "193"
+    },
+    "186": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "187",
+        "rhs": "190"
+    },
+    "187": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "188",
+        "rhs": "189"
+    },
+    "188": {
+        "type": 0,
+        "val": 1024
+    },
+    "189": {
+        "type": 5,
+        "name": "i34"
+    },
+    "190": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "191",
+        "rhs": "192"
+    },
+    "191": {
+        "type": 0,
+        "val": 256
+    },
+    "192": {
+        "type": 5,
+        "name": "i36"
+    },
+    "193": {
+        "type": 5,
+        "name": "c"
+    },
+    "194": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "195",
+        "rhs": "200"
+    },
+    "195": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "196",
+        "rhs": "197"
+    },
+    "196": {
+        "type": 5,
+        "name": "y2"
+    },
+    "197": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "198",
+        "rhs": "199"
+    },
+    "198": {
+        "type": 0,
+        "val": -2
+    },
+    "199": {
+        "type": 5,
+        "name": "s"
+    },
+    "200": {
+        "type": 0,
+        "val": 2
+    },
+    "201": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "202",
+        "rhs": "207"
+    },
+    "202": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "203",
+        "rhs": "204"
+    },
+    "203": {
+        "type": 5,
+        "name": "x2"
+    },
+    "204": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "205",
+        "rhs": "206"
+    },
+    "205": {
+        "type": 0,
+        "val": -2
+    },
+    "206": {
+        "type": 5,
+        "name": "r"
+    },
+    "207": {
+        "type": 0,
+        "val": 2
+    },
+    "208": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "209",
+        "rhs": "210"
+    },
+    "209": {
+        "type": 5,
+        "name": "y1"
+    },
+    "210": {
+        "type": 5,
+        "name": "s"
+    },
+    "211": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "212",
+        "rhs": "213"
+    },
+    "212": {
+        "type": 5,
+        "name": "x1"
+    },
+    "213": {
+        "type": 5,
+        "name": "r"
+    },
+    "214": {
+        "type": 5,
+        "name": "n"
+    },
+    "215": {
+        "type": 5,
+        "name": "c"
+    },
+    "216": {
+        "type": 5,
+        "name": "n"
+    },
+    "217": {
+        "type": 5,
+        "name": "c"
+    },
+    "218": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "219",
+        "rhs": "222"
+    },
+    "219": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "220",
+        "rhs": "221"
+    },
+    "220": {
+        "type": 5,
+        "name": "h"
+    },
+    "221": {
+        "type": 0,
+        "val": 1
+    },
+    "222": {
+        "type": 0,
+        "val": 2
+    },
+    "223": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "224",
+        "rhs": "227"
+    },
+    "224": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "225",
+        "rhs": "226"
+    },
+    "225": {
+        "type": 5,
+        "name": "h"
+    },
+    "226": {
+        "type": 0,
+        "val": 1
+    },
+    "227": {
+        "type": 0,
+        "val": 2
+    },
+    "228": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "229",
+        "rhs": "232"
+    },
+    "229": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "230",
+        "rhs": "231"
+    },
+    "230": {
+        "type": 5,
+        "name": "w"
+    },
+    "231": {
+        "type": 0,
+        "val": 1
+    },
+    "232": {
+        "type": 0,
+        "val": 2
+    },
+    "233": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "234",
+        "rhs": "237"
+    },
+    "234": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "235",
+        "rhs": "236"
+    },
+    "235": {
+        "type": 5,
+        "name": "w"
+    },
+    "236": {
+        "type": 0,
+        "val": 1
+    },
+    "237": {
+        "type": 0,
+        "val": 2
+    }
+}
diff --git a/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_2.expr b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_2.expr
new file mode 100644
index 00000000..1e6756c8
--- /dev/null
+++ b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_2.expr
@@ -0,0 +1,587 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 2 Rule2VariableMerging: Old iters: [x1,r], new iters: [i23,i24] phis: [(x1 + r),r] psis: [(i23 - i24),i24]",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "h": [
+                0,
+                4
+            ],
+            "w": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "56",
+            "57",
+            "58",
+            "63",
+            "68",
+            "73"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x1": [
+                0,
+                3
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y1": [
+                0,
+                3
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                2
+            ],
+            "s": [
+                0,
+                2
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 8,
+        "object": "4",
+        "indexes": [
+            "48",
+            "49",
+            "50",
+            "51",
+            "52",
+            "53",
+            "54",
+            "55"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "r": [
+                0,
+                2
+            ],
+            "s": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x1": [
+                0,
+                3
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y1": [
+                0,
+                3
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        }
+    },
+    "5": {
+        "type": 3,
+        "subExprsNum": 8,
+        "object": "6",
+        "indexes": [
+            "38",
+            "41",
+            "42",
+            "43",
+            "44",
+            "45",
+            "46",
+            "47"
+        ]
+    },
+    "6": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "7",
+        "loopVarRanges": {
+            "i23": [
+                0,
+                4
+            ],
+            "i24": [
+                0,
+                2
+            ],
+            "s": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y1": [
+                0,
+                3
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        },
+        "sumVarRanges": {
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "7": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "8",
+        "rhs": "20"
+    },
+    "8": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "9",
+        "indexes": [
+            "10",
+            "11",
+            "14",
+            "19"
+        ]
+    },
+    "9": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "10": {
+        "type": 5,
+        "name": "n"
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "13"
+    },
+    "12": {
+        "type": 5,
+        "name": "i23"
+    },
+    "13": {
+        "type": 0,
+        "val": -1
+    },
+    "14": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "15",
+        "rhs": "18"
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "17"
+    },
+    "16": {
+        "type": 5,
+        "name": "s"
+    },
+    "17": {
+        "type": 5,
+        "name": "y1"
+    },
+    "18": {
+        "type": 0,
+        "val": -1
+    },
+    "19": {
+        "type": 5,
+        "name": "f"
+    },
+    "20": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "21",
+        "indexes": [
+            "22",
+            "29",
+            "36",
+            "37"
+        ]
+    },
+    "21": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "22": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "23",
+        "rhs": "28"
+    },
+    "23": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "24",
+        "rhs": "27"
+    },
+    "24": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "25",
+        "rhs": "26"
+    },
+    "25": {
+        "type": 0,
+        "val": -2
+    },
+    "26": {
+        "type": 5,
+        "name": "i24"
+    },
+    "27": {
+        "type": 5,
+        "name": "x2"
+    },
+    "28": {
+        "type": 0,
+        "val": 2
+    },
+    "29": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "30",
+        "rhs": "35"
+    },
+    "30": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "31",
+        "rhs": "34"
+    },
+    "31": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "32",
+        "rhs": "33"
+    },
+    "32": {
+        "type": 0,
+        "val": -2
+    },
+    "33": {
+        "type": 5,
+        "name": "s"
+    },
+    "34": {
+        "type": 5,
+        "name": "y2"
+    },
+    "35": {
+        "type": 0,
+        "val": 2
+    },
+    "36": {
+        "type": 5,
+        "name": "f"
+    },
+    "37": {
+        "type": 5,
+        "name": "c"
+    },
+    "38": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "39",
+        "rhs": "40"
+    },
+    "39": {
+        "type": 5,
+        "name": "x1"
+    },
+    "40": {
+        "type": 5,
+        "name": "r"
+    },
+    "41": {
+        "type": 5,
+        "name": "r"
+    },
+    "42": {
+        "type": 5,
+        "name": "s"
+    },
+    "43": {
+        "type": 5,
+        "name": "n"
+    },
+    "44": {
+        "type": 5,
+        "name": "c"
+    },
+    "45": {
+        "type": 5,
+        "name": "x2"
+    },
+    "46": {
+        "type": 5,
+        "name": "y1"
+    },
+    "47": {
+        "type": 5,
+        "name": "y2"
+    },
+    "48": {
+        "type": 5,
+        "name": "r"
+    },
+    "49": {
+        "type": 5,
+        "name": "s"
+    },
+    "50": {
+        "type": 5,
+        "name": "n"
+    },
+    "51": {
+        "type": 5,
+        "name": "c"
+    },
+    "52": {
+        "type": 5,
+        "name": "x1"
+    },
+    "53": {
+        "type": 5,
+        "name": "x2"
+    },
+    "54": {
+        "type": 5,
+        "name": "y1"
+    },
+    "55": {
+        "type": 5,
+        "name": "y2"
+    },
+    "56": {
+        "type": 5,
+        "name": "n"
+    },
+    "57": {
+        "type": 5,
+        "name": "c"
+    },
+    "58": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "59",
+        "rhs": "62"
+    },
+    "59": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "60",
+        "rhs": "61"
+    },
+    "60": {
+        "type": 5,
+        "name": "h"
+    },
+    "61": {
+        "type": 0,
+        "val": 1
+    },
+    "62": {
+        "type": 0,
+        "val": 2
+    },
+    "63": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "64",
+        "rhs": "67"
+    },
+    "64": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "65",
+        "rhs": "66"
+    },
+    "65": {
+        "type": 5,
+        "name": "h"
+    },
+    "66": {
+        "type": 0,
+        "val": 1
+    },
+    "67": {
+        "type": 0,
+        "val": 2
+    },
+    "68": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "69",
+        "rhs": "72"
+    },
+    "69": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "70",
+        "rhs": "71"
+    },
+    "70": {
+        "type": 5,
+        "name": "w"
+    },
+    "71": {
+        "type": 0,
+        "val": 1
+    },
+    "72": {
+        "type": 0,
+        "val": 2
+    },
+    "73": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "74",
+        "rhs": "77"
+    },
+    "74": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "75",
+        "rhs": "76"
+    },
+    "75": {
+        "type": 5,
+        "name": "w"
+    },
+    "76": {
+        "type": 0,
+        "val": 1
+    },
+    "77": {
+        "type": 0,
+        "val": 2
+    }
+}
diff --git a/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_3.expr b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_3.expr
new file mode 100644
index 00000000..f48396b0
--- /dev/null
+++ b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_3.expr
@@ -0,0 +1,492 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 3 Rule4StageMerging: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "h": [
+                0,
+                4
+            ],
+            "w": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "46",
+            "47",
+            "48",
+            "53",
+            "58",
+            "63"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x1": [
+                0,
+                3
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y1": [
+                0,
+                3
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                2
+            ],
+            "s": [
+                0,
+                2
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 8,
+        "object": "4",
+        "indexes": [
+            "36",
+            "39",
+            "40",
+            "41",
+            "42",
+            "43",
+            "44",
+            "45"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i23": [
+                0,
+                4
+            ],
+            "i24": [
+                0,
+                2
+            ],
+            "s": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y1": [
+                0,
+                3
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        },
+        "sumVarRanges": {
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "18"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "7",
+        "indexes": [
+            "8",
+            "9",
+            "12",
+            "17"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 5,
+        "name": "n"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "11"
+    },
+    "10": {
+        "type": 5,
+        "name": "i23"
+    },
+    "11": {
+        "type": 0,
+        "val": -1
+    },
+    "12": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "13",
+        "rhs": "16"
+    },
+    "13": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "14",
+        "rhs": "15"
+    },
+    "14": {
+        "type": 5,
+        "name": "s"
+    },
+    "15": {
+        "type": 5,
+        "name": "y1"
+    },
+    "16": {
+        "type": 0,
+        "val": -1
+    },
+    "17": {
+        "type": 5,
+        "name": "f"
+    },
+    "18": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "19",
+        "indexes": [
+            "20",
+            "27",
+            "34",
+            "35"
+        ]
+    },
+    "19": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "20": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "21",
+        "rhs": "26"
+    },
+    "21": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "22",
+        "rhs": "25"
+    },
+    "22": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "23",
+        "rhs": "24"
+    },
+    "23": {
+        "type": 0,
+        "val": -2
+    },
+    "24": {
+        "type": 5,
+        "name": "i24"
+    },
+    "25": {
+        "type": 5,
+        "name": "x2"
+    },
+    "26": {
+        "type": 0,
+        "val": 2
+    },
+    "27": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "28",
+        "rhs": "33"
+    },
+    "28": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "29",
+        "rhs": "32"
+    },
+    "29": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "30",
+        "rhs": "31"
+    },
+    "30": {
+        "type": 0,
+        "val": -2
+    },
+    "31": {
+        "type": 5,
+        "name": "s"
+    },
+    "32": {
+        "type": 5,
+        "name": "y2"
+    },
+    "33": {
+        "type": 0,
+        "val": 2
+    },
+    "34": {
+        "type": 5,
+        "name": "f"
+    },
+    "35": {
+        "type": 5,
+        "name": "c"
+    },
+    "36": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "37",
+        "rhs": "38"
+    },
+    "37": {
+        "type": 5,
+        "name": "x1"
+    },
+    "38": {
+        "type": 5,
+        "name": "r"
+    },
+    "39": {
+        "type": 5,
+        "name": "r"
+    },
+    "40": {
+        "type": 5,
+        "name": "s"
+    },
+    "41": {
+        "type": 5,
+        "name": "n"
+    },
+    "42": {
+        "type": 5,
+        "name": "c"
+    },
+    "43": {
+        "type": 5,
+        "name": "x2"
+    },
+    "44": {
+        "type": 5,
+        "name": "y1"
+    },
+    "45": {
+        "type": 5,
+        "name": "y2"
+    },
+    "46": {
+        "type": 5,
+        "name": "n"
+    },
+    "47": {
+        "type": 5,
+        "name": "c"
+    },
+    "48": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "49",
+        "rhs": "52"
+    },
+    "49": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "50",
+        "rhs": "51"
+    },
+    "50": {
+        "type": 5,
+        "name": "h"
+    },
+    "51": {
+        "type": 0,
+        "val": 1
+    },
+    "52": {
+        "type": 0,
+        "val": 2
+    },
+    "53": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "54",
+        "rhs": "57"
+    },
+    "54": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "55",
+        "rhs": "56"
+    },
+    "55": {
+        "type": 5,
+        "name": "h"
+    },
+    "56": {
+        "type": 0,
+        "val": 1
+    },
+    "57": {
+        "type": 0,
+        "val": 2
+    },
+    "58": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "59",
+        "rhs": "62"
+    },
+    "59": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "60",
+        "rhs": "61"
+    },
+    "60": {
+        "type": 5,
+        "name": "w"
+    },
+    "61": {
+        "type": 0,
+        "val": 1
+    },
+    "62": {
+        "type": 0,
+        "val": 2
+    },
+    "63": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "64",
+        "rhs": "67"
+    },
+    "64": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "65",
+        "rhs": "66"
+    },
+    "65": {
+        "type": 5,
+        "name": "w"
+    },
+    "66": {
+        "type": 0,
+        "val": 1
+    },
+    "67": {
+        "type": 0,
+        "val": 2
+    }
+}
diff --git a/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_4.expr b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_4.expr
new file mode 100644
index 00000000..1ce4b5f6
--- /dev/null
+++ b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_4.expr
@@ -0,0 +1,587 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 4 Rule2VariableMerging: Old iters: [y1,s], new iters: [i31,i32] phis: [(y1 + s),s] psis: [(i31 - i32),i32]",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "h": [
+                0,
+                4
+            ],
+            "w": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "56",
+            "57",
+            "58",
+            "63",
+            "68",
+            "73"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x1": [
+                0,
+                3
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y1": [
+                0,
+                3
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                2
+            ],
+            "s": [
+                0,
+                2
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 8,
+        "object": "4",
+        "indexes": [
+            "46",
+            "49",
+            "50",
+            "51",
+            "52",
+            "53",
+            "54",
+            "55"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i23": [
+                0,
+                4
+            ],
+            "i24": [
+                0,
+                2
+            ],
+            "s": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y1": [
+                0,
+                3
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        }
+    },
+    "5": {
+        "type": 3,
+        "subExprsNum": 8,
+        "object": "6",
+        "indexes": [
+            "36",
+            "39",
+            "40",
+            "41",
+            "42",
+            "43",
+            "44",
+            "45"
+        ]
+    },
+    "6": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "7",
+        "loopVarRanges": {
+            "i31": [
+                0,
+                4
+            ],
+            "i32": [
+                0,
+                2
+            ],
+            "i23": [
+                0,
+                4
+            ],
+            "i24": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        },
+        "sumVarRanges": {
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "7": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "8",
+        "rhs": "18"
+    },
+    "8": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "9",
+        "indexes": [
+            "10",
+            "11",
+            "14",
+            "17"
+        ]
+    },
+    "9": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "10": {
+        "type": 5,
+        "name": "n"
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "13"
+    },
+    "12": {
+        "type": 5,
+        "name": "i23"
+    },
+    "13": {
+        "type": 0,
+        "val": -1
+    },
+    "14": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "15",
+        "rhs": "16"
+    },
+    "15": {
+        "type": 5,
+        "name": "i31"
+    },
+    "16": {
+        "type": 0,
+        "val": -1
+    },
+    "17": {
+        "type": 5,
+        "name": "f"
+    },
+    "18": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "19",
+        "indexes": [
+            "20",
+            "27",
+            "34",
+            "35"
+        ]
+    },
+    "19": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "20": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "21",
+        "rhs": "26"
+    },
+    "21": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "22",
+        "rhs": "25"
+    },
+    "22": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "23",
+        "rhs": "24"
+    },
+    "23": {
+        "type": 0,
+        "val": -2
+    },
+    "24": {
+        "type": 5,
+        "name": "i24"
+    },
+    "25": {
+        "type": 5,
+        "name": "x2"
+    },
+    "26": {
+        "type": 0,
+        "val": 2
+    },
+    "27": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "28",
+        "rhs": "33"
+    },
+    "28": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "29",
+        "rhs": "32"
+    },
+    "29": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "30",
+        "rhs": "31"
+    },
+    "30": {
+        "type": 0,
+        "val": -2
+    },
+    "31": {
+        "type": 5,
+        "name": "i32"
+    },
+    "32": {
+        "type": 5,
+        "name": "y2"
+    },
+    "33": {
+        "type": 0,
+        "val": 2
+    },
+    "34": {
+        "type": 5,
+        "name": "f"
+    },
+    "35": {
+        "type": 5,
+        "name": "c"
+    },
+    "36": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "37",
+        "rhs": "38"
+    },
+    "37": {
+        "type": 5,
+        "name": "y1"
+    },
+    "38": {
+        "type": 5,
+        "name": "s"
+    },
+    "39": {
+        "type": 5,
+        "name": "s"
+    },
+    "40": {
+        "type": 5,
+        "name": "i23"
+    },
+    "41": {
+        "type": 5,
+        "name": "i24"
+    },
+    "42": {
+        "type": 5,
+        "name": "n"
+    },
+    "43": {
+        "type": 5,
+        "name": "c"
+    },
+    "44": {
+        "type": 5,
+        "name": "x2"
+    },
+    "45": {
+        "type": 5,
+        "name": "y2"
+    },
+    "46": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "47",
+        "rhs": "48"
+    },
+    "47": {
+        "type": 5,
+        "name": "x1"
+    },
+    "48": {
+        "type": 5,
+        "name": "r"
+    },
+    "49": {
+        "type": 5,
+        "name": "r"
+    },
+    "50": {
+        "type": 5,
+        "name": "s"
+    },
+    "51": {
+        "type": 5,
+        "name": "n"
+    },
+    "52": {
+        "type": 5,
+        "name": "c"
+    },
+    "53": {
+        "type": 5,
+        "name": "x2"
+    },
+    "54": {
+        "type": 5,
+        "name": "y1"
+    },
+    "55": {
+        "type": 5,
+        "name": "y2"
+    },
+    "56": {
+        "type": 5,
+        "name": "n"
+    },
+    "57": {
+        "type": 5,
+        "name": "c"
+    },
+    "58": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "59",
+        "rhs": "62"
+    },
+    "59": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "60",
+        "rhs": "61"
+    },
+    "60": {
+        "type": 5,
+        "name": "h"
+    },
+    "61": {
+        "type": 0,
+        "val": 1
+    },
+    "62": {
+        "type": 0,
+        "val": 2
+    },
+    "63": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "64",
+        "rhs": "67"
+    },
+    "64": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "65",
+        "rhs": "66"
+    },
+    "65": {
+        "type": 5,
+        "name": "h"
+    },
+    "66": {
+        "type": 0,
+        "val": 1
+    },
+    "67": {
+        "type": 0,
+        "val": 2
+    },
+    "68": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "69",
+        "rhs": "72"
+    },
+    "69": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "70",
+        "rhs": "71"
+    },
+    "70": {
+        "type": 5,
+        "name": "w"
+    },
+    "71": {
+        "type": 0,
+        "val": 1
+    },
+    "72": {
+        "type": 0,
+        "val": 2
+    },
+    "73": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "74",
+        "rhs": "77"
+    },
+    "74": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "75",
+        "rhs": "76"
+    },
+    "75": {
+        "type": 5,
+        "name": "w"
+    },
+    "76": {
+        "type": 0,
+        "val": 1
+    },
+    "77": {
+        "type": 0,
+        "val": 2
+    }
+}
diff --git a/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_5.expr b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_5.expr
new file mode 100644
index 00000000..c423edd2
--- /dev/null
+++ b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_5.expr
@@ -0,0 +1,492 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 5 Rule4StageMerging: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "h": [
+                0,
+                4
+            ],
+            "w": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "46",
+            "47",
+            "48",
+            "53",
+            "58",
+            "63"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x1": [
+                0,
+                3
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y1": [
+                0,
+                3
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                2
+            ],
+            "s": [
+                0,
+                2
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 8,
+        "object": "4",
+        "indexes": [
+            "34",
+            "37",
+            "38",
+            "41",
+            "42",
+            "43",
+            "44",
+            "45"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i31": [
+                0,
+                4
+            ],
+            "i32": [
+                0,
+                2
+            ],
+            "i23": [
+                0,
+                4
+            ],
+            "i24": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        },
+        "sumVarRanges": {
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "16"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "7",
+        "indexes": [
+            "8",
+            "9",
+            "12",
+            "15"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 5,
+        "name": "n"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "11"
+    },
+    "10": {
+        "type": 5,
+        "name": "i23"
+    },
+    "11": {
+        "type": 0,
+        "val": -1
+    },
+    "12": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "13",
+        "rhs": "14"
+    },
+    "13": {
+        "type": 5,
+        "name": "i31"
+    },
+    "14": {
+        "type": 0,
+        "val": -1
+    },
+    "15": {
+        "type": 5,
+        "name": "f"
+    },
+    "16": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "17",
+        "indexes": [
+            "18",
+            "25",
+            "32",
+            "33"
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "18": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "19",
+        "rhs": "24"
+    },
+    "19": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "20",
+        "rhs": "23"
+    },
+    "20": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "21",
+        "rhs": "22"
+    },
+    "21": {
+        "type": 0,
+        "val": -2
+    },
+    "22": {
+        "type": 5,
+        "name": "i24"
+    },
+    "23": {
+        "type": 5,
+        "name": "x2"
+    },
+    "24": {
+        "type": 0,
+        "val": 2
+    },
+    "25": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "26",
+        "rhs": "31"
+    },
+    "26": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "27",
+        "rhs": "30"
+    },
+    "27": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "28",
+        "rhs": "29"
+    },
+    "28": {
+        "type": 0,
+        "val": -2
+    },
+    "29": {
+        "type": 5,
+        "name": "i32"
+    },
+    "30": {
+        "type": 5,
+        "name": "y2"
+    },
+    "31": {
+        "type": 0,
+        "val": 2
+    },
+    "32": {
+        "type": 5,
+        "name": "f"
+    },
+    "33": {
+        "type": 5,
+        "name": "c"
+    },
+    "34": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 5,
+        "name": "y1"
+    },
+    "36": {
+        "type": 5,
+        "name": "s"
+    },
+    "37": {
+        "type": 5,
+        "name": "s"
+    },
+    "38": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "39",
+        "rhs": "40"
+    },
+    "39": {
+        "type": 5,
+        "name": "x1"
+    },
+    "40": {
+        "type": 5,
+        "name": "r"
+    },
+    "41": {
+        "type": 5,
+        "name": "r"
+    },
+    "42": {
+        "type": 5,
+        "name": "n"
+    },
+    "43": {
+        "type": 5,
+        "name": "c"
+    },
+    "44": {
+        "type": 5,
+        "name": "x2"
+    },
+    "45": {
+        "type": 5,
+        "name": "y2"
+    },
+    "46": {
+        "type": 5,
+        "name": "n"
+    },
+    "47": {
+        "type": 5,
+        "name": "c"
+    },
+    "48": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "49",
+        "rhs": "52"
+    },
+    "49": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "50",
+        "rhs": "51"
+    },
+    "50": {
+        "type": 5,
+        "name": "h"
+    },
+    "51": {
+        "type": 0,
+        "val": 1
+    },
+    "52": {
+        "type": 0,
+        "val": 2
+    },
+    "53": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "54",
+        "rhs": "57"
+    },
+    "54": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "55",
+        "rhs": "56"
+    },
+    "55": {
+        "type": 5,
+        "name": "h"
+    },
+    "56": {
+        "type": 0,
+        "val": 1
+    },
+    "57": {
+        "type": 0,
+        "val": 2
+    },
+    "58": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "59",
+        "rhs": "62"
+    },
+    "59": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "60",
+        "rhs": "61"
+    },
+    "60": {
+        "type": 5,
+        "name": "w"
+    },
+    "61": {
+        "type": 0,
+        "val": 1
+    },
+    "62": {
+        "type": 0,
+        "val": 2
+    },
+    "63": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "64",
+        "rhs": "67"
+    },
+    "64": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "65",
+        "rhs": "66"
+    },
+    "65": {
+        "type": 5,
+        "name": "w"
+    },
+    "66": {
+        "type": 0,
+        "val": 1
+    },
+    "67": {
+        "type": 0,
+        "val": 2
+    }
+}
diff --git a/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_6.expr b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_6.expr
new file mode 100644
index 00000000..a34723af
--- /dev/null
+++ b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_6.expr
@@ -0,0 +1,577 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 6 Rule2VariableMerging: Old iters: [x2,i24], new iters: [i34] phis: [((x2 + (-2 * i24)) + 2)] psis: [((i34 % -2) + -2),(-1 * (i34 / -2))]",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "h": [
+                0,
+                4
+            ],
+            "w": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "55",
+            "56",
+            "57",
+            "62",
+            "67",
+            "72"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x1": [
+                0,
+                3
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y1": [
+                0,
+                3
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                2
+            ],
+            "s": [
+                0,
+                2
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 8,
+        "object": "4",
+        "indexes": [
+            "43",
+            "46",
+            "47",
+            "50",
+            "51",
+            "52",
+            "53",
+            "54"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i31": [
+                0,
+                4
+            ],
+            "i32": [
+                0,
+                2
+            ],
+            "i23": [
+                0,
+                4
+            ],
+            "i24": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        }
+    },
+    "5": {
+        "type": 3,
+        "subExprsNum": 7,
+        "object": "6",
+        "indexes": [
+            "30",
+            "37",
+            "38",
+            "39",
+            "40",
+            "41",
+            "42"
+        ]
+    },
+    "6": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "7",
+        "loopVarRanges": {
+            "i34": [
+                0,
+                4
+            ],
+            "i31": [
+                0,
+                4
+            ],
+            "i32": [
+                0,
+                2
+            ],
+            "i23": [
+                0,
+                4
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        },
+        "sumVarRanges": {
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "7": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "8",
+        "rhs": "18"
+    },
+    "8": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "9",
+        "indexes": [
+            "10",
+            "11",
+            "14",
+            "17"
+        ]
+    },
+    "9": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "10": {
+        "type": 5,
+        "name": "n"
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "13"
+    },
+    "12": {
+        "type": 5,
+        "name": "i23"
+    },
+    "13": {
+        "type": 0,
+        "val": -1
+    },
+    "14": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "15",
+        "rhs": "16"
+    },
+    "15": {
+        "type": 5,
+        "name": "i31"
+    },
+    "16": {
+        "type": 0,
+        "val": -1
+    },
+    "17": {
+        "type": 5,
+        "name": "f"
+    },
+    "18": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "19",
+        "indexes": [
+            "20",
+            "21",
+            "28",
+            "29"
+        ]
+    },
+    "19": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "20": {
+        "type": 5,
+        "name": "i34"
+    },
+    "21": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "22",
+        "rhs": "27"
+    },
+    "22": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "23",
+        "rhs": "26"
+    },
+    "23": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "24",
+        "rhs": "25"
+    },
+    "24": {
+        "type": 0,
+        "val": -2
+    },
+    "25": {
+        "type": 5,
+        "name": "i32"
+    },
+    "26": {
+        "type": 5,
+        "name": "y2"
+    },
+    "27": {
+        "type": 0,
+        "val": 2
+    },
+    "28": {
+        "type": 5,
+        "name": "f"
+    },
+    "29": {
+        "type": 5,
+        "name": "c"
+    },
+    "30": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "31",
+        "rhs": "36"
+    },
+    "31": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "32",
+        "rhs": "33"
+    },
+    "32": {
+        "type": 5,
+        "name": "x2"
+    },
+    "33": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "34",
+        "rhs": "35"
+    },
+    "34": {
+        "type": 0,
+        "val": -2
+    },
+    "35": {
+        "type": 5,
+        "name": "i24"
+    },
+    "36": {
+        "type": 0,
+        "val": 2
+    },
+    "37": {
+        "type": 5,
+        "name": "i31"
+    },
+    "38": {
+        "type": 5,
+        "name": "i32"
+    },
+    "39": {
+        "type": 5,
+        "name": "i23"
+    },
+    "40": {
+        "type": 5,
+        "name": "n"
+    },
+    "41": {
+        "type": 5,
+        "name": "c"
+    },
+    "42": {
+        "type": 5,
+        "name": "y2"
+    },
+    "43": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "44",
+        "rhs": "45"
+    },
+    "44": {
+        "type": 5,
+        "name": "y1"
+    },
+    "45": {
+        "type": 5,
+        "name": "s"
+    },
+    "46": {
+        "type": 5,
+        "name": "s"
+    },
+    "47": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "48",
+        "rhs": "49"
+    },
+    "48": {
+        "type": 5,
+        "name": "x1"
+    },
+    "49": {
+        "type": 5,
+        "name": "r"
+    },
+    "50": {
+        "type": 5,
+        "name": "r"
+    },
+    "51": {
+        "type": 5,
+        "name": "n"
+    },
+    "52": {
+        "type": 5,
+        "name": "c"
+    },
+    "53": {
+        "type": 5,
+        "name": "x2"
+    },
+    "54": {
+        "type": 5,
+        "name": "y2"
+    },
+    "55": {
+        "type": 5,
+        "name": "n"
+    },
+    "56": {
+        "type": 5,
+        "name": "c"
+    },
+    "57": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "58",
+        "rhs": "61"
+    },
+    "58": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "59",
+        "rhs": "60"
+    },
+    "59": {
+        "type": 5,
+        "name": "h"
+    },
+    "60": {
+        "type": 0,
+        "val": 1
+    },
+    "61": {
+        "type": 0,
+        "val": 2
+    },
+    "62": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "63",
+        "rhs": "66"
+    },
+    "63": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "64",
+        "rhs": "65"
+    },
+    "64": {
+        "type": 5,
+        "name": "h"
+    },
+    "65": {
+        "type": 0,
+        "val": 1
+    },
+    "66": {
+        "type": 0,
+        "val": 2
+    },
+    "67": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "68",
+        "rhs": "71"
+    },
+    "68": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "69",
+        "rhs": "70"
+    },
+    "69": {
+        "type": 5,
+        "name": "w"
+    },
+    "70": {
+        "type": 0,
+        "val": 1
+    },
+    "71": {
+        "type": 0,
+        "val": 2
+    },
+    "72": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "73",
+        "rhs": "76"
+    },
+    "73": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "74",
+        "rhs": "75"
+    },
+    "74": {
+        "type": 5,
+        "name": "w"
+    },
+    "75": {
+        "type": 0,
+        "val": 1
+    },
+    "76": {
+        "type": 0,
+        "val": 2
+    }
+}
diff --git a/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_7.expr b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_7.expr
new file mode 100644
index 00000000..8caf2ffb
--- /dev/null
+++ b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_7.expr
@@ -0,0 +1,482 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 7 Rule4StageMerging: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "h": [
+                0,
+                4
+            ],
+            "w": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "45",
+            "46",
+            "47",
+            "52",
+            "57",
+            "62"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x1": [
+                0,
+                3
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y1": [
+                0,
+                3
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                2
+            ],
+            "s": [
+                0,
+                2
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 7,
+        "object": "4",
+        "indexes": [
+            "28",
+            "35",
+            "38",
+            "39",
+            "42",
+            "43",
+            "44"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i34": [
+                0,
+                4
+            ],
+            "i31": [
+                0,
+                4
+            ],
+            "i32": [
+                0,
+                2
+            ],
+            "i23": [
+                0,
+                4
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        },
+        "sumVarRanges": {
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "16"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "7",
+        "indexes": [
+            "8",
+            "9",
+            "12",
+            "15"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 5,
+        "name": "n"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "11"
+    },
+    "10": {
+        "type": 5,
+        "name": "i23"
+    },
+    "11": {
+        "type": 0,
+        "val": -1
+    },
+    "12": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "13",
+        "rhs": "14"
+    },
+    "13": {
+        "type": 5,
+        "name": "i31"
+    },
+    "14": {
+        "type": 0,
+        "val": -1
+    },
+    "15": {
+        "type": 5,
+        "name": "f"
+    },
+    "16": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "17",
+        "indexes": [
+            "18",
+            "19",
+            "26",
+            "27"
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "18": {
+        "type": 5,
+        "name": "i34"
+    },
+    "19": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "20",
+        "rhs": "25"
+    },
+    "20": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "21",
+        "rhs": "24"
+    },
+    "21": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "22",
+        "rhs": "23"
+    },
+    "22": {
+        "type": 0,
+        "val": -2
+    },
+    "23": {
+        "type": 5,
+        "name": "i32"
+    },
+    "24": {
+        "type": 5,
+        "name": "y2"
+    },
+    "25": {
+        "type": 0,
+        "val": 2
+    },
+    "26": {
+        "type": 5,
+        "name": "f"
+    },
+    "27": {
+        "type": 5,
+        "name": "c"
+    },
+    "28": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "29",
+        "rhs": "34"
+    },
+    "29": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "30",
+        "rhs": "31"
+    },
+    "30": {
+        "type": 5,
+        "name": "x2"
+    },
+    "31": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "32",
+        "rhs": "33"
+    },
+    "32": {
+        "type": 0,
+        "val": -2
+    },
+    "33": {
+        "type": 5,
+        "name": "r"
+    },
+    "34": {
+        "type": 0,
+        "val": 2
+    },
+    "35": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "36",
+        "rhs": "37"
+    },
+    "36": {
+        "type": 5,
+        "name": "y1"
+    },
+    "37": {
+        "type": 5,
+        "name": "s"
+    },
+    "38": {
+        "type": 5,
+        "name": "s"
+    },
+    "39": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "40",
+        "rhs": "41"
+    },
+    "40": {
+        "type": 5,
+        "name": "x1"
+    },
+    "41": {
+        "type": 5,
+        "name": "r"
+    },
+    "42": {
+        "type": 5,
+        "name": "n"
+    },
+    "43": {
+        "type": 5,
+        "name": "c"
+    },
+    "44": {
+        "type": 5,
+        "name": "y2"
+    },
+    "45": {
+        "type": 5,
+        "name": "n"
+    },
+    "46": {
+        "type": 5,
+        "name": "c"
+    },
+    "47": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "48",
+        "rhs": "51"
+    },
+    "48": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "49",
+        "rhs": "50"
+    },
+    "49": {
+        "type": 5,
+        "name": "h"
+    },
+    "50": {
+        "type": 0,
+        "val": 1
+    },
+    "51": {
+        "type": 0,
+        "val": 2
+    },
+    "52": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "53",
+        "rhs": "56"
+    },
+    "53": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "54",
+        "rhs": "55"
+    },
+    "54": {
+        "type": 5,
+        "name": "h"
+    },
+    "55": {
+        "type": 0,
+        "val": 1
+    },
+    "56": {
+        "type": 0,
+        "val": 2
+    },
+    "57": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "58",
+        "rhs": "61"
+    },
+    "58": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "59",
+        "rhs": "60"
+    },
+    "59": {
+        "type": 5,
+        "name": "w"
+    },
+    "60": {
+        "type": 0,
+        "val": 1
+    },
+    "61": {
+        "type": 0,
+        "val": 2
+    },
+    "62": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "63",
+        "rhs": "66"
+    },
+    "63": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "64",
+        "rhs": "65"
+    },
+    "64": {
+        "type": 5,
+        "name": "w"
+    },
+    "65": {
+        "type": 0,
+        "val": 1
+    },
+    "66": {
+        "type": 0,
+        "val": 2
+    }
+}
diff --git a/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_8.expr b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_8.expr
new file mode 100644
index 00000000..eb7c929a
--- /dev/null
+++ b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_8.expr
@@ -0,0 +1,557 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 8 Rule2VariableMerging: Old iters: [y2,i32], new iters: [i36] phis: [((y2 + (-2 * i32)) + 2)] psis: [((i36 % -2) + -2),(-1 * (i36 / -2))]",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "h": [
+                0,
+                4
+            ],
+            "w": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "53",
+            "54",
+            "55",
+            "60",
+            "65",
+            "70"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x1": [
+                0,
+                3
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y1": [
+                0,
+                3
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                2
+            ],
+            "s": [
+                0,
+                2
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 7,
+        "object": "4",
+        "indexes": [
+            "36",
+            "43",
+            "46",
+            "47",
+            "50",
+            "51",
+            "52"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i34": [
+                0,
+                4
+            ],
+            "i31": [
+                0,
+                4
+            ],
+            "i32": [
+                0,
+                2
+            ],
+            "i23": [
+                0,
+                4
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        }
+    },
+    "5": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "6",
+        "indexes": [
+            "24",
+            "31",
+            "32",
+            "33",
+            "34",
+            "35"
+        ]
+    },
+    "6": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "7",
+        "loopVarRanges": {
+            "i36": [
+                0,
+                4
+            ],
+            "i34": [
+                0,
+                4
+            ],
+            "i31": [
+                0,
+                4
+            ],
+            "i23": [
+                0,
+                4
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ]
+        },
+        "sumVarRanges": {
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "7": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "8",
+        "rhs": "18"
+    },
+    "8": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "9",
+        "indexes": [
+            "10",
+            "11",
+            "14",
+            "17"
+        ]
+    },
+    "9": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "10": {
+        "type": 5,
+        "name": "n"
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "13"
+    },
+    "12": {
+        "type": 5,
+        "name": "i23"
+    },
+    "13": {
+        "type": 0,
+        "val": -1
+    },
+    "14": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "15",
+        "rhs": "16"
+    },
+    "15": {
+        "type": 5,
+        "name": "i31"
+    },
+    "16": {
+        "type": 0,
+        "val": -1
+    },
+    "17": {
+        "type": 5,
+        "name": "f"
+    },
+    "18": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "19",
+        "indexes": [
+            "20",
+            "21",
+            "22",
+            "23"
+        ]
+    },
+    "19": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "20": {
+        "type": 5,
+        "name": "i34"
+    },
+    "21": {
+        "type": 5,
+        "name": "i36"
+    },
+    "22": {
+        "type": 5,
+        "name": "f"
+    },
+    "23": {
+        "type": 5,
+        "name": "c"
+    },
+    "24": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "25",
+        "rhs": "30"
+    },
+    "25": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "26",
+        "rhs": "27"
+    },
+    "26": {
+        "type": 5,
+        "name": "y2"
+    },
+    "27": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "28",
+        "rhs": "29"
+    },
+    "28": {
+        "type": 0,
+        "val": -2
+    },
+    "29": {
+        "type": 5,
+        "name": "i32"
+    },
+    "30": {
+        "type": 0,
+        "val": 2
+    },
+    "31": {
+        "type": 5,
+        "name": "i34"
+    },
+    "32": {
+        "type": 5,
+        "name": "i31"
+    },
+    "33": {
+        "type": 5,
+        "name": "i23"
+    },
+    "34": {
+        "type": 5,
+        "name": "n"
+    },
+    "35": {
+        "type": 5,
+        "name": "c"
+    },
+    "36": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "37",
+        "rhs": "42"
+    },
+    "37": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "38",
+        "rhs": "39"
+    },
+    "38": {
+        "type": 5,
+        "name": "x2"
+    },
+    "39": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "40",
+        "rhs": "41"
+    },
+    "40": {
+        "type": 0,
+        "val": -2
+    },
+    "41": {
+        "type": 5,
+        "name": "r"
+    },
+    "42": {
+        "type": 0,
+        "val": 2
+    },
+    "43": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "44",
+        "rhs": "45"
+    },
+    "44": {
+        "type": 5,
+        "name": "y1"
+    },
+    "45": {
+        "type": 5,
+        "name": "s"
+    },
+    "46": {
+        "type": 5,
+        "name": "s"
+    },
+    "47": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "48",
+        "rhs": "49"
+    },
+    "48": {
+        "type": 5,
+        "name": "x1"
+    },
+    "49": {
+        "type": 5,
+        "name": "r"
+    },
+    "50": {
+        "type": 5,
+        "name": "n"
+    },
+    "51": {
+        "type": 5,
+        "name": "c"
+    },
+    "52": {
+        "type": 5,
+        "name": "y2"
+    },
+    "53": {
+        "type": 5,
+        "name": "n"
+    },
+    "54": {
+        "type": 5,
+        "name": "c"
+    },
+    "55": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "56",
+        "rhs": "59"
+    },
+    "56": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "57",
+        "rhs": "58"
+    },
+    "57": {
+        "type": 5,
+        "name": "h"
+    },
+    "58": {
+        "type": 0,
+        "val": 1
+    },
+    "59": {
+        "type": 0,
+        "val": 2
+    },
+    "60": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "61",
+        "rhs": "64"
+    },
+    "61": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "62",
+        "rhs": "63"
+    },
+    "62": {
+        "type": 5,
+        "name": "h"
+    },
+    "63": {
+        "type": 0,
+        "val": 1
+    },
+    "64": {
+        "type": 0,
+        "val": 2
+    },
+    "65": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "66",
+        "rhs": "69"
+    },
+    "66": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "67",
+        "rhs": "68"
+    },
+    "67": {
+        "type": 5,
+        "name": "w"
+    },
+    "68": {
+        "type": 0,
+        "val": 1
+    },
+    "69": {
+        "type": 0,
+        "val": 2
+    },
+    "70": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "71",
+        "rhs": "74"
+    },
+    "71": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "72",
+        "rhs": "73"
+    },
+    "72": {
+        "type": 5,
+        "name": "w"
+    },
+    "73": {
+        "type": 0,
+        "val": 1
+    },
+    "74": {
+        "type": 0,
+        "val": 2
+    }
+}
diff --git a/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_9.expr b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_9.expr
new file mode 100644
index 00000000..92d391bd
--- /dev/null
+++ b/test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_9.expr
@@ -0,0 +1,472 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 9 Rule4StageMerging: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "h": [
+                0,
+                4
+            ],
+            "w": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                256
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "44",
+            "45",
+            "46",
+            "51",
+            "56",
+            "61"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ],
+            "x1": [
+                0,
+                3
+            ],
+            "x2": [
+                0,
+                2
+            ],
+            "y1": [
+                0,
+                3
+            ],
+            "y2": [
+                0,
+                2
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                2
+            ],
+            "s": [
+                0,
+                2
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "4",
+        "indexes": [
+            "22",
+            "29",
+            "36",
+            "39",
+            "42",
+            "43"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i36": [
+                0,
+                4
+            ],
+            "i34": [
+                0,
+                4
+            ],
+            "i31": [
+                0,
+                4
+            ],
+            "i23": [
+                0,
+                4
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "c": [
+                0,
+                256
+            ]
+        },
+        "sumVarRanges": {
+            "f": [
+                0,
+                448
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "16"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "7",
+        "indexes": [
+            "8",
+            "9",
+            "12",
+            "15"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            2,
+            2,
+            448
+        ],
+        "paddings": [
+            0,
+            2,
+            2,
+            0
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 5,
+        "name": "n"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "11"
+    },
+    "10": {
+        "type": 5,
+        "name": "i23"
+    },
+    "11": {
+        "type": 0,
+        "val": -1
+    },
+    "12": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "13",
+        "rhs": "14"
+    },
+    "13": {
+        "type": 5,
+        "name": "i31"
+    },
+    "14": {
+        "type": 0,
+        "val": -1
+    },
+    "15": {
+        "type": 5,
+        "name": "f"
+    },
+    "16": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "17",
+        "indexes": [
+            "18",
+            "19",
+            "20",
+            "21"
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            4,
+            4,
+            448,
+            256
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "18": {
+        "type": 5,
+        "name": "i34"
+    },
+    "19": {
+        "type": 5,
+        "name": "i36"
+    },
+    "20": {
+        "type": 5,
+        "name": "f"
+    },
+    "21": {
+        "type": 5,
+        "name": "c"
+    },
+    "22": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "23",
+        "rhs": "28"
+    },
+    "23": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "24",
+        "rhs": "25"
+    },
+    "24": {
+        "type": 5,
+        "name": "y2"
+    },
+    "25": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "26",
+        "rhs": "27"
+    },
+    "26": {
+        "type": 0,
+        "val": -2
+    },
+    "27": {
+        "type": 5,
+        "name": "s"
+    },
+    "28": {
+        "type": 0,
+        "val": 2
+    },
+    "29": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "30",
+        "rhs": "35"
+    },
+    "30": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "31",
+        "rhs": "32"
+    },
+    "31": {
+        "type": 5,
+        "name": "x2"
+    },
+    "32": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "33",
+        "rhs": "34"
+    },
+    "33": {
+        "type": 0,
+        "val": -2
+    },
+    "34": {
+        "type": 5,
+        "name": "r"
+    },
+    "35": {
+        "type": 0,
+        "val": 2
+    },
+    "36": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "37",
+        "rhs": "38"
+    },
+    "37": {
+        "type": 5,
+        "name": "y1"
+    },
+    "38": {
+        "type": 5,
+        "name": "s"
+    },
+    "39": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "40",
+        "rhs": "41"
+    },
+    "40": {
+        "type": 5,
+        "name": "x1"
+    },
+    "41": {
+        "type": 5,
+        "name": "r"
+    },
+    "42": {
+        "type": 5,
+        "name": "n"
+    },
+    "43": {
+        "type": 5,
+        "name": "c"
+    },
+    "44": {
+        "type": 5,
+        "name": "n"
+    },
+    "45": {
+        "type": 5,
+        "name": "c"
+    },
+    "46": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "47",
+        "rhs": "50"
+    },
+    "47": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "48",
+        "rhs": "49"
+    },
+    "48": {
+        "type": 5,
+        "name": "h"
+    },
+    "49": {
+        "type": 0,
+        "val": 1
+    },
+    "50": {
+        "type": 0,
+        "val": 2
+    },
+    "51": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "52",
+        "rhs": "55"
+    },
+    "52": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "53",
+        "rhs": "54"
+    },
+    "53": {
+        "type": 5,
+        "name": "h"
+    },
+    "54": {
+        "type": 0,
+        "val": 1
+    },
+    "55": {
+        "type": 0,
+        "val": 2
+    },
+    "56": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "57",
+        "rhs": "60"
+    },
+    "57": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "58",
+        "rhs": "59"
+    },
+    "58": {
+        "type": 5,
+        "name": "w"
+    },
+    "59": {
+        "type": 0,
+        "val": 1
+    },
+    "60": {
+        "type": 0,
+        "val": 2
+    },
+    "61": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "62",
+        "rhs": "65"
+    },
+    "62": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "63",
+        "rhs": "64"
+    },
+    "63": {
+        "type": 5,
+        "name": "w"
+    },
+    "64": {
+        "type": 0,
+        "val": 1
+    },
+    "65": {
+        "type": 0,
+        "val": 2
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_0.expr b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_0.expr
new file mode 100644
index 00000000..e75fba5e
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_0.expr
@@ -0,0 +1,180 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 0 Init: ruleBasedDFS: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                32
+            ],
+            "r": [
+                0,
+                5
+            ],
+            "s": [
+                0,
+                5
+            ]
+        }
+    },
+    "1": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "2",
+        "rhs": "16"
+    },
+    "2": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "3",
+        "indexes": [
+            "4",
+            "5",
+            "6",
+            "11"
+        ]
+    },
+    "3": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            32,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            2,
+            2
+        ],
+        "source": "-1"
+    },
+    "4": {
+        "type": 5,
+        "name": "n"
+    },
+    "5": {
+        "type": 5,
+        "name": "c"
+    },
+    "6": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "7",
+        "rhs": "10"
+    },
+    "7": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "8",
+        "rhs": "9"
+    },
+    "8": {
+        "type": 5,
+        "name": "h"
+    },
+    "9": {
+        "type": 5,
+        "name": "r"
+    },
+    "10": {
+        "type": 0,
+        "val": -2
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "15"
+    },
+    "12": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "13",
+        "rhs": "14"
+    },
+    "13": {
+        "type": 5,
+        "name": "w"
+    },
+    "14": {
+        "type": 5,
+        "name": "s"
+    },
+    "15": {
+        "type": 0,
+        "val": -2
+    },
+    "16": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "17",
+        "indexes": [
+            "18",
+            "19",
+            "20",
+            "21"
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "18": {
+        "type": 5,
+        "name": "f"
+    },
+    "19": {
+        "type": 5,
+        "name": "c"
+    },
+    "20": {
+        "type": 5,
+        "name": "r"
+    },
+    "21": {
+        "type": 5,
+        "name": "s"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_1.expr b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_1.expr
new file mode 100644
index 00000000..854ee606
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_1.expr
@@ -0,0 +1,180 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 1 Rule9RangeMagnify: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                32
+            ],
+            "r": [
+                0,
+                6
+            ],
+            "s": [
+                0,
+                6
+            ]
+        }
+    },
+    "1": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "2",
+        "rhs": "16"
+    },
+    "2": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "3",
+        "indexes": [
+            "4",
+            "5",
+            "6",
+            "11"
+        ]
+    },
+    "3": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            32,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "4": {
+        "type": 5,
+        "name": "n"
+    },
+    "5": {
+        "type": 5,
+        "name": "c"
+    },
+    "6": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "7",
+        "rhs": "10"
+    },
+    "7": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "8",
+        "rhs": "9"
+    },
+    "8": {
+        "type": 5,
+        "name": "h"
+    },
+    "9": {
+        "type": 5,
+        "name": "r"
+    },
+    "10": {
+        "type": 0,
+        "val": -2
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "15"
+    },
+    "12": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "13",
+        "rhs": "14"
+    },
+    "13": {
+        "type": 5,
+        "name": "w"
+    },
+    "14": {
+        "type": 5,
+        "name": "s"
+    },
+    "15": {
+        "type": 0,
+        "val": -2
+    },
+    "16": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "17",
+        "indexes": [
+            "18",
+            "19",
+            "20",
+            "21"
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "18": {
+        "type": 5,
+        "name": "f"
+    },
+    "19": {
+        "type": 5,
+        "name": "c"
+    },
+    "20": {
+        "type": 5,
+        "name": "r"
+    },
+    "21": {
+        "type": 5,
+        "name": "s"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_10.expr b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_10.expr
new file mode 100644
index 00000000..17aa68c1
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_10.expr
@@ -0,0 +1,572 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 10 Rule8GuidedDLT: Toward Conv. guidedDLTMoreVar2 ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "61",
+            "66",
+            "67",
+            "72",
+            "73",
+            "74"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            1,
+            0,
+            1,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i63": [
+                0,
+                226
+            ],
+            "i64": [
+                0,
+                2
+            ],
+            "i59": [
+                0,
+                226
+            ],
+            "i60": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "4",
+        "indexes": [
+            "49",
+            "58",
+            "59",
+            "60"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i65": [
+                0,
+                4
+            ],
+            "i63": [
+                0,
+                226
+            ],
+            "i59": [
+                0,
+                226
+            ],
+            "n": [
+                0,
+                16
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                32
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "20"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "7",
+        "indexes": [
+            "8",
+            "9",
+            "10",
+            "15"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            32,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 5,
+        "name": "n"
+    },
+    "9": {
+        "type": 5,
+        "name": "c"
+    },
+    "10": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "11",
+        "rhs": "14"
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "13"
+    },
+    "12": {
+        "type": 5,
+        "name": "i4"
+    },
+    "13": {
+        "type": 5,
+        "name": "i59"
+    },
+    "14": {
+        "type": 0,
+        "val": -2
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "19"
+    },
+    "16": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "17",
+        "rhs": "18"
+    },
+    "17": {
+        "type": 5,
+        "name": "i14"
+    },
+    "18": {
+        "type": 5,
+        "name": "i63"
+    },
+    "19": {
+        "type": 0,
+        "val": -2
+    },
+    "20": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "21",
+        "indexes": [
+            "45",
+            "46",
+            "47",
+            "48"
+        ]
+    },
+    "21": {
+        "type": 4,
+        "name": "T6",
+        "shape": [
+            4,
+            32,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "22"
+    },
+    "22": {
+        "type": 105,
+        "inputs": [
+            "23"
+        ],
+        "expr": "24",
+        "outputShape": [
+            4,
+            32,
+            3,
+            3
+        ]
+    },
+    "23": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "24": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "25",
+        "loopVarRanges": {
+            "i65": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                32
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "25": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "26",
+        "indexes": [
+            "27",
+            "30",
+            "31",
+            "38"
+        ]
+    },
+    "26": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "27": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "28",
+        "rhs": "29"
+    },
+    "28": {
+        "type": 5,
+        "name": "i65"
+    },
+    "29": {
+        "type": 0,
+        "val": 4
+    },
+    "30": {
+        "type": 5,
+        "name": "c"
+    },
+    "31": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "32",
+        "rhs": "37"
+    },
+    "32": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "33",
+        "rhs": "34"
+    },
+    "33": {
+        "type": 0,
+        "val": 3
+    },
+    "34": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 5,
+        "name": "i65"
+    },
+    "36": {
+        "type": 0,
+        "val": 2
+    },
+    "37": {
+        "type": 5,
+        "name": "i4"
+    },
+    "38": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "39",
+        "rhs": "44"
+    },
+    "39": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "40",
+        "rhs": "41"
+    },
+    "40": {
+        "type": 0,
+        "val": 3
+    },
+    "41": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "42",
+        "rhs": "43"
+    },
+    "42": {
+        "type": 5,
+        "name": "i65"
+    },
+    "43": {
+        "type": 0,
+        "val": 2
+    },
+    "44": {
+        "type": 5,
+        "name": "i14"
+    },
+    "45": {
+        "type": 5,
+        "name": "i65"
+    },
+    "46": {
+        "type": 5,
+        "name": "c"
+    },
+    "47": {
+        "type": 5,
+        "name": "i4"
+    },
+    "48": {
+        "type": 5,
+        "name": "i14"
+    },
+    "49": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "50",
+        "rhs": "57"
+    },
+    "50": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "51",
+        "rhs": "54"
+    },
+    "51": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "52",
+        "rhs": "53"
+    },
+    "52": {
+        "type": 0,
+        "val": 4
+    },
+    "53": {
+        "type": 5,
+        "name": "f"
+    },
+    "54": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "55",
+        "rhs": "56"
+    },
+    "55": {
+        "type": 0,
+        "val": 2
+    },
+    "56": {
+        "type": 5,
+        "name": "i60"
+    },
+    "57": {
+        "type": 5,
+        "name": "i64"
+    },
+    "58": {
+        "type": 5,
+        "name": "i63"
+    },
+    "59": {
+        "type": 5,
+        "name": "i59"
+    },
+    "60": {
+        "type": 5,
+        "name": "n"
+    },
+    "61": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "62",
+        "rhs": "63"
+    },
+    "62": {
+        "type": 5,
+        "name": "w"
+    },
+    "63": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "64",
+        "rhs": "65"
+    },
+    "64": {
+        "type": 0,
+        "val": 3
+    },
+    "65": {
+        "type": 5,
+        "name": "i13"
+    },
+    "66": {
+        "type": 5,
+        "name": "i13"
+    },
+    "67": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "68",
+        "rhs": "69"
+    },
+    "68": {
+        "type": 5,
+        "name": "h"
+    },
+    "69": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "70",
+        "rhs": "71"
+    },
+    "70": {
+        "type": 0,
+        "val": 3
+    },
+    "71": {
+        "type": 5,
+        "name": "i3"
+    },
+    "72": {
+        "type": 5,
+        "name": "i3"
+    },
+    "73": {
+        "type": 5,
+        "name": "n"
+    },
+    "74": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_11.expr b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_11.expr
new file mode 100644
index 00000000..1af9ef99
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_11.expr
@@ -0,0 +1,627 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 11 Rule8GuidedDLT: Toward Conv. guidedDLTDLMismatch ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "67",
+            "72",
+            "73",
+            "78",
+            "79",
+            "80"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            1,
+            0,
+            1,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i63": [
+                0,
+                226
+            ],
+            "i64": [
+                0,
+                2
+            ],
+            "i59": [
+                0,
+                226
+            ],
+            "i60": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "4",
+        "indexes": [
+            "55",
+            "64",
+            "65",
+            "66"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i65": [
+                0,
+                4
+            ],
+            "i63": [
+                0,
+                226
+            ],
+            "i59": [
+                0,
+                226
+            ],
+            "n": [
+                0,
+                16
+            ]
+        }
+    },
+    "5": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "6",
+        "indexes": [
+            "51",
+            "52",
+            "53",
+            "54"
+        ]
+    },
+    "6": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "7",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "i65": [
+                0,
+                4
+            ],
+            "i59": [
+                0,
+                226
+            ],
+            "i63": [
+                0,
+                226
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                32
+            ]
+        }
+    },
+    "7": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "8",
+        "rhs": "22"
+    },
+    "8": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "9",
+        "indexes": [
+            "10",
+            "11",
+            "12",
+            "17"
+        ]
+    },
+    "9": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            32,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "10": {
+        "type": 5,
+        "name": "n"
+    },
+    "11": {
+        "type": 5,
+        "name": "c"
+    },
+    "12": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "13",
+        "rhs": "16"
+    },
+    "13": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "14",
+        "rhs": "15"
+    },
+    "14": {
+        "type": 5,
+        "name": "i4"
+    },
+    "15": {
+        "type": 5,
+        "name": "i59"
+    },
+    "16": {
+        "type": 0,
+        "val": -2
+    },
+    "17": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "18",
+        "rhs": "21"
+    },
+    "18": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "19",
+        "rhs": "20"
+    },
+    "19": {
+        "type": 5,
+        "name": "i14"
+    },
+    "20": {
+        "type": 5,
+        "name": "i63"
+    },
+    "21": {
+        "type": 0,
+        "val": -2
+    },
+    "22": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "23",
+        "indexes": [
+            "47",
+            "48",
+            "49",
+            "50"
+        ]
+    },
+    "23": {
+        "type": 4,
+        "name": "T6",
+        "shape": [
+            4,
+            32,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "24"
+    },
+    "24": {
+        "type": 105,
+        "inputs": [
+            "25"
+        ],
+        "expr": "26",
+        "outputShape": [
+            4,
+            32,
+            3,
+            3
+        ]
+    },
+    "25": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "26": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "27",
+        "loopVarRanges": {
+            "i65": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                32
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "27": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "28",
+        "indexes": [
+            "29",
+            "32",
+            "33",
+            "40"
+        ]
+    },
+    "28": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "29": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "30",
+        "rhs": "31"
+    },
+    "30": {
+        "type": 5,
+        "name": "i65"
+    },
+    "31": {
+        "type": 0,
+        "val": 4
+    },
+    "32": {
+        "type": 5,
+        "name": "c"
+    },
+    "33": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "34",
+        "rhs": "39"
+    },
+    "34": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 0,
+        "val": 3
+    },
+    "36": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "37",
+        "rhs": "38"
+    },
+    "37": {
+        "type": 5,
+        "name": "i65"
+    },
+    "38": {
+        "type": 0,
+        "val": 2
+    },
+    "39": {
+        "type": 5,
+        "name": "i4"
+    },
+    "40": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "41",
+        "rhs": "46"
+    },
+    "41": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "42",
+        "rhs": "43"
+    },
+    "42": {
+        "type": 0,
+        "val": 3
+    },
+    "43": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "44",
+        "rhs": "45"
+    },
+    "44": {
+        "type": 5,
+        "name": "i65"
+    },
+    "45": {
+        "type": 0,
+        "val": 2
+    },
+    "46": {
+        "type": 5,
+        "name": "i14"
+    },
+    "47": {
+        "type": 5,
+        "name": "i65"
+    },
+    "48": {
+        "type": 5,
+        "name": "c"
+    },
+    "49": {
+        "type": 5,
+        "name": "i4"
+    },
+    "50": {
+        "type": 5,
+        "name": "i14"
+    },
+    "51": {
+        "type": 5,
+        "name": "n"
+    },
+    "52": {
+        "type": 5,
+        "name": "i65"
+    },
+    "53": {
+        "type": 5,
+        "name": "i59"
+    },
+    "54": {
+        "type": 5,
+        "name": "i63"
+    },
+    "55": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "56",
+        "rhs": "63"
+    },
+    "56": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "57",
+        "rhs": "60"
+    },
+    "57": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "58",
+        "rhs": "59"
+    },
+    "58": {
+        "type": 0,
+        "val": 4
+    },
+    "59": {
+        "type": 5,
+        "name": "f"
+    },
+    "60": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "61",
+        "rhs": "62"
+    },
+    "61": {
+        "type": 0,
+        "val": 2
+    },
+    "62": {
+        "type": 5,
+        "name": "i60"
+    },
+    "63": {
+        "type": 5,
+        "name": "i64"
+    },
+    "64": {
+        "type": 5,
+        "name": "i63"
+    },
+    "65": {
+        "type": 5,
+        "name": "i59"
+    },
+    "66": {
+        "type": 5,
+        "name": "n"
+    },
+    "67": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "68",
+        "rhs": "69"
+    },
+    "68": {
+        "type": 5,
+        "name": "w"
+    },
+    "69": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "70",
+        "rhs": "71"
+    },
+    "70": {
+        "type": 0,
+        "val": 3
+    },
+    "71": {
+        "type": 5,
+        "name": "i13"
+    },
+    "72": {
+        "type": 5,
+        "name": "i13"
+    },
+    "73": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "74",
+        "rhs": "75"
+    },
+    "74": {
+        "type": 5,
+        "name": "h"
+    },
+    "75": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "76",
+        "rhs": "77"
+    },
+    "76": {
+        "type": 0,
+        "val": 3
+    },
+    "77": {
+        "type": 5,
+        "name": "i3"
+    },
+    "78": {
+        "type": 5,
+        "name": "i3"
+    },
+    "79": {
+        "type": 5,
+        "name": "n"
+    },
+    "80": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_12.expr b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_12.expr
new file mode 100644
index 00000000..10a8a8a2
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_12.expr
@@ -0,0 +1,572 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 12 Rule4StageMerging: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "61",
+            "66",
+            "67",
+            "72",
+            "73",
+            "74"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            1,
+            0,
+            1,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i63": [
+                0,
+                226
+            ],
+            "i64": [
+                0,
+                2
+            ],
+            "i59": [
+                0,
+                226
+            ],
+            "i60": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "4",
+        "indexes": [
+            "49",
+            "50",
+            "59",
+            "60"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "i65": [
+                0,
+                4
+            ],
+            "i59": [
+                0,
+                226
+            ],
+            "i63": [
+                0,
+                226
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                32
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "20"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "7",
+        "indexes": [
+            "8",
+            "9",
+            "10",
+            "15"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            32,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 5,
+        "name": "n"
+    },
+    "9": {
+        "type": 5,
+        "name": "c"
+    },
+    "10": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "11",
+        "rhs": "14"
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "13"
+    },
+    "12": {
+        "type": 5,
+        "name": "i4"
+    },
+    "13": {
+        "type": 5,
+        "name": "i59"
+    },
+    "14": {
+        "type": 0,
+        "val": -2
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "19"
+    },
+    "16": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "17",
+        "rhs": "18"
+    },
+    "17": {
+        "type": 5,
+        "name": "i14"
+    },
+    "18": {
+        "type": 5,
+        "name": "i63"
+    },
+    "19": {
+        "type": 0,
+        "val": -2
+    },
+    "20": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "21",
+        "indexes": [
+            "45",
+            "46",
+            "47",
+            "48"
+        ]
+    },
+    "21": {
+        "type": 4,
+        "name": "T6",
+        "shape": [
+            4,
+            32,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "22"
+    },
+    "22": {
+        "type": 105,
+        "inputs": [
+            "23"
+        ],
+        "expr": "24",
+        "outputShape": [
+            4,
+            32,
+            3,
+            3
+        ]
+    },
+    "23": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "24": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "25",
+        "loopVarRanges": {
+            "i65": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                32
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "25": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "26",
+        "indexes": [
+            "27",
+            "30",
+            "31",
+            "38"
+        ]
+    },
+    "26": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "27": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "28",
+        "rhs": "29"
+    },
+    "28": {
+        "type": 5,
+        "name": "i65"
+    },
+    "29": {
+        "type": 0,
+        "val": 4
+    },
+    "30": {
+        "type": 5,
+        "name": "c"
+    },
+    "31": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "32",
+        "rhs": "37"
+    },
+    "32": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "33",
+        "rhs": "34"
+    },
+    "33": {
+        "type": 0,
+        "val": 3
+    },
+    "34": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 5,
+        "name": "i65"
+    },
+    "36": {
+        "type": 0,
+        "val": 2
+    },
+    "37": {
+        "type": 5,
+        "name": "i4"
+    },
+    "38": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "39",
+        "rhs": "44"
+    },
+    "39": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "40",
+        "rhs": "41"
+    },
+    "40": {
+        "type": 0,
+        "val": 3
+    },
+    "41": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "42",
+        "rhs": "43"
+    },
+    "42": {
+        "type": 5,
+        "name": "i65"
+    },
+    "43": {
+        "type": 0,
+        "val": 2
+    },
+    "44": {
+        "type": 5,
+        "name": "i14"
+    },
+    "45": {
+        "type": 5,
+        "name": "i65"
+    },
+    "46": {
+        "type": 5,
+        "name": "c"
+    },
+    "47": {
+        "type": 5,
+        "name": "i4"
+    },
+    "48": {
+        "type": 5,
+        "name": "i14"
+    },
+    "49": {
+        "type": 5,
+        "name": "n"
+    },
+    "50": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "51",
+        "rhs": "58"
+    },
+    "51": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "52",
+        "rhs": "55"
+    },
+    "52": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "53",
+        "rhs": "54"
+    },
+    "53": {
+        "type": 0,
+        "val": 4
+    },
+    "54": {
+        "type": 5,
+        "name": "f"
+    },
+    "55": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "56",
+        "rhs": "57"
+    },
+    "56": {
+        "type": 0,
+        "val": 2
+    },
+    "57": {
+        "type": 5,
+        "name": "i60"
+    },
+    "58": {
+        "type": 5,
+        "name": "i64"
+    },
+    "59": {
+        "type": 5,
+        "name": "i59"
+    },
+    "60": {
+        "type": 5,
+        "name": "i63"
+    },
+    "61": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "62",
+        "rhs": "63"
+    },
+    "62": {
+        "type": 5,
+        "name": "w"
+    },
+    "63": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "64",
+        "rhs": "65"
+    },
+    "64": {
+        "type": 0,
+        "val": 3
+    },
+    "65": {
+        "type": 5,
+        "name": "i13"
+    },
+    "66": {
+        "type": 5,
+        "name": "i13"
+    },
+    "67": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "68",
+        "rhs": "69"
+    },
+    "68": {
+        "type": 5,
+        "name": "h"
+    },
+    "69": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "70",
+        "rhs": "71"
+    },
+    "70": {
+        "type": 0,
+        "val": 3
+    },
+    "71": {
+        "type": 5,
+        "name": "i3"
+    },
+    "72": {
+        "type": 5,
+        "name": "i3"
+    },
+    "73": {
+        "type": 5,
+        "name": "n"
+    },
+    "74": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_13.expr b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_13.expr
new file mode 100644
index 00000000..be83acb8
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_13.expr
@@ -0,0 +1,811 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 13 Rule6KenerlMatching: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "88",
+            "93",
+            "94",
+            "99",
+            "100",
+            "101"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            1,
+            0,
+            1,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i63": [
+                0,
+                226
+            ],
+            "i64": [
+                0,
+                2
+            ],
+            "i59": [
+                0,
+                226
+            ],
+            "i60": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "4",
+        "indexes": [
+            "76",
+            "77",
+            "86",
+            "87"
+        ]
+    },
+    "4": {
+        "type": 4,
+        "name": "T7",
+        "shape": [
+            16,
+            4,
+            226,
+            226
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "5"
+    },
+    "5": {
+        "type": 102,
+        "inputs": [
+            "6",
+            "7"
+        ],
+        "expr": "31",
+        "args": [
+            2,
+            2,
+            1,
+            1,
+            1,
+            1
+        ]
+    },
+    "6": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            32,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "7": {
+        "type": 4,
+        "name": "T6",
+        "shape": [
+            4,
+            32,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "8"
+    },
+    "8": {
+        "type": 105,
+        "inputs": [
+            "9"
+        ],
+        "expr": "10",
+        "outputShape": [
+            4,
+            32,
+            3,
+            3
+        ]
+    },
+    "9": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "10": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "11",
+        "loopVarRanges": {
+            "i65": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                32
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "11": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "12",
+        "indexes": [
+            "13",
+            "16",
+            "17",
+            "24"
+        ]
+    },
+    "12": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "13": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "14",
+        "rhs": "15"
+    },
+    "14": {
+        "type": 5,
+        "name": "i65"
+    },
+    "15": {
+        "type": 0,
+        "val": 4
+    },
+    "16": {
+        "type": 5,
+        "name": "c"
+    },
+    "17": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "18",
+        "rhs": "23"
+    },
+    "18": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "19",
+        "rhs": "20"
+    },
+    "19": {
+        "type": 0,
+        "val": 3
+    },
+    "20": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "21",
+        "rhs": "22"
+    },
+    "21": {
+        "type": 5,
+        "name": "i65"
+    },
+    "22": {
+        "type": 0,
+        "val": 2
+    },
+    "23": {
+        "type": 5,
+        "name": "i4"
+    },
+    "24": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "25",
+        "rhs": "30"
+    },
+    "25": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "26",
+        "rhs": "27"
+    },
+    "26": {
+        "type": 0,
+        "val": 3
+    },
+    "27": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "28",
+        "rhs": "29"
+    },
+    "28": {
+        "type": 5,
+        "name": "i65"
+    },
+    "29": {
+        "type": 0,
+        "val": 2
+    },
+    "30": {
+        "type": 5,
+        "name": "i14"
+    },
+    "31": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "32",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "i65": [
+                0,
+                4
+            ],
+            "i59": [
+                0,
+                226
+            ],
+            "i63": [
+                0,
+                226
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                32
+            ]
+        }
+    },
+    "32": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "33",
+        "rhs": "47"
+    },
+    "33": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "34",
+        "indexes": [
+            "35",
+            "36",
+            "37",
+            "42"
+        ]
+    },
+    "34": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            32,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "35": {
+        "type": 5,
+        "name": "n"
+    },
+    "36": {
+        "type": 5,
+        "name": "c"
+    },
+    "37": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "38",
+        "rhs": "41"
+    },
+    "38": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "39",
+        "rhs": "40"
+    },
+    "39": {
+        "type": 5,
+        "name": "i4"
+    },
+    "40": {
+        "type": 5,
+        "name": "i59"
+    },
+    "41": {
+        "type": 0,
+        "val": -2
+    },
+    "42": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "43",
+        "rhs": "46"
+    },
+    "43": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "44",
+        "rhs": "45"
+    },
+    "44": {
+        "type": 5,
+        "name": "i14"
+    },
+    "45": {
+        "type": 5,
+        "name": "i63"
+    },
+    "46": {
+        "type": 0,
+        "val": -2
+    },
+    "47": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "48",
+        "indexes": [
+            "72",
+            "73",
+            "74",
+            "75"
+        ]
+    },
+    "48": {
+        "type": 4,
+        "name": "T6",
+        "shape": [
+            4,
+            32,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "49"
+    },
+    "49": {
+        "type": 105,
+        "inputs": [
+            "50"
+        ],
+        "expr": "51",
+        "outputShape": [
+            4,
+            32,
+            3,
+            3
+        ]
+    },
+    "50": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "51": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "52",
+        "loopVarRanges": {
+            "i65": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                32
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "52": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "53",
+        "indexes": [
+            "54",
+            "57",
+            "58",
+            "65"
+        ]
+    },
+    "53": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "54": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "55",
+        "rhs": "56"
+    },
+    "55": {
+        "type": 5,
+        "name": "i65"
+    },
+    "56": {
+        "type": 0,
+        "val": 4
+    },
+    "57": {
+        "type": 5,
+        "name": "c"
+    },
+    "58": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "59",
+        "rhs": "64"
+    },
+    "59": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "60",
+        "rhs": "61"
+    },
+    "60": {
+        "type": 0,
+        "val": 3
+    },
+    "61": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "62",
+        "rhs": "63"
+    },
+    "62": {
+        "type": 5,
+        "name": "i65"
+    },
+    "63": {
+        "type": 0,
+        "val": 2
+    },
+    "64": {
+        "type": 5,
+        "name": "i4"
+    },
+    "65": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "66",
+        "rhs": "71"
+    },
+    "66": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "67",
+        "rhs": "68"
+    },
+    "67": {
+        "type": 0,
+        "val": 3
+    },
+    "68": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "69",
+        "rhs": "70"
+    },
+    "69": {
+        "type": 5,
+        "name": "i65"
+    },
+    "70": {
+        "type": 0,
+        "val": 2
+    },
+    "71": {
+        "type": 5,
+        "name": "i14"
+    },
+    "72": {
+        "type": 5,
+        "name": "i65"
+    },
+    "73": {
+        "type": 5,
+        "name": "c"
+    },
+    "74": {
+        "type": 5,
+        "name": "i4"
+    },
+    "75": {
+        "type": 5,
+        "name": "i14"
+    },
+    "76": {
+        "type": 5,
+        "name": "n"
+    },
+    "77": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "78",
+        "rhs": "85"
+    },
+    "78": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "79",
+        "rhs": "82"
+    },
+    "79": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "80",
+        "rhs": "81"
+    },
+    "80": {
+        "type": 0,
+        "val": 4
+    },
+    "81": {
+        "type": 5,
+        "name": "f"
+    },
+    "82": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "83",
+        "rhs": "84"
+    },
+    "83": {
+        "type": 0,
+        "val": 2
+    },
+    "84": {
+        "type": 5,
+        "name": "i60"
+    },
+    "85": {
+        "type": 5,
+        "name": "i64"
+    },
+    "86": {
+        "type": 5,
+        "name": "i59"
+    },
+    "87": {
+        "type": 5,
+        "name": "i63"
+    },
+    "88": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "89",
+        "rhs": "90"
+    },
+    "89": {
+        "type": 5,
+        "name": "w"
+    },
+    "90": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "91",
+        "rhs": "92"
+    },
+    "91": {
+        "type": 0,
+        "val": 3
+    },
+    "92": {
+        "type": 5,
+        "name": "i13"
+    },
+    "93": {
+        "type": 5,
+        "name": "i13"
+    },
+    "94": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "95",
+        "rhs": "96"
+    },
+    "95": {
+        "type": 5,
+        "name": "h"
+    },
+    "96": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "97",
+        "rhs": "98"
+    },
+    "97": {
+        "type": 0,
+        "val": 3
+    },
+    "98": {
+        "type": 5,
+        "name": "i3"
+    },
+    "99": {
+        "type": 5,
+        "name": "i3"
+    },
+    "100": {
+        "type": 5,
+        "name": "n"
+    },
+    "101": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_14.expr b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_14.expr
new file mode 100644
index 00000000..d12eb64d
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_14.expr
@@ -0,0 +1,736 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 14 Rule4StageMerging: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "2",
+        "indexes": [
+            "74",
+            "75",
+            "84",
+            "89"
+        ]
+    },
+    "2": {
+        "type": 4,
+        "name": "T7",
+        "shape": [
+            16,
+            4,
+            226,
+            226
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "3"
+    },
+    "3": {
+        "type": 102,
+        "inputs": [
+            "4",
+            "5"
+        ],
+        "expr": "29",
+        "args": [
+            2,
+            2,
+            1,
+            1,
+            1,
+            1
+        ]
+    },
+    "4": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            32,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "5": {
+        "type": 4,
+        "name": "T6",
+        "shape": [
+            4,
+            32,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "6"
+    },
+    "6": {
+        "type": 105,
+        "inputs": [
+            "7"
+        ],
+        "expr": "8",
+        "outputShape": [
+            4,
+            32,
+            3,
+            3
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "9",
+        "loopVarRanges": {
+            "i65": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                32
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "9": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "10",
+        "indexes": [
+            "11",
+            "14",
+            "15",
+            "22"
+        ]
+    },
+    "10": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "11": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "12",
+        "rhs": "13"
+    },
+    "12": {
+        "type": 5,
+        "name": "i65"
+    },
+    "13": {
+        "type": 0,
+        "val": 4
+    },
+    "14": {
+        "type": 5,
+        "name": "c"
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "21"
+    },
+    "16": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "17",
+        "rhs": "18"
+    },
+    "17": {
+        "type": 0,
+        "val": 3
+    },
+    "18": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "19",
+        "rhs": "20"
+    },
+    "19": {
+        "type": 5,
+        "name": "i65"
+    },
+    "20": {
+        "type": 0,
+        "val": 2
+    },
+    "21": {
+        "type": 5,
+        "name": "i4"
+    },
+    "22": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "23",
+        "rhs": "28"
+    },
+    "23": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "24",
+        "rhs": "25"
+    },
+    "24": {
+        "type": 0,
+        "val": 3
+    },
+    "25": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "26",
+        "rhs": "27"
+    },
+    "26": {
+        "type": 5,
+        "name": "i65"
+    },
+    "27": {
+        "type": 0,
+        "val": 2
+    },
+    "28": {
+        "type": 5,
+        "name": "i14"
+    },
+    "29": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "30",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "i65": [
+                0,
+                4
+            ],
+            "i59": [
+                0,
+                226
+            ],
+            "i63": [
+                0,
+                226
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                32
+            ]
+        }
+    },
+    "30": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "31",
+        "rhs": "45"
+    },
+    "31": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "32",
+        "indexes": [
+            "33",
+            "34",
+            "35",
+            "40"
+        ]
+    },
+    "32": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            32,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "33": {
+        "type": 5,
+        "name": "n"
+    },
+    "34": {
+        "type": 5,
+        "name": "c"
+    },
+    "35": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "36",
+        "rhs": "39"
+    },
+    "36": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "37",
+        "rhs": "38"
+    },
+    "37": {
+        "type": 5,
+        "name": "i4"
+    },
+    "38": {
+        "type": 5,
+        "name": "i59"
+    },
+    "39": {
+        "type": 0,
+        "val": -2
+    },
+    "40": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "41",
+        "rhs": "44"
+    },
+    "41": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "42",
+        "rhs": "43"
+    },
+    "42": {
+        "type": 5,
+        "name": "i14"
+    },
+    "43": {
+        "type": 5,
+        "name": "i63"
+    },
+    "44": {
+        "type": 0,
+        "val": -2
+    },
+    "45": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "46",
+        "indexes": [
+            "70",
+            "71",
+            "72",
+            "73"
+        ]
+    },
+    "46": {
+        "type": 4,
+        "name": "T6",
+        "shape": [
+            4,
+            32,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "47"
+    },
+    "47": {
+        "type": 105,
+        "inputs": [
+            "48"
+        ],
+        "expr": "49",
+        "outputShape": [
+            4,
+            32,
+            3,
+            3
+        ]
+    },
+    "48": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "49": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "50",
+        "loopVarRanges": {
+            "i65": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                32
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "50": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "51",
+        "indexes": [
+            "52",
+            "55",
+            "56",
+            "63"
+        ]
+    },
+    "51": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "52": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "53",
+        "rhs": "54"
+    },
+    "53": {
+        "type": 5,
+        "name": "i65"
+    },
+    "54": {
+        "type": 0,
+        "val": 4
+    },
+    "55": {
+        "type": 5,
+        "name": "c"
+    },
+    "56": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "57",
+        "rhs": "62"
+    },
+    "57": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "58",
+        "rhs": "59"
+    },
+    "58": {
+        "type": 0,
+        "val": 3
+    },
+    "59": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "60",
+        "rhs": "61"
+    },
+    "60": {
+        "type": 5,
+        "name": "i65"
+    },
+    "61": {
+        "type": 0,
+        "val": 2
+    },
+    "62": {
+        "type": 5,
+        "name": "i4"
+    },
+    "63": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "64",
+        "rhs": "69"
+    },
+    "64": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "65",
+        "rhs": "66"
+    },
+    "65": {
+        "type": 0,
+        "val": 3
+    },
+    "66": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "67",
+        "rhs": "68"
+    },
+    "67": {
+        "type": 5,
+        "name": "i65"
+    },
+    "68": {
+        "type": 0,
+        "val": 2
+    },
+    "69": {
+        "type": 5,
+        "name": "i14"
+    },
+    "70": {
+        "type": 5,
+        "name": "i65"
+    },
+    "71": {
+        "type": 5,
+        "name": "c"
+    },
+    "72": {
+        "type": 5,
+        "name": "i4"
+    },
+    "73": {
+        "type": 5,
+        "name": "i14"
+    },
+    "74": {
+        "type": 5,
+        "name": "n"
+    },
+    "75": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "76",
+        "rhs": "83"
+    },
+    "76": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "77",
+        "rhs": "80"
+    },
+    "77": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "78",
+        "rhs": "79"
+    },
+    "78": {
+        "type": 0,
+        "val": 4
+    },
+    "79": {
+        "type": 5,
+        "name": "f"
+    },
+    "80": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "81",
+        "rhs": "82"
+    },
+    "81": {
+        "type": 0,
+        "val": 2
+    },
+    "82": {
+        "type": 5,
+        "name": "i3"
+    },
+    "83": {
+        "type": 5,
+        "name": "i13"
+    },
+    "84": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "85",
+        "rhs": "86"
+    },
+    "85": {
+        "type": 5,
+        "name": "h"
+    },
+    "86": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "87",
+        "rhs": "88"
+    },
+    "87": {
+        "type": 0,
+        "val": 3
+    },
+    "88": {
+        "type": 5,
+        "name": "i3"
+    },
+    "89": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "90",
+        "rhs": "91"
+    },
+    "90": {
+        "type": 5,
+        "name": "w"
+    },
+    "91": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "92",
+        "rhs": "93"
+    },
+    "92": {
+        "type": 0,
+        "val": 3
+    },
+    "93": {
+        "type": 5,
+        "name": "i13"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_15.expr b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_15.expr
new file mode 100644
index 00000000..e311ad32
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_15.expr
@@ -0,0 +1,1353 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 15 Rule6KenerlMatching: ",
+    "0": {
+        "type": 4,
+        "name": "T8",
+        "shape": [
+            16,
+            1,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "1"
+    },
+    "1": {
+        "type": 105,
+        "inputs": [
+            "2"
+        ],
+        "expr": "74",
+        "outputShape": [
+            16,
+            1,
+            224,
+            224
+        ]
+    },
+    "2": {
+        "type": 4,
+        "name": "T7",
+        "shape": [
+            16,
+            4,
+            226,
+            226
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "3"
+    },
+    "3": {
+        "type": 102,
+        "inputs": [
+            "4",
+            "5"
+        ],
+        "expr": "29",
+        "args": [
+            2,
+            2,
+            1,
+            1,
+            1,
+            1
+        ]
+    },
+    "4": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            32,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "5": {
+        "type": 4,
+        "name": "T6",
+        "shape": [
+            4,
+            32,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "6"
+    },
+    "6": {
+        "type": 105,
+        "inputs": [
+            "7"
+        ],
+        "expr": "8",
+        "outputShape": [
+            4,
+            32,
+            3,
+            3
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "9",
+        "loopVarRanges": {
+            "i65": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                32
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "9": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "10",
+        "indexes": [
+            "11",
+            "14",
+            "15",
+            "22"
+        ]
+    },
+    "10": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "11": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "12",
+        "rhs": "13"
+    },
+    "12": {
+        "type": 5,
+        "name": "i65"
+    },
+    "13": {
+        "type": 0,
+        "val": 4
+    },
+    "14": {
+        "type": 5,
+        "name": "c"
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "21"
+    },
+    "16": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "17",
+        "rhs": "18"
+    },
+    "17": {
+        "type": 0,
+        "val": 3
+    },
+    "18": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "19",
+        "rhs": "20"
+    },
+    "19": {
+        "type": 5,
+        "name": "i65"
+    },
+    "20": {
+        "type": 0,
+        "val": 2
+    },
+    "21": {
+        "type": 5,
+        "name": "i4"
+    },
+    "22": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "23",
+        "rhs": "28"
+    },
+    "23": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "24",
+        "rhs": "25"
+    },
+    "24": {
+        "type": 0,
+        "val": 3
+    },
+    "25": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "26",
+        "rhs": "27"
+    },
+    "26": {
+        "type": 5,
+        "name": "i65"
+    },
+    "27": {
+        "type": 0,
+        "val": 2
+    },
+    "28": {
+        "type": 5,
+        "name": "i14"
+    },
+    "29": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "30",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "i65": [
+                0,
+                4
+            ],
+            "i59": [
+                0,
+                226
+            ],
+            "i63": [
+                0,
+                226
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                32
+            ]
+        }
+    },
+    "30": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "31",
+        "rhs": "45"
+    },
+    "31": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "32",
+        "indexes": [
+            "33",
+            "34",
+            "35",
+            "40"
+        ]
+    },
+    "32": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            32,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "33": {
+        "type": 5,
+        "name": "n"
+    },
+    "34": {
+        "type": 5,
+        "name": "c"
+    },
+    "35": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "36",
+        "rhs": "39"
+    },
+    "36": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "37",
+        "rhs": "38"
+    },
+    "37": {
+        "type": 5,
+        "name": "i4"
+    },
+    "38": {
+        "type": 5,
+        "name": "i59"
+    },
+    "39": {
+        "type": 0,
+        "val": -2
+    },
+    "40": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "41",
+        "rhs": "44"
+    },
+    "41": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "42",
+        "rhs": "43"
+    },
+    "42": {
+        "type": 5,
+        "name": "i14"
+    },
+    "43": {
+        "type": 5,
+        "name": "i63"
+    },
+    "44": {
+        "type": 0,
+        "val": -2
+    },
+    "45": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "46",
+        "indexes": [
+            "70",
+            "71",
+            "72",
+            "73"
+        ]
+    },
+    "46": {
+        "type": 4,
+        "name": "T6",
+        "shape": [
+            4,
+            32,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "47"
+    },
+    "47": {
+        "type": 105,
+        "inputs": [
+            "48"
+        ],
+        "expr": "49",
+        "outputShape": [
+            4,
+            32,
+            3,
+            3
+        ]
+    },
+    "48": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "49": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "50",
+        "loopVarRanges": {
+            "i65": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                32
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "50": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "51",
+        "indexes": [
+            "52",
+            "55",
+            "56",
+            "63"
+        ]
+    },
+    "51": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "52": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "53",
+        "rhs": "54"
+    },
+    "53": {
+        "type": 5,
+        "name": "i65"
+    },
+    "54": {
+        "type": 0,
+        "val": 4
+    },
+    "55": {
+        "type": 5,
+        "name": "c"
+    },
+    "56": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "57",
+        "rhs": "62"
+    },
+    "57": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "58",
+        "rhs": "59"
+    },
+    "58": {
+        "type": 0,
+        "val": 3
+    },
+    "59": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "60",
+        "rhs": "61"
+    },
+    "60": {
+        "type": 5,
+        "name": "i65"
+    },
+    "61": {
+        "type": 0,
+        "val": 2
+    },
+    "62": {
+        "type": 5,
+        "name": "i4"
+    },
+    "63": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "64",
+        "rhs": "69"
+    },
+    "64": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "65",
+        "rhs": "66"
+    },
+    "65": {
+        "type": 0,
+        "val": 3
+    },
+    "66": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "67",
+        "rhs": "68"
+    },
+    "67": {
+        "type": 5,
+        "name": "i65"
+    },
+    "68": {
+        "type": 0,
+        "val": 2
+    },
+    "69": {
+        "type": 5,
+        "name": "i14"
+    },
+    "70": {
+        "type": 5,
+        "name": "i65"
+    },
+    "71": {
+        "type": 5,
+        "name": "c"
+    },
+    "72": {
+        "type": 5,
+        "name": "i4"
+    },
+    "73": {
+        "type": 5,
+        "name": "i14"
+    },
+    "74": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "75",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ]
+        }
+    },
+    "75": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "76",
+        "indexes": [
+            "148",
+            "149",
+            "158",
+            "163"
+        ]
+    },
+    "76": {
+        "type": 4,
+        "name": "T7",
+        "shape": [
+            16,
+            4,
+            226,
+            226
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "77"
+    },
+    "77": {
+        "type": 102,
+        "inputs": [
+            "78",
+            "79"
+        ],
+        "expr": "103",
+        "args": [
+            2,
+            2,
+            1,
+            1,
+            1,
+            1
+        ]
+    },
+    "78": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            32,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "79": {
+        "type": 4,
+        "name": "T6",
+        "shape": [
+            4,
+            32,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "80"
+    },
+    "80": {
+        "type": 105,
+        "inputs": [
+            "81"
+        ],
+        "expr": "82",
+        "outputShape": [
+            4,
+            32,
+            3,
+            3
+        ]
+    },
+    "81": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "82": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "83",
+        "loopVarRanges": {
+            "i65": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                32
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "83": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "84",
+        "indexes": [
+            "85",
+            "88",
+            "89",
+            "96"
+        ]
+    },
+    "84": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "85": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "86",
+        "rhs": "87"
+    },
+    "86": {
+        "type": 5,
+        "name": "i65"
+    },
+    "87": {
+        "type": 0,
+        "val": 4
+    },
+    "88": {
+        "type": 5,
+        "name": "c"
+    },
+    "89": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "90",
+        "rhs": "95"
+    },
+    "90": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "91",
+        "rhs": "92"
+    },
+    "91": {
+        "type": 0,
+        "val": 3
+    },
+    "92": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "93",
+        "rhs": "94"
+    },
+    "93": {
+        "type": 5,
+        "name": "i65"
+    },
+    "94": {
+        "type": 0,
+        "val": 2
+    },
+    "95": {
+        "type": 5,
+        "name": "i4"
+    },
+    "96": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "97",
+        "rhs": "102"
+    },
+    "97": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "98",
+        "rhs": "99"
+    },
+    "98": {
+        "type": 0,
+        "val": 3
+    },
+    "99": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "100",
+        "rhs": "101"
+    },
+    "100": {
+        "type": 5,
+        "name": "i65"
+    },
+    "101": {
+        "type": 0,
+        "val": 2
+    },
+    "102": {
+        "type": 5,
+        "name": "i14"
+    },
+    "103": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "104",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "i65": [
+                0,
+                4
+            ],
+            "i59": [
+                0,
+                226
+            ],
+            "i63": [
+                0,
+                226
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                32
+            ]
+        }
+    },
+    "104": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "105",
+        "rhs": "119"
+    },
+    "105": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "106",
+        "indexes": [
+            "107",
+            "108",
+            "109",
+            "114"
+        ]
+    },
+    "106": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            32,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "107": {
+        "type": 5,
+        "name": "n"
+    },
+    "108": {
+        "type": 5,
+        "name": "c"
+    },
+    "109": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "110",
+        "rhs": "113"
+    },
+    "110": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "111",
+        "rhs": "112"
+    },
+    "111": {
+        "type": 5,
+        "name": "i4"
+    },
+    "112": {
+        "type": 5,
+        "name": "i59"
+    },
+    "113": {
+        "type": 0,
+        "val": -2
+    },
+    "114": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "115",
+        "rhs": "118"
+    },
+    "115": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "116",
+        "rhs": "117"
+    },
+    "116": {
+        "type": 5,
+        "name": "i14"
+    },
+    "117": {
+        "type": 5,
+        "name": "i63"
+    },
+    "118": {
+        "type": 0,
+        "val": -2
+    },
+    "119": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "120",
+        "indexes": [
+            "144",
+            "145",
+            "146",
+            "147"
+        ]
+    },
+    "120": {
+        "type": 4,
+        "name": "T6",
+        "shape": [
+            4,
+            32,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "121"
+    },
+    "121": {
+        "type": 105,
+        "inputs": [
+            "122"
+        ],
+        "expr": "123",
+        "outputShape": [
+            4,
+            32,
+            3,
+            3
+        ]
+    },
+    "122": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "123": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "124",
+        "loopVarRanges": {
+            "i65": [
+                0,
+                4
+            ],
+            "c": [
+                0,
+                32
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "124": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "125",
+        "indexes": [
+            "126",
+            "129",
+            "130",
+            "137"
+        ]
+    },
+    "125": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "126": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "127",
+        "rhs": "128"
+    },
+    "127": {
+        "type": 5,
+        "name": "i65"
+    },
+    "128": {
+        "type": 0,
+        "val": 4
+    },
+    "129": {
+        "type": 5,
+        "name": "c"
+    },
+    "130": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "131",
+        "rhs": "136"
+    },
+    "131": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "132",
+        "rhs": "133"
+    },
+    "132": {
+        "type": 0,
+        "val": 3
+    },
+    "133": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "134",
+        "rhs": "135"
+    },
+    "134": {
+        "type": 5,
+        "name": "i65"
+    },
+    "135": {
+        "type": 0,
+        "val": 2
+    },
+    "136": {
+        "type": 5,
+        "name": "i4"
+    },
+    "137": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "138",
+        "rhs": "143"
+    },
+    "138": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "139",
+        "rhs": "140"
+    },
+    "139": {
+        "type": 0,
+        "val": 3
+    },
+    "140": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "141",
+        "rhs": "142"
+    },
+    "141": {
+        "type": 5,
+        "name": "i65"
+    },
+    "142": {
+        "type": 0,
+        "val": 2
+    },
+    "143": {
+        "type": 5,
+        "name": "i14"
+    },
+    "144": {
+        "type": 5,
+        "name": "i65"
+    },
+    "145": {
+        "type": 5,
+        "name": "c"
+    },
+    "146": {
+        "type": 5,
+        "name": "i4"
+    },
+    "147": {
+        "type": 5,
+        "name": "i14"
+    },
+    "148": {
+        "type": 5,
+        "name": "n"
+    },
+    "149": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "150",
+        "rhs": "157"
+    },
+    "150": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "151",
+        "rhs": "154"
+    },
+    "151": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "152",
+        "rhs": "153"
+    },
+    "152": {
+        "type": 0,
+        "val": 4
+    },
+    "153": {
+        "type": 5,
+        "name": "f"
+    },
+    "154": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "155",
+        "rhs": "156"
+    },
+    "155": {
+        "type": 0,
+        "val": 2
+    },
+    "156": {
+        "type": 5,
+        "name": "i3"
+    },
+    "157": {
+        "type": 5,
+        "name": "i13"
+    },
+    "158": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "159",
+        "rhs": "160"
+    },
+    "159": {
+        "type": 5,
+        "name": "h"
+    },
+    "160": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "161",
+        "rhs": "162"
+    },
+    "161": {
+        "type": 0,
+        "val": 3
+    },
+    "162": {
+        "type": 5,
+        "name": "i3"
+    },
+    "163": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "164",
+        "rhs": "165"
+    },
+    "164": {
+        "type": 5,
+        "name": "w"
+    },
+    "165": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "166",
+        "rhs": "167"
+    },
+    "166": {
+        "type": 0,
+        "val": 3
+    },
+    "167": {
+        "type": 5,
+        "name": "i13"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_2.expr b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_2.expr
new file mode 100644
index 00000000..7ba3a6ea
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_2.expr
@@ -0,0 +1,224 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 2 Rule1VariableSplit: Old iters: [r], new iters: [i3,i4] phis: [] psis: [((3 * i3) + i4)]",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i3": [
+                0,
+                2
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                32
+            ],
+            "s": [
+                0,
+                6
+            ]
+        }
+    },
+    "1": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "2",
+        "rhs": "20"
+    },
+    "2": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "3",
+        "indexes": [
+            "4",
+            "5",
+            "6",
+            "15"
+        ]
+    },
+    "3": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            32,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "4": {
+        "type": 5,
+        "name": "n"
+    },
+    "5": {
+        "type": 5,
+        "name": "c"
+    },
+    "6": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "7",
+        "rhs": "14"
+    },
+    "7": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "8",
+        "rhs": "9"
+    },
+    "8": {
+        "type": 5,
+        "name": "h"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "13"
+    },
+    "10": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "11",
+        "rhs": "12"
+    },
+    "11": {
+        "type": 0,
+        "val": 3
+    },
+    "12": {
+        "type": 5,
+        "name": "i3"
+    },
+    "13": {
+        "type": 5,
+        "name": "i4"
+    },
+    "14": {
+        "type": 0,
+        "val": -2
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "19"
+    },
+    "16": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "17",
+        "rhs": "18"
+    },
+    "17": {
+        "type": 5,
+        "name": "w"
+    },
+    "18": {
+        "type": 5,
+        "name": "s"
+    },
+    "19": {
+        "type": 0,
+        "val": -2
+    },
+    "20": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "21",
+        "indexes": [
+            "22",
+            "23",
+            "24",
+            "29"
+        ]
+    },
+    "21": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "22": {
+        "type": 5,
+        "name": "f"
+    },
+    "23": {
+        "type": 5,
+        "name": "c"
+    },
+    "24": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "25",
+        "rhs": "28"
+    },
+    "25": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "26",
+        "rhs": "27"
+    },
+    "26": {
+        "type": 0,
+        "val": 3
+    },
+    "27": {
+        "type": 5,
+        "name": "i3"
+    },
+    "28": {
+        "type": 5,
+        "name": "i4"
+    },
+    "29": {
+        "type": 5,
+        "name": "s"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_3.expr b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_3.expr
new file mode 100644
index 00000000..ca52d223
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_3.expr
@@ -0,0 +1,268 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 3 Rule1VariableSplit: Old iters: [s], new iters: [i13,i14] phis: [] psis: [((3 * i13) + i14)]",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i14": [
+                0,
+                3
+            ],
+            "i3": [
+                0,
+                2
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                32
+            ]
+        }
+    },
+    "1": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "2",
+        "rhs": "24"
+    },
+    "2": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "3",
+        "indexes": [
+            "4",
+            "5",
+            "6",
+            "15"
+        ]
+    },
+    "3": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            32,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "4": {
+        "type": 5,
+        "name": "n"
+    },
+    "5": {
+        "type": 5,
+        "name": "c"
+    },
+    "6": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "7",
+        "rhs": "14"
+    },
+    "7": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "8",
+        "rhs": "9"
+    },
+    "8": {
+        "type": 5,
+        "name": "h"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "13"
+    },
+    "10": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "11",
+        "rhs": "12"
+    },
+    "11": {
+        "type": 0,
+        "val": 3
+    },
+    "12": {
+        "type": 5,
+        "name": "i3"
+    },
+    "13": {
+        "type": 5,
+        "name": "i4"
+    },
+    "14": {
+        "type": 0,
+        "val": -2
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "23"
+    },
+    "16": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "17",
+        "rhs": "18"
+    },
+    "17": {
+        "type": 5,
+        "name": "w"
+    },
+    "18": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "19",
+        "rhs": "22"
+    },
+    "19": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "20",
+        "rhs": "21"
+    },
+    "20": {
+        "type": 0,
+        "val": 3
+    },
+    "21": {
+        "type": 5,
+        "name": "i13"
+    },
+    "22": {
+        "type": 5,
+        "name": "i14"
+    },
+    "23": {
+        "type": 0,
+        "val": -2
+    },
+    "24": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "25",
+        "indexes": [
+            "26",
+            "27",
+            "28",
+            "33"
+        ]
+    },
+    "25": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "26": {
+        "type": 5,
+        "name": "f"
+    },
+    "27": {
+        "type": 5,
+        "name": "c"
+    },
+    "28": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "29",
+        "rhs": "32"
+    },
+    "29": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "30",
+        "rhs": "31"
+    },
+    "30": {
+        "type": 0,
+        "val": 3
+    },
+    "31": {
+        "type": 5,
+        "name": "i3"
+    },
+    "32": {
+        "type": 5,
+        "name": "i4"
+    },
+    "33": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "34",
+        "rhs": "37"
+    },
+    "34": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 0,
+        "val": 3
+    },
+    "36": {
+        "type": 5,
+        "name": "i13"
+    },
+    "37": {
+        "type": 5,
+        "name": "i14"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_4.expr b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_4.expr
new file mode 100644
index 00000000..67ed0fa4
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_4.expr
@@ -0,0 +1,345 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 4 Rule3StageSplit: Separate sum iters: [i13,i3]",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "40",
+            "41",
+            "42",
+            "43",
+            "44",
+            "45"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                32
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "26"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "5",
+        "indexes": [
+            "6",
+            "7",
+            "8",
+            "17"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            32,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "6": {
+        "type": 5,
+        "name": "n"
+    },
+    "7": {
+        "type": 5,
+        "name": "c"
+    },
+    "8": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "9",
+        "rhs": "16"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "11"
+    },
+    "10": {
+        "type": 5,
+        "name": "h"
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "15"
+    },
+    "12": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "13",
+        "rhs": "14"
+    },
+    "13": {
+        "type": 0,
+        "val": 3
+    },
+    "14": {
+        "type": 5,
+        "name": "i3"
+    },
+    "15": {
+        "type": 5,
+        "name": "i4"
+    },
+    "16": {
+        "type": 0,
+        "val": -2
+    },
+    "17": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "18",
+        "rhs": "25"
+    },
+    "18": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "19",
+        "rhs": "20"
+    },
+    "19": {
+        "type": 5,
+        "name": "w"
+    },
+    "20": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "21",
+        "rhs": "24"
+    },
+    "21": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "22",
+        "rhs": "23"
+    },
+    "22": {
+        "type": 0,
+        "val": 3
+    },
+    "23": {
+        "type": 5,
+        "name": "i13"
+    },
+    "24": {
+        "type": 5,
+        "name": "i14"
+    },
+    "25": {
+        "type": 0,
+        "val": -2
+    },
+    "26": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "27",
+        "indexes": [
+            "28",
+            "29",
+            "30",
+            "35"
+        ]
+    },
+    "27": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "28": {
+        "type": 5,
+        "name": "f"
+    },
+    "29": {
+        "type": 5,
+        "name": "c"
+    },
+    "30": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "31",
+        "rhs": "34"
+    },
+    "31": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "32",
+        "rhs": "33"
+    },
+    "32": {
+        "type": 0,
+        "val": 3
+    },
+    "33": {
+        "type": 5,
+        "name": "i3"
+    },
+    "34": {
+        "type": 5,
+        "name": "i4"
+    },
+    "35": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "36",
+        "rhs": "39"
+    },
+    "36": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "37",
+        "rhs": "38"
+    },
+    "37": {
+        "type": 0,
+        "val": 3
+    },
+    "38": {
+        "type": 5,
+        "name": "i13"
+    },
+    "39": {
+        "type": 5,
+        "name": "i14"
+    },
+    "40": {
+        "type": 5,
+        "name": "i13"
+    },
+    "41": {
+        "type": 5,
+        "name": "i3"
+    },
+    "42": {
+        "type": 5,
+        "name": "n"
+    },
+    "43": {
+        "type": 5,
+        "name": "f"
+    },
+    "44": {
+        "type": 5,
+        "name": "h"
+    },
+    "45": {
+        "type": 5,
+        "name": "w"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_5.expr b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_5.expr
new file mode 100644
index 00000000..1c0a4404
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_5.expr
@@ -0,0 +1,420 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 5 Rule2VariableMerging: Old iters: [h,i3], new iters: [i59,i60] phis: [(h + (3 * i3)),i3] psis: [(i59 - (3 * i60)),i60]",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "48",
+            "49",
+            "50",
+            "51",
+            "52",
+            "53"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "4",
+        "indexes": [
+            "38",
+            "43",
+            "44",
+            "45",
+            "46",
+            "47"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i59": [
+                0,
+                227
+            ],
+            "i60": [
+                0,
+                2
+            ],
+            "i13": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                32
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "24"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "7",
+        "indexes": [
+            "8",
+            "9",
+            "10",
+            "15"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            32,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 5,
+        "name": "n"
+    },
+    "9": {
+        "type": 5,
+        "name": "c"
+    },
+    "10": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "11",
+        "rhs": "14"
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "13"
+    },
+    "12": {
+        "type": 5,
+        "name": "i4"
+    },
+    "13": {
+        "type": 5,
+        "name": "i59"
+    },
+    "14": {
+        "type": 0,
+        "val": -2
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "23"
+    },
+    "16": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "17",
+        "rhs": "22"
+    },
+    "17": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "18",
+        "rhs": "21"
+    },
+    "18": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "19",
+        "rhs": "20"
+    },
+    "19": {
+        "type": 0,
+        "val": 3
+    },
+    "20": {
+        "type": 5,
+        "name": "i13"
+    },
+    "21": {
+        "type": 5,
+        "name": "i14"
+    },
+    "22": {
+        "type": 5,
+        "name": "w"
+    },
+    "23": {
+        "type": 0,
+        "val": -2
+    },
+    "24": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "25",
+        "indexes": [
+            "26",
+            "27",
+            "28",
+            "33"
+        ]
+    },
+    "25": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "26": {
+        "type": 5,
+        "name": "f"
+    },
+    "27": {
+        "type": 5,
+        "name": "c"
+    },
+    "28": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "29",
+        "rhs": "30"
+    },
+    "29": {
+        "type": 5,
+        "name": "i4"
+    },
+    "30": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "31",
+        "rhs": "32"
+    },
+    "31": {
+        "type": 0,
+        "val": 3
+    },
+    "32": {
+        "type": 5,
+        "name": "i60"
+    },
+    "33": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "34",
+        "rhs": "37"
+    },
+    "34": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 0,
+        "val": 3
+    },
+    "36": {
+        "type": 5,
+        "name": "i13"
+    },
+    "37": {
+        "type": 5,
+        "name": "i14"
+    },
+    "38": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "39",
+        "rhs": "40"
+    },
+    "39": {
+        "type": 5,
+        "name": "h"
+    },
+    "40": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "41",
+        "rhs": "42"
+    },
+    "41": {
+        "type": 0,
+        "val": 3
+    },
+    "42": {
+        "type": 5,
+        "name": "i3"
+    },
+    "43": {
+        "type": 5,
+        "name": "i3"
+    },
+    "44": {
+        "type": 5,
+        "name": "i13"
+    },
+    "45": {
+        "type": 5,
+        "name": "n"
+    },
+    "46": {
+        "type": 5,
+        "name": "f"
+    },
+    "47": {
+        "type": 5,
+        "name": "w"
+    },
+    "48": {
+        "type": 5,
+        "name": "i13"
+    },
+    "49": {
+        "type": 5,
+        "name": "i3"
+    },
+    "50": {
+        "type": 5,
+        "name": "n"
+    },
+    "51": {
+        "type": 5,
+        "name": "f"
+    },
+    "52": {
+        "type": 5,
+        "name": "h"
+    },
+    "53": {
+        "type": 5,
+        "name": "w"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_6.expr b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_6.expr
new file mode 100644
index 00000000..3ec6ab2d
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_6.expr
@@ -0,0 +1,345 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 6 Rule4StageMerging: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "36",
+            "41",
+            "42",
+            "43",
+            "44",
+            "45"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i59": [
+                0,
+                227
+            ],
+            "i60": [
+                0,
+                2
+            ],
+            "i13": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                32
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "22"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "5",
+        "indexes": [
+            "6",
+            "7",
+            "8",
+            "13"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            32,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "6": {
+        "type": 5,
+        "name": "n"
+    },
+    "7": {
+        "type": 5,
+        "name": "c"
+    },
+    "8": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "9",
+        "rhs": "12"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "11"
+    },
+    "10": {
+        "type": 5,
+        "name": "i4"
+    },
+    "11": {
+        "type": 5,
+        "name": "i59"
+    },
+    "12": {
+        "type": 0,
+        "val": -2
+    },
+    "13": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "14",
+        "rhs": "21"
+    },
+    "14": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "15",
+        "rhs": "20"
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "19"
+    },
+    "16": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "17",
+        "rhs": "18"
+    },
+    "17": {
+        "type": 0,
+        "val": 3
+    },
+    "18": {
+        "type": 5,
+        "name": "i13"
+    },
+    "19": {
+        "type": 5,
+        "name": "i14"
+    },
+    "20": {
+        "type": 5,
+        "name": "w"
+    },
+    "21": {
+        "type": 0,
+        "val": -2
+    },
+    "22": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "23",
+        "indexes": [
+            "24",
+            "25",
+            "26",
+            "31"
+        ]
+    },
+    "23": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "24": {
+        "type": 5,
+        "name": "f"
+    },
+    "25": {
+        "type": 5,
+        "name": "c"
+    },
+    "26": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "27",
+        "rhs": "28"
+    },
+    "27": {
+        "type": 5,
+        "name": "i4"
+    },
+    "28": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "29",
+        "rhs": "30"
+    },
+    "29": {
+        "type": 0,
+        "val": 3
+    },
+    "30": {
+        "type": 5,
+        "name": "i60"
+    },
+    "31": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "32",
+        "rhs": "35"
+    },
+    "32": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "33",
+        "rhs": "34"
+    },
+    "33": {
+        "type": 0,
+        "val": 3
+    },
+    "34": {
+        "type": 5,
+        "name": "i13"
+    },
+    "35": {
+        "type": 5,
+        "name": "i14"
+    },
+    "36": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "37",
+        "rhs": "38"
+    },
+    "37": {
+        "type": 5,
+        "name": "h"
+    },
+    "38": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "39",
+        "rhs": "40"
+    },
+    "39": {
+        "type": 0,
+        "val": 3
+    },
+    "40": {
+        "type": 5,
+        "name": "i3"
+    },
+    "41": {
+        "type": 5,
+        "name": "i3"
+    },
+    "42": {
+        "type": 5,
+        "name": "i13"
+    },
+    "43": {
+        "type": 5,
+        "name": "n"
+    },
+    "44": {
+        "type": 5,
+        "name": "f"
+    },
+    "45": {
+        "type": 5,
+        "name": "w"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_7.expr b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_7.expr
new file mode 100644
index 00000000..c42f72b3
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_7.expr
@@ -0,0 +1,420 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 7 Rule2VariableMerging: Old iters: [w,i13], new iters: [i63,i64] phis: [(w + (3 * i13)),i13] psis: [(i63 - (3 * i64)),i64]",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "44",
+            "49",
+            "50",
+            "51",
+            "52",
+            "53"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i59": [
+                0,
+                227
+            ],
+            "i60": [
+                0,
+                2
+            ],
+            "i13": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ],
+            "w": [
+                0,
+                224
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "4",
+        "indexes": [
+            "34",
+            "39",
+            "40",
+            "41",
+            "42",
+            "43"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i63": [
+                0,
+                227
+            ],
+            "i64": [
+                0,
+                2
+            ],
+            "i59": [
+                0,
+                227
+            ],
+            "i60": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                32
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "20"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "7",
+        "indexes": [
+            "8",
+            "9",
+            "10",
+            "15"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            32,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 5,
+        "name": "n"
+    },
+    "9": {
+        "type": 5,
+        "name": "c"
+    },
+    "10": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "11",
+        "rhs": "14"
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "13"
+    },
+    "12": {
+        "type": 5,
+        "name": "i4"
+    },
+    "13": {
+        "type": 5,
+        "name": "i59"
+    },
+    "14": {
+        "type": 0,
+        "val": -2
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "19"
+    },
+    "16": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "17",
+        "rhs": "18"
+    },
+    "17": {
+        "type": 5,
+        "name": "i14"
+    },
+    "18": {
+        "type": 5,
+        "name": "i63"
+    },
+    "19": {
+        "type": 0,
+        "val": -2
+    },
+    "20": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "21",
+        "indexes": [
+            "22",
+            "23",
+            "24",
+            "29"
+        ]
+    },
+    "21": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "22": {
+        "type": 5,
+        "name": "f"
+    },
+    "23": {
+        "type": 5,
+        "name": "c"
+    },
+    "24": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "25",
+        "rhs": "26"
+    },
+    "25": {
+        "type": 5,
+        "name": "i4"
+    },
+    "26": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "27",
+        "rhs": "28"
+    },
+    "27": {
+        "type": 0,
+        "val": 3
+    },
+    "28": {
+        "type": 5,
+        "name": "i60"
+    },
+    "29": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "30",
+        "rhs": "31"
+    },
+    "30": {
+        "type": 5,
+        "name": "i14"
+    },
+    "31": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "32",
+        "rhs": "33"
+    },
+    "32": {
+        "type": 0,
+        "val": 3
+    },
+    "33": {
+        "type": 5,
+        "name": "i64"
+    },
+    "34": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 5,
+        "name": "w"
+    },
+    "36": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "37",
+        "rhs": "38"
+    },
+    "37": {
+        "type": 0,
+        "val": 3
+    },
+    "38": {
+        "type": 5,
+        "name": "i13"
+    },
+    "39": {
+        "type": 5,
+        "name": "i13"
+    },
+    "40": {
+        "type": 5,
+        "name": "i59"
+    },
+    "41": {
+        "type": 5,
+        "name": "i60"
+    },
+    "42": {
+        "type": 5,
+        "name": "n"
+    },
+    "43": {
+        "type": 5,
+        "name": "f"
+    },
+    "44": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "45",
+        "rhs": "46"
+    },
+    "45": {
+        "type": 5,
+        "name": "h"
+    },
+    "46": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "47",
+        "rhs": "48"
+    },
+    "47": {
+        "type": 0,
+        "val": 3
+    },
+    "48": {
+        "type": 5,
+        "name": "i3"
+    },
+    "49": {
+        "type": 5,
+        "name": "i3"
+    },
+    "50": {
+        "type": 5,
+        "name": "i13"
+    },
+    "51": {
+        "type": 5,
+        "name": "n"
+    },
+    "52": {
+        "type": 5,
+        "name": "f"
+    },
+    "53": {
+        "type": 5,
+        "name": "w"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_8.expr b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_8.expr
new file mode 100644
index 00000000..088efd57
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_8.expr
@@ -0,0 +1,345 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 8 Rule4StageMerging: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "32",
+            "37",
+            "38",
+            "43",
+            "44",
+            "45"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i63": [
+                0,
+                227
+            ],
+            "i64": [
+                0,
+                2
+            ],
+            "i59": [
+                0,
+                227
+            ],
+            "i60": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                32
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "18"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "5",
+        "indexes": [
+            "6",
+            "7",
+            "8",
+            "13"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            32,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "6": {
+        "type": 5,
+        "name": "n"
+    },
+    "7": {
+        "type": 5,
+        "name": "c"
+    },
+    "8": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "9",
+        "rhs": "12"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "11"
+    },
+    "10": {
+        "type": 5,
+        "name": "i4"
+    },
+    "11": {
+        "type": 5,
+        "name": "i59"
+    },
+    "12": {
+        "type": 0,
+        "val": -2
+    },
+    "13": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "14",
+        "rhs": "17"
+    },
+    "14": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "15",
+        "rhs": "16"
+    },
+    "15": {
+        "type": 5,
+        "name": "i14"
+    },
+    "16": {
+        "type": 5,
+        "name": "i63"
+    },
+    "17": {
+        "type": 0,
+        "val": -2
+    },
+    "18": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "19",
+        "indexes": [
+            "20",
+            "21",
+            "22",
+            "27"
+        ]
+    },
+    "19": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "20": {
+        "type": 5,
+        "name": "f"
+    },
+    "21": {
+        "type": 5,
+        "name": "c"
+    },
+    "22": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "23",
+        "rhs": "24"
+    },
+    "23": {
+        "type": 5,
+        "name": "i4"
+    },
+    "24": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "25",
+        "rhs": "26"
+    },
+    "25": {
+        "type": 0,
+        "val": 3
+    },
+    "26": {
+        "type": 5,
+        "name": "i60"
+    },
+    "27": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "28",
+        "rhs": "29"
+    },
+    "28": {
+        "type": 5,
+        "name": "i14"
+    },
+    "29": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "30",
+        "rhs": "31"
+    },
+    "30": {
+        "type": 0,
+        "val": 3
+    },
+    "31": {
+        "type": 5,
+        "name": "i64"
+    },
+    "32": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "33",
+        "rhs": "34"
+    },
+    "33": {
+        "type": 5,
+        "name": "w"
+    },
+    "34": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 0,
+        "val": 3
+    },
+    "36": {
+        "type": 5,
+        "name": "i13"
+    },
+    "37": {
+        "type": 5,
+        "name": "i13"
+    },
+    "38": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "39",
+        "rhs": "40"
+    },
+    "39": {
+        "type": 5,
+        "name": "h"
+    },
+    "40": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "41",
+        "rhs": "42"
+    },
+    "41": {
+        "type": 0,
+        "val": 3
+    },
+    "42": {
+        "type": 5,
+        "name": "i3"
+    },
+    "43": {
+        "type": 5,
+        "name": "i3"
+    },
+    "44": {
+        "type": 5,
+        "name": "n"
+    },
+    "45": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_9.expr b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_9.expr
new file mode 100644
index 00000000..7379dfc1
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_9.expr
@@ -0,0 +1,345 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 9 Rule5RangeRelaxation: i63 (0,227) to (0,226),i59 (0,227) to (0,226),",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "32",
+            "37",
+            "38",
+            "43",
+            "44",
+            "45"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            1,
+            0,
+            1,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i63": [
+                0,
+                226
+            ],
+            "i64": [
+                0,
+                2
+            ],
+            "i59": [
+                0,
+                226
+            ],
+            "i60": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                16
+            ],
+            "f": [
+                0,
+                1
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                32
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "18"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "5",
+        "indexes": [
+            "6",
+            "7",
+            "8",
+            "13"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            16,
+            32,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "6": {
+        "type": 5,
+        "name": "n"
+    },
+    "7": {
+        "type": 5,
+        "name": "c"
+    },
+    "8": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "9",
+        "rhs": "12"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "11"
+    },
+    "10": {
+        "type": 5,
+        "name": "i4"
+    },
+    "11": {
+        "type": 5,
+        "name": "i59"
+    },
+    "12": {
+        "type": 0,
+        "val": -2
+    },
+    "13": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "14",
+        "rhs": "17"
+    },
+    "14": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "15",
+        "rhs": "16"
+    },
+    "15": {
+        "type": 5,
+        "name": "i14"
+    },
+    "16": {
+        "type": 5,
+        "name": "i63"
+    },
+    "17": {
+        "type": 0,
+        "val": -2
+    },
+    "18": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "19",
+        "indexes": [
+            "20",
+            "21",
+            "22",
+            "27"
+        ]
+    },
+    "19": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            1,
+            32,
+            5,
+            5
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "-1"
+    },
+    "20": {
+        "type": 5,
+        "name": "f"
+    },
+    "21": {
+        "type": 5,
+        "name": "c"
+    },
+    "22": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "23",
+        "rhs": "24"
+    },
+    "23": {
+        "type": 5,
+        "name": "i4"
+    },
+    "24": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "25",
+        "rhs": "26"
+    },
+    "25": {
+        "type": 0,
+        "val": 3
+    },
+    "26": {
+        "type": 5,
+        "name": "i60"
+    },
+    "27": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "28",
+        "rhs": "29"
+    },
+    "28": {
+        "type": 5,
+        "name": "i14"
+    },
+    "29": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "30",
+        "rhs": "31"
+    },
+    "30": {
+        "type": 0,
+        "val": 3
+    },
+    "31": {
+        "type": 5,
+        "name": "i64"
+    },
+    "32": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "33",
+        "rhs": "34"
+    },
+    "33": {
+        "type": 5,
+        "name": "w"
+    },
+    "34": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 0,
+        "val": 3
+    },
+    "36": {
+        "type": 5,
+        "name": "i13"
+    },
+    "37": {
+        "type": 5,
+        "name": "i13"
+    },
+    "38": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "39",
+        "rhs": "40"
+    },
+    "39": {
+        "type": 5,
+        "name": "h"
+    },
+    "40": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "41",
+        "rhs": "42"
+    },
+    "41": {
+        "type": 0,
+        "val": 3
+    },
+    "42": {
+        "type": 5,
+        "name": "i3"
+    },
+    "43": {
+        "type": 5,
+        "name": "i3"
+    },
+    "44": {
+        "type": 5,
+        "name": "n"
+    },
+    "45": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_0.expr b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_0.expr
new file mode 100644
index 00000000..c55ff89c
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_0.expr
@@ -0,0 +1,179 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                16
+            ],
+            "r": [
+                0,
+                6
+            ],
+            "s": [
+                0,
+                6
+            ]
+        }
+    },
+    "1": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "2",
+        "rhs": "16"
+    },
+    "2": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "3",
+        "indexes": [
+            "4",
+            "5",
+            "6",
+            "11"
+        ]
+    },
+    "3": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "4": {
+        "type": 5,
+        "name": "n"
+    },
+    "5": {
+        "type": 5,
+        "name": "c"
+    },
+    "6": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "7",
+        "rhs": "10"
+    },
+    "7": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "8",
+        "rhs": "9"
+    },
+    "8": {
+        "type": 5,
+        "name": "h"
+    },
+    "9": {
+        "type": 5,
+        "name": "r"
+    },
+    "10": {
+        "type": 0,
+        "val": -2
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "15"
+    },
+    "12": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "13",
+        "rhs": "14"
+    },
+    "13": {
+        "type": 5,
+        "name": "w"
+    },
+    "14": {
+        "type": 5,
+        "name": "s"
+    },
+    "15": {
+        "type": 0,
+        "val": -2
+    },
+    "16": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "17",
+        "indexes": [
+            "18",
+            "19",
+            "20",
+            "21"
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "18": {
+        "type": 5,
+        "name": "f"
+    },
+    "19": {
+        "type": 5,
+        "name": "c"
+    },
+    "20": {
+        "type": 5,
+        "name": "r"
+    },
+    "21": {
+        "type": 5,
+        "name": "s"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_1.expr b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_1.expr
new file mode 100644
index 00000000..87084ec5
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_1.expr
@@ -0,0 +1,223 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i3": [
+                0,
+                2
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ],
+            "s": [
+                0,
+                6
+            ]
+        }
+    },
+    "1": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "2",
+        "rhs": "20"
+    },
+    "2": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "3",
+        "indexes": [
+            "4",
+            "5",
+            "6",
+            "15"
+        ]
+    },
+    "3": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "4": {
+        "type": 5,
+        "name": "n"
+    },
+    "5": {
+        "type": 5,
+        "name": "c"
+    },
+    "6": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "7",
+        "rhs": "14"
+    },
+    "7": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "8",
+        "rhs": "9"
+    },
+    "8": {
+        "type": 5,
+        "name": "h"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "13"
+    },
+    "10": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "11",
+        "rhs": "12"
+    },
+    "11": {
+        "type": 0,
+        "val": 3
+    },
+    "12": {
+        "type": 5,
+        "name": "i3"
+    },
+    "13": {
+        "type": 5,
+        "name": "i4"
+    },
+    "14": {
+        "type": 0,
+        "val": -2
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "19"
+    },
+    "16": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "17",
+        "rhs": "18"
+    },
+    "17": {
+        "type": 5,
+        "name": "w"
+    },
+    "18": {
+        "type": 5,
+        "name": "s"
+    },
+    "19": {
+        "type": 0,
+        "val": -2
+    },
+    "20": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "21",
+        "indexes": [
+            "22",
+            "23",
+            "24",
+            "29"
+        ]
+    },
+    "21": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "22": {
+        "type": 5,
+        "name": "f"
+    },
+    "23": {
+        "type": 5,
+        "name": "c"
+    },
+    "24": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "25",
+        "rhs": "28"
+    },
+    "25": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "26",
+        "rhs": "27"
+    },
+    "26": {
+        "type": 0,
+        "val": 3
+    },
+    "27": {
+        "type": 5,
+        "name": "i3"
+    },
+    "28": {
+        "type": 5,
+        "name": "i4"
+    },
+    "29": {
+        "type": 5,
+        "name": "s"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_10.expr b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_10.expr
new file mode 100644
index 00000000..059c9c8b
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_10.expr
@@ -0,0 +1,636 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "69",
+            "74",
+            "75",
+            "80",
+            "81",
+            "82"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            1,
+            0,
+            1,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i62": [
+                0,
+                226
+            ],
+            "i63": [
+                0,
+                2
+            ],
+            "i58": [
+                0,
+                226
+            ],
+            "i59": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "4",
+        "indexes": [
+            "57",
+            "66",
+            "67",
+            "68"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i64": [
+                0,
+                256
+            ],
+            "i62": [
+                0,
+                226
+            ],
+            "i58": [
+                0,
+                226
+            ],
+            "n": [
+                0,
+                1
+            ]
+        }
+    },
+    "5": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "6",
+        "indexes": [
+            "53",
+            "54",
+            "55",
+            "56"
+        ]
+    },
+    "6": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "7",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "i64": [
+                0,
+                256
+            ],
+            "i58": [
+                0,
+                226
+            ],
+            "i62": [
+                0,
+                226
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "7": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "8",
+        "rhs": "22"
+    },
+    "8": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "9",
+        "indexes": [
+            "10",
+            "11",
+            "12",
+            "17"
+        ]
+    },
+    "9": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "10": {
+        "type": 5,
+        "name": "n"
+    },
+    "11": {
+        "type": 5,
+        "name": "c"
+    },
+    "12": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "13",
+        "rhs": "16"
+    },
+    "13": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "14",
+        "rhs": "15"
+    },
+    "14": {
+        "type": 5,
+        "name": "i4"
+    },
+    "15": {
+        "type": 5,
+        "name": "i58"
+    },
+    "16": {
+        "type": 0,
+        "val": -2
+    },
+    "17": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "18",
+        "rhs": "21"
+    },
+    "18": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "19",
+        "rhs": "20"
+    },
+    "19": {
+        "type": 5,
+        "name": "i14"
+    },
+    "20": {
+        "type": 5,
+        "name": "i62"
+    },
+    "21": {
+        "type": 0,
+        "val": -2
+    },
+    "22": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "23",
+        "indexes": [
+            "49",
+            "50",
+            "51",
+            "52"
+        ]
+    },
+    "23": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            256,
+            16,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "24"
+    },
+    "24": {
+        "type": 105,
+        "inputs": [
+            "25"
+        ],
+        "expr": "26",
+        "outputShape": [
+            256,
+            16,
+            3,
+            3
+        ]
+    },
+    "25": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "26": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "27",
+        "loopVarRanges": {
+            "i64": [
+                0,
+                256
+            ],
+            "c": [
+                0,
+                16
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "27": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "28",
+        "indexes": [
+            "29",
+            "32",
+            "33",
+            "42"
+        ]
+    },
+    "28": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "29": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "30",
+        "rhs": "31"
+    },
+    "30": {
+        "type": 5,
+        "name": "i64"
+    },
+    "31": {
+        "type": 0,
+        "val": 4
+    },
+    "32": {
+        "type": 5,
+        "name": "c"
+    },
+    "33": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "34",
+        "rhs": "35"
+    },
+    "34": {
+        "type": 5,
+        "name": "i4"
+    },
+    "35": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "36",
+        "rhs": "37"
+    },
+    "36": {
+        "type": 0,
+        "val": 3
+    },
+    "37": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "38",
+        "rhs": "41"
+    },
+    "38": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "39",
+        "rhs": "40"
+    },
+    "39": {
+        "type": 5,
+        "name": "i64"
+    },
+    "40": {
+        "type": 0,
+        "val": 2
+    },
+    "41": {
+        "type": 0,
+        "val": 2
+    },
+    "42": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "43",
+        "rhs": "48"
+    },
+    "43": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "44",
+        "rhs": "45"
+    },
+    "44": {
+        "type": 0,
+        "val": 3
+    },
+    "45": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "46",
+        "rhs": "47"
+    },
+    "46": {
+        "type": 5,
+        "name": "i64"
+    },
+    "47": {
+        "type": 0,
+        "val": 2
+    },
+    "48": {
+        "type": 5,
+        "name": "i14"
+    },
+    "49": {
+        "type": 5,
+        "name": "i64"
+    },
+    "50": {
+        "type": 5,
+        "name": "c"
+    },
+    "51": {
+        "type": 5,
+        "name": "i4"
+    },
+    "52": {
+        "type": 5,
+        "name": "i14"
+    },
+    "53": {
+        "type": 5,
+        "name": "n"
+    },
+    "54": {
+        "type": 5,
+        "name": "i64"
+    },
+    "55": {
+        "type": 5,
+        "name": "i58"
+    },
+    "56": {
+        "type": 5,
+        "name": "i62"
+    },
+    "57": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "58",
+        "rhs": "65"
+    },
+    "58": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "59",
+        "rhs": "62"
+    },
+    "59": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "60",
+        "rhs": "61"
+    },
+    "60": {
+        "type": 0,
+        "val": 4
+    },
+    "61": {
+        "type": 5,
+        "name": "f"
+    },
+    "62": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "63",
+        "rhs": "64"
+    },
+    "63": {
+        "type": 0,
+        "val": 2
+    },
+    "64": {
+        "type": 5,
+        "name": "i59"
+    },
+    "65": {
+        "type": 5,
+        "name": "i63"
+    },
+    "66": {
+        "type": 5,
+        "name": "i62"
+    },
+    "67": {
+        "type": 5,
+        "name": "i58"
+    },
+    "68": {
+        "type": 5,
+        "name": "n"
+    },
+    "69": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "70",
+        "rhs": "71"
+    },
+    "70": {
+        "type": 5,
+        "name": "w"
+    },
+    "71": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "72",
+        "rhs": "73"
+    },
+    "72": {
+        "type": 0,
+        "val": 3
+    },
+    "73": {
+        "type": 5,
+        "name": "i13"
+    },
+    "74": {
+        "type": 5,
+        "name": "i13"
+    },
+    "75": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "76",
+        "rhs": "77"
+    },
+    "76": {
+        "type": 5,
+        "name": "h"
+    },
+    "77": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "78",
+        "rhs": "79"
+    },
+    "78": {
+        "type": 0,
+        "val": 3
+    },
+    "79": {
+        "type": 5,
+        "name": "i3"
+    },
+    "80": {
+        "type": 5,
+        "name": "i3"
+    },
+    "81": {
+        "type": 5,
+        "name": "n"
+    },
+    "82": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_11.expr b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_11.expr
new file mode 100644
index 00000000..c2768860
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_11.expr
@@ -0,0 +1,581 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "63",
+            "68",
+            "69",
+            "74",
+            "75",
+            "76"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            1,
+            0,
+            1,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i62": [
+                0,
+                226
+            ],
+            "i63": [
+                0,
+                2
+            ],
+            "i58": [
+                0,
+                226
+            ],
+            "i59": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "4",
+        "indexes": [
+            "51",
+            "52",
+            "61",
+            "62"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "i64": [
+                0,
+                256
+            ],
+            "i58": [
+                0,
+                226
+            ],
+            "i62": [
+                0,
+                226
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "20"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "7",
+        "indexes": [
+            "8",
+            "9",
+            "10",
+            "15"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 5,
+        "name": "n"
+    },
+    "9": {
+        "type": 5,
+        "name": "c"
+    },
+    "10": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "11",
+        "rhs": "14"
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "13"
+    },
+    "12": {
+        "type": 5,
+        "name": "i4"
+    },
+    "13": {
+        "type": 5,
+        "name": "i58"
+    },
+    "14": {
+        "type": 0,
+        "val": -2
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "19"
+    },
+    "16": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "17",
+        "rhs": "18"
+    },
+    "17": {
+        "type": 5,
+        "name": "i14"
+    },
+    "18": {
+        "type": 5,
+        "name": "i62"
+    },
+    "19": {
+        "type": 0,
+        "val": -2
+    },
+    "20": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "21",
+        "indexes": [
+            "47",
+            "48",
+            "49",
+            "50"
+        ]
+    },
+    "21": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            256,
+            16,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "22"
+    },
+    "22": {
+        "type": 105,
+        "inputs": [
+            "23"
+        ],
+        "expr": "24",
+        "outputShape": [
+            256,
+            16,
+            3,
+            3
+        ]
+    },
+    "23": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "24": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "25",
+        "loopVarRanges": {
+            "i64": [
+                0,
+                256
+            ],
+            "c": [
+                0,
+                16
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "25": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "26",
+        "indexes": [
+            "27",
+            "30",
+            "31",
+            "40"
+        ]
+    },
+    "26": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "27": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "28",
+        "rhs": "29"
+    },
+    "28": {
+        "type": 5,
+        "name": "i64"
+    },
+    "29": {
+        "type": 0,
+        "val": 4
+    },
+    "30": {
+        "type": 5,
+        "name": "c"
+    },
+    "31": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "32",
+        "rhs": "33"
+    },
+    "32": {
+        "type": 5,
+        "name": "i4"
+    },
+    "33": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "34",
+        "rhs": "35"
+    },
+    "34": {
+        "type": 0,
+        "val": 3
+    },
+    "35": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "36",
+        "rhs": "39"
+    },
+    "36": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "37",
+        "rhs": "38"
+    },
+    "37": {
+        "type": 5,
+        "name": "i64"
+    },
+    "38": {
+        "type": 0,
+        "val": 2
+    },
+    "39": {
+        "type": 0,
+        "val": 2
+    },
+    "40": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "41",
+        "rhs": "46"
+    },
+    "41": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "42",
+        "rhs": "43"
+    },
+    "42": {
+        "type": 0,
+        "val": 3
+    },
+    "43": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "44",
+        "rhs": "45"
+    },
+    "44": {
+        "type": 5,
+        "name": "i64"
+    },
+    "45": {
+        "type": 0,
+        "val": 2
+    },
+    "46": {
+        "type": 5,
+        "name": "i14"
+    },
+    "47": {
+        "type": 5,
+        "name": "i64"
+    },
+    "48": {
+        "type": 5,
+        "name": "c"
+    },
+    "49": {
+        "type": 5,
+        "name": "i4"
+    },
+    "50": {
+        "type": 5,
+        "name": "i14"
+    },
+    "51": {
+        "type": 5,
+        "name": "n"
+    },
+    "52": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "53",
+        "rhs": "60"
+    },
+    "53": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "54",
+        "rhs": "57"
+    },
+    "54": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "55",
+        "rhs": "56"
+    },
+    "55": {
+        "type": 0,
+        "val": 4
+    },
+    "56": {
+        "type": 5,
+        "name": "f"
+    },
+    "57": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "58",
+        "rhs": "59"
+    },
+    "58": {
+        "type": 0,
+        "val": 2
+    },
+    "59": {
+        "type": 5,
+        "name": "i59"
+    },
+    "60": {
+        "type": 5,
+        "name": "i63"
+    },
+    "61": {
+        "type": 5,
+        "name": "i58"
+    },
+    "62": {
+        "type": 5,
+        "name": "i62"
+    },
+    "63": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "64",
+        "rhs": "65"
+    },
+    "64": {
+        "type": 5,
+        "name": "w"
+    },
+    "65": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "66",
+        "rhs": "67"
+    },
+    "66": {
+        "type": 0,
+        "val": 3
+    },
+    "67": {
+        "type": 5,
+        "name": "i13"
+    },
+    "68": {
+        "type": 5,
+        "name": "i13"
+    },
+    "69": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "70",
+        "rhs": "71"
+    },
+    "70": {
+        "type": 5,
+        "name": "h"
+    },
+    "71": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "72",
+        "rhs": "73"
+    },
+    "72": {
+        "type": 0,
+        "val": 3
+    },
+    "73": {
+        "type": 5,
+        "name": "i3"
+    },
+    "74": {
+        "type": 5,
+        "name": "i3"
+    },
+    "75": {
+        "type": 5,
+        "name": "n"
+    },
+    "76": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_12.expr b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_12.expr
new file mode 100644
index 00000000..54309718
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_12.expr
@@ -0,0 +1,830 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "92",
+            "97",
+            "98",
+            "103",
+            "104",
+            "105"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            1,
+            0,
+            1,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i62": [
+                0,
+                226
+            ],
+            "i63": [
+                0,
+                2
+            ],
+            "i58": [
+                0,
+                226
+            ],
+            "i59": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "4",
+        "indexes": [
+            "80",
+            "81",
+            "90",
+            "91"
+        ]
+    },
+    "4": {
+        "type": 4,
+        "name": "T6",
+        "shape": [
+            1,
+            256,
+            226,
+            226
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "5"
+    },
+    "5": {
+        "type": 102,
+        "inputs": [
+            "6",
+            "7"
+        ],
+        "expr": "33",
+        "args": [
+            2,
+            2,
+            1,
+            1,
+            1,
+            1
+        ]
+    },
+    "6": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "7": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            256,
+            16,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "8"
+    },
+    "8": {
+        "type": 105,
+        "inputs": [
+            "9"
+        ],
+        "expr": "10",
+        "outputShape": [
+            256,
+            16,
+            3,
+            3
+        ]
+    },
+    "9": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "10": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "11",
+        "loopVarRanges": {
+            "i64": [
+                0,
+                256
+            ],
+            "c": [
+                0,
+                16
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "11": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "12",
+        "indexes": [
+            "13",
+            "16",
+            "17",
+            "26"
+        ]
+    },
+    "12": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "13": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "14",
+        "rhs": "15"
+    },
+    "14": {
+        "type": 5,
+        "name": "i64"
+    },
+    "15": {
+        "type": 0,
+        "val": 4
+    },
+    "16": {
+        "type": 5,
+        "name": "c"
+    },
+    "17": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "18",
+        "rhs": "19"
+    },
+    "18": {
+        "type": 5,
+        "name": "i4"
+    },
+    "19": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "20",
+        "rhs": "21"
+    },
+    "20": {
+        "type": 0,
+        "val": 3
+    },
+    "21": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "22",
+        "rhs": "25"
+    },
+    "22": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "23",
+        "rhs": "24"
+    },
+    "23": {
+        "type": 5,
+        "name": "i64"
+    },
+    "24": {
+        "type": 0,
+        "val": 2
+    },
+    "25": {
+        "type": 0,
+        "val": 2
+    },
+    "26": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "27",
+        "rhs": "32"
+    },
+    "27": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "28",
+        "rhs": "29"
+    },
+    "28": {
+        "type": 0,
+        "val": 3
+    },
+    "29": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "30",
+        "rhs": "31"
+    },
+    "30": {
+        "type": 5,
+        "name": "i64"
+    },
+    "31": {
+        "type": 0,
+        "val": 2
+    },
+    "32": {
+        "type": 5,
+        "name": "i14"
+    },
+    "33": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "34",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "i64": [
+                0,
+                256
+            ],
+            "i58": [
+                0,
+                226
+            ],
+            "i62": [
+                0,
+                226
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "34": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "35",
+        "rhs": "49"
+    },
+    "35": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "36",
+        "indexes": [
+            "37",
+            "38",
+            "39",
+            "44"
+        ]
+    },
+    "36": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "37": {
+        "type": 5,
+        "name": "n"
+    },
+    "38": {
+        "type": 5,
+        "name": "c"
+    },
+    "39": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "40",
+        "rhs": "43"
+    },
+    "40": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "41",
+        "rhs": "42"
+    },
+    "41": {
+        "type": 5,
+        "name": "i4"
+    },
+    "42": {
+        "type": 5,
+        "name": "i58"
+    },
+    "43": {
+        "type": 0,
+        "val": -2
+    },
+    "44": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "45",
+        "rhs": "48"
+    },
+    "45": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "46",
+        "rhs": "47"
+    },
+    "46": {
+        "type": 5,
+        "name": "i14"
+    },
+    "47": {
+        "type": 5,
+        "name": "i62"
+    },
+    "48": {
+        "type": 0,
+        "val": -2
+    },
+    "49": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "50",
+        "indexes": [
+            "76",
+            "77",
+            "78",
+            "79"
+        ]
+    },
+    "50": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            256,
+            16,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "51"
+    },
+    "51": {
+        "type": 105,
+        "inputs": [
+            "52"
+        ],
+        "expr": "53",
+        "outputShape": [
+            256,
+            16,
+            3,
+            3
+        ]
+    },
+    "52": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "53": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "54",
+        "loopVarRanges": {
+            "i64": [
+                0,
+                256
+            ],
+            "c": [
+                0,
+                16
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "54": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "55",
+        "indexes": [
+            "56",
+            "59",
+            "60",
+            "69"
+        ]
+    },
+    "55": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "56": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "57",
+        "rhs": "58"
+    },
+    "57": {
+        "type": 5,
+        "name": "i64"
+    },
+    "58": {
+        "type": 0,
+        "val": 4
+    },
+    "59": {
+        "type": 5,
+        "name": "c"
+    },
+    "60": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "61",
+        "rhs": "62"
+    },
+    "61": {
+        "type": 5,
+        "name": "i4"
+    },
+    "62": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "63",
+        "rhs": "64"
+    },
+    "63": {
+        "type": 0,
+        "val": 3
+    },
+    "64": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "65",
+        "rhs": "68"
+    },
+    "65": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "66",
+        "rhs": "67"
+    },
+    "66": {
+        "type": 5,
+        "name": "i64"
+    },
+    "67": {
+        "type": 0,
+        "val": 2
+    },
+    "68": {
+        "type": 0,
+        "val": 2
+    },
+    "69": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "70",
+        "rhs": "75"
+    },
+    "70": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "71",
+        "rhs": "72"
+    },
+    "71": {
+        "type": 0,
+        "val": 3
+    },
+    "72": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "73",
+        "rhs": "74"
+    },
+    "73": {
+        "type": 5,
+        "name": "i64"
+    },
+    "74": {
+        "type": 0,
+        "val": 2
+    },
+    "75": {
+        "type": 5,
+        "name": "i14"
+    },
+    "76": {
+        "type": 5,
+        "name": "i64"
+    },
+    "77": {
+        "type": 5,
+        "name": "c"
+    },
+    "78": {
+        "type": 5,
+        "name": "i4"
+    },
+    "79": {
+        "type": 5,
+        "name": "i14"
+    },
+    "80": {
+        "type": 5,
+        "name": "n"
+    },
+    "81": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "82",
+        "rhs": "89"
+    },
+    "82": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "83",
+        "rhs": "86"
+    },
+    "83": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "84",
+        "rhs": "85"
+    },
+    "84": {
+        "type": 0,
+        "val": 4
+    },
+    "85": {
+        "type": 5,
+        "name": "f"
+    },
+    "86": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "87",
+        "rhs": "88"
+    },
+    "87": {
+        "type": 0,
+        "val": 2
+    },
+    "88": {
+        "type": 5,
+        "name": "i59"
+    },
+    "89": {
+        "type": 5,
+        "name": "i63"
+    },
+    "90": {
+        "type": 5,
+        "name": "i58"
+    },
+    "91": {
+        "type": 5,
+        "name": "i62"
+    },
+    "92": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "93",
+        "rhs": "94"
+    },
+    "93": {
+        "type": 5,
+        "name": "w"
+    },
+    "94": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "95",
+        "rhs": "96"
+    },
+    "95": {
+        "type": 0,
+        "val": 3
+    },
+    "96": {
+        "type": 5,
+        "name": "i13"
+    },
+    "97": {
+        "type": 5,
+        "name": "i13"
+    },
+    "98": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "99",
+        "rhs": "100"
+    },
+    "99": {
+        "type": 5,
+        "name": "h"
+    },
+    "100": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "101",
+        "rhs": "102"
+    },
+    "101": {
+        "type": 0,
+        "val": 3
+    },
+    "102": {
+        "type": 5,
+        "name": "i3"
+    },
+    "103": {
+        "type": 5,
+        "name": "i3"
+    },
+    "104": {
+        "type": 5,
+        "name": "n"
+    },
+    "105": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_13.expr b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_13.expr
new file mode 100644
index 00000000..36a0e6d6
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_13.expr
@@ -0,0 +1,755 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "2",
+        "indexes": [
+            "78",
+            "79",
+            "88",
+            "93"
+        ]
+    },
+    "2": {
+        "type": 4,
+        "name": "T6",
+        "shape": [
+            1,
+            256,
+            226,
+            226
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "3"
+    },
+    "3": {
+        "type": 102,
+        "inputs": [
+            "4",
+            "5"
+        ],
+        "expr": "31",
+        "args": [
+            2,
+            2,
+            1,
+            1,
+            1,
+            1
+        ]
+    },
+    "4": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "5": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            256,
+            16,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "6"
+    },
+    "6": {
+        "type": 105,
+        "inputs": [
+            "7"
+        ],
+        "expr": "8",
+        "outputShape": [
+            256,
+            16,
+            3,
+            3
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "9",
+        "loopVarRanges": {
+            "i64": [
+                0,
+                256
+            ],
+            "c": [
+                0,
+                16
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "9": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "10",
+        "indexes": [
+            "11",
+            "14",
+            "15",
+            "24"
+        ]
+    },
+    "10": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "11": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "12",
+        "rhs": "13"
+    },
+    "12": {
+        "type": 5,
+        "name": "i64"
+    },
+    "13": {
+        "type": 0,
+        "val": 4
+    },
+    "14": {
+        "type": 5,
+        "name": "c"
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "17"
+    },
+    "16": {
+        "type": 5,
+        "name": "i4"
+    },
+    "17": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "18",
+        "rhs": "19"
+    },
+    "18": {
+        "type": 0,
+        "val": 3
+    },
+    "19": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "20",
+        "rhs": "23"
+    },
+    "20": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "21",
+        "rhs": "22"
+    },
+    "21": {
+        "type": 5,
+        "name": "i64"
+    },
+    "22": {
+        "type": 0,
+        "val": 2
+    },
+    "23": {
+        "type": 0,
+        "val": 2
+    },
+    "24": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "25",
+        "rhs": "30"
+    },
+    "25": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "26",
+        "rhs": "27"
+    },
+    "26": {
+        "type": 0,
+        "val": 3
+    },
+    "27": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "28",
+        "rhs": "29"
+    },
+    "28": {
+        "type": 5,
+        "name": "i64"
+    },
+    "29": {
+        "type": 0,
+        "val": 2
+    },
+    "30": {
+        "type": 5,
+        "name": "i14"
+    },
+    "31": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "32",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "i64": [
+                0,
+                256
+            ],
+            "i58": [
+                0,
+                226
+            ],
+            "i62": [
+                0,
+                226
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "32": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "33",
+        "rhs": "47"
+    },
+    "33": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "34",
+        "indexes": [
+            "35",
+            "36",
+            "37",
+            "42"
+        ]
+    },
+    "34": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "35": {
+        "type": 5,
+        "name": "n"
+    },
+    "36": {
+        "type": 5,
+        "name": "c"
+    },
+    "37": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "38",
+        "rhs": "41"
+    },
+    "38": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "39",
+        "rhs": "40"
+    },
+    "39": {
+        "type": 5,
+        "name": "i4"
+    },
+    "40": {
+        "type": 5,
+        "name": "i58"
+    },
+    "41": {
+        "type": 0,
+        "val": -2
+    },
+    "42": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "43",
+        "rhs": "46"
+    },
+    "43": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "44",
+        "rhs": "45"
+    },
+    "44": {
+        "type": 5,
+        "name": "i14"
+    },
+    "45": {
+        "type": 5,
+        "name": "i62"
+    },
+    "46": {
+        "type": 0,
+        "val": -2
+    },
+    "47": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "48",
+        "indexes": [
+            "74",
+            "75",
+            "76",
+            "77"
+        ]
+    },
+    "48": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            256,
+            16,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "49"
+    },
+    "49": {
+        "type": 105,
+        "inputs": [
+            "50"
+        ],
+        "expr": "51",
+        "outputShape": [
+            256,
+            16,
+            3,
+            3
+        ]
+    },
+    "50": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "51": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "52",
+        "loopVarRanges": {
+            "i64": [
+                0,
+                256
+            ],
+            "c": [
+                0,
+                16
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "52": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "53",
+        "indexes": [
+            "54",
+            "57",
+            "58",
+            "67"
+        ]
+    },
+    "53": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "54": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "55",
+        "rhs": "56"
+    },
+    "55": {
+        "type": 5,
+        "name": "i64"
+    },
+    "56": {
+        "type": 0,
+        "val": 4
+    },
+    "57": {
+        "type": 5,
+        "name": "c"
+    },
+    "58": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "59",
+        "rhs": "60"
+    },
+    "59": {
+        "type": 5,
+        "name": "i4"
+    },
+    "60": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "61",
+        "rhs": "62"
+    },
+    "61": {
+        "type": 0,
+        "val": 3
+    },
+    "62": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "63",
+        "rhs": "66"
+    },
+    "63": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "64",
+        "rhs": "65"
+    },
+    "64": {
+        "type": 5,
+        "name": "i64"
+    },
+    "65": {
+        "type": 0,
+        "val": 2
+    },
+    "66": {
+        "type": 0,
+        "val": 2
+    },
+    "67": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "68",
+        "rhs": "73"
+    },
+    "68": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "69",
+        "rhs": "70"
+    },
+    "69": {
+        "type": 0,
+        "val": 3
+    },
+    "70": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "71",
+        "rhs": "72"
+    },
+    "71": {
+        "type": 5,
+        "name": "i64"
+    },
+    "72": {
+        "type": 0,
+        "val": 2
+    },
+    "73": {
+        "type": 5,
+        "name": "i14"
+    },
+    "74": {
+        "type": 5,
+        "name": "i64"
+    },
+    "75": {
+        "type": 5,
+        "name": "c"
+    },
+    "76": {
+        "type": 5,
+        "name": "i4"
+    },
+    "77": {
+        "type": 5,
+        "name": "i14"
+    },
+    "78": {
+        "type": 5,
+        "name": "n"
+    },
+    "79": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "80",
+        "rhs": "87"
+    },
+    "80": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "81",
+        "rhs": "84"
+    },
+    "81": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "82",
+        "rhs": "83"
+    },
+    "82": {
+        "type": 0,
+        "val": 4
+    },
+    "83": {
+        "type": 5,
+        "name": "f"
+    },
+    "84": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "85",
+        "rhs": "86"
+    },
+    "85": {
+        "type": 0,
+        "val": 2
+    },
+    "86": {
+        "type": 5,
+        "name": "i3"
+    },
+    "87": {
+        "type": 5,
+        "name": "i13"
+    },
+    "88": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "89",
+        "rhs": "90"
+    },
+    "89": {
+        "type": 5,
+        "name": "h"
+    },
+    "90": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "91",
+        "rhs": "92"
+    },
+    "91": {
+        "type": 0,
+        "val": 3
+    },
+    "92": {
+        "type": 5,
+        "name": "i3"
+    },
+    "93": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "94",
+        "rhs": "95"
+    },
+    "94": {
+        "type": 5,
+        "name": "w"
+    },
+    "95": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "96",
+        "rhs": "97"
+    },
+    "96": {
+        "type": 0,
+        "val": 3
+    },
+    "97": {
+        "type": 5,
+        "name": "i13"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_14.expr b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_14.expr
new file mode 100644
index 00000000..fe96f5ad
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_14.expr
@@ -0,0 +1,1392 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 4,
+        "name": "T7",
+        "shape": [
+            1,
+            64,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "1"
+    },
+    "1": {
+        "type": 105,
+        "inputs": [
+            "2"
+        ],
+        "expr": "78",
+        "outputShape": [
+            1,
+            64,
+            224,
+            224
+        ]
+    },
+    "2": {
+        "type": 4,
+        "name": "T6",
+        "shape": [
+            1,
+            256,
+            226,
+            226
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "3"
+    },
+    "3": {
+        "type": 102,
+        "inputs": [
+            "4",
+            "5"
+        ],
+        "expr": "31",
+        "args": [
+            2,
+            2,
+            1,
+            1,
+            1,
+            1
+        ]
+    },
+    "4": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "5": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            256,
+            16,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "6"
+    },
+    "6": {
+        "type": 105,
+        "inputs": [
+            "7"
+        ],
+        "expr": "8",
+        "outputShape": [
+            256,
+            16,
+            3,
+            3
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "9",
+        "loopVarRanges": {
+            "i64": [
+                0,
+                256
+            ],
+            "c": [
+                0,
+                16
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "9": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "10",
+        "indexes": [
+            "11",
+            "14",
+            "15",
+            "24"
+        ]
+    },
+    "10": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "11": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "12",
+        "rhs": "13"
+    },
+    "12": {
+        "type": 5,
+        "name": "i64"
+    },
+    "13": {
+        "type": 0,
+        "val": 4
+    },
+    "14": {
+        "type": 5,
+        "name": "c"
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "17"
+    },
+    "16": {
+        "type": 5,
+        "name": "i4"
+    },
+    "17": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "18",
+        "rhs": "19"
+    },
+    "18": {
+        "type": 0,
+        "val": 3
+    },
+    "19": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "20",
+        "rhs": "23"
+    },
+    "20": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "21",
+        "rhs": "22"
+    },
+    "21": {
+        "type": 5,
+        "name": "i64"
+    },
+    "22": {
+        "type": 0,
+        "val": 2
+    },
+    "23": {
+        "type": 0,
+        "val": 2
+    },
+    "24": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "25",
+        "rhs": "30"
+    },
+    "25": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "26",
+        "rhs": "27"
+    },
+    "26": {
+        "type": 0,
+        "val": 3
+    },
+    "27": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "28",
+        "rhs": "29"
+    },
+    "28": {
+        "type": 5,
+        "name": "i64"
+    },
+    "29": {
+        "type": 0,
+        "val": 2
+    },
+    "30": {
+        "type": 5,
+        "name": "i14"
+    },
+    "31": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "32",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "i64": [
+                0,
+                256
+            ],
+            "i58": [
+                0,
+                226
+            ],
+            "i62": [
+                0,
+                226
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "32": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "33",
+        "rhs": "47"
+    },
+    "33": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "34",
+        "indexes": [
+            "35",
+            "36",
+            "37",
+            "42"
+        ]
+    },
+    "34": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "35": {
+        "type": 5,
+        "name": "n"
+    },
+    "36": {
+        "type": 5,
+        "name": "c"
+    },
+    "37": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "38",
+        "rhs": "41"
+    },
+    "38": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "39",
+        "rhs": "40"
+    },
+    "39": {
+        "type": 5,
+        "name": "i4"
+    },
+    "40": {
+        "type": 5,
+        "name": "i58"
+    },
+    "41": {
+        "type": 0,
+        "val": -2
+    },
+    "42": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "43",
+        "rhs": "46"
+    },
+    "43": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "44",
+        "rhs": "45"
+    },
+    "44": {
+        "type": 5,
+        "name": "i14"
+    },
+    "45": {
+        "type": 5,
+        "name": "i62"
+    },
+    "46": {
+        "type": 0,
+        "val": -2
+    },
+    "47": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "48",
+        "indexes": [
+            "74",
+            "75",
+            "76",
+            "77"
+        ]
+    },
+    "48": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            256,
+            16,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "49"
+    },
+    "49": {
+        "type": 105,
+        "inputs": [
+            "50"
+        ],
+        "expr": "51",
+        "outputShape": [
+            256,
+            16,
+            3,
+            3
+        ]
+    },
+    "50": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "51": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "52",
+        "loopVarRanges": {
+            "i64": [
+                0,
+                256
+            ],
+            "c": [
+                0,
+                16
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "52": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "53",
+        "indexes": [
+            "54",
+            "57",
+            "58",
+            "67"
+        ]
+    },
+    "53": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "54": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "55",
+        "rhs": "56"
+    },
+    "55": {
+        "type": 5,
+        "name": "i64"
+    },
+    "56": {
+        "type": 0,
+        "val": 4
+    },
+    "57": {
+        "type": 5,
+        "name": "c"
+    },
+    "58": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "59",
+        "rhs": "60"
+    },
+    "59": {
+        "type": 5,
+        "name": "i4"
+    },
+    "60": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "61",
+        "rhs": "62"
+    },
+    "61": {
+        "type": 0,
+        "val": 3
+    },
+    "62": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "63",
+        "rhs": "66"
+    },
+    "63": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "64",
+        "rhs": "65"
+    },
+    "64": {
+        "type": 5,
+        "name": "i64"
+    },
+    "65": {
+        "type": 0,
+        "val": 2
+    },
+    "66": {
+        "type": 0,
+        "val": 2
+    },
+    "67": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "68",
+        "rhs": "73"
+    },
+    "68": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "69",
+        "rhs": "70"
+    },
+    "69": {
+        "type": 0,
+        "val": 3
+    },
+    "70": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "71",
+        "rhs": "72"
+    },
+    "71": {
+        "type": 5,
+        "name": "i64"
+    },
+    "72": {
+        "type": 0,
+        "val": 2
+    },
+    "73": {
+        "type": 5,
+        "name": "i14"
+    },
+    "74": {
+        "type": 5,
+        "name": "i64"
+    },
+    "75": {
+        "type": 5,
+        "name": "c"
+    },
+    "76": {
+        "type": 5,
+        "name": "i4"
+    },
+    "77": {
+        "type": 5,
+        "name": "i14"
+    },
+    "78": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "79",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ]
+        }
+    },
+    "79": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "80",
+        "indexes": [
+            "156",
+            "157",
+            "166",
+            "171"
+        ]
+    },
+    "80": {
+        "type": 4,
+        "name": "T6",
+        "shape": [
+            1,
+            256,
+            226,
+            226
+        ],
+        "paddings": [
+            0,
+            0,
+            1,
+            1
+        ],
+        "source": "81"
+    },
+    "81": {
+        "type": 102,
+        "inputs": [
+            "82",
+            "83"
+        ],
+        "expr": "109",
+        "args": [
+            2,
+            2,
+            1,
+            1,
+            1,
+            1
+        ]
+    },
+    "82": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "83": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            256,
+            16,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "84"
+    },
+    "84": {
+        "type": 105,
+        "inputs": [
+            "85"
+        ],
+        "expr": "86",
+        "outputShape": [
+            256,
+            16,
+            3,
+            3
+        ]
+    },
+    "85": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "86": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "87",
+        "loopVarRanges": {
+            "i64": [
+                0,
+                256
+            ],
+            "c": [
+                0,
+                16
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "87": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "88",
+        "indexes": [
+            "89",
+            "92",
+            "93",
+            "102"
+        ]
+    },
+    "88": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "89": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "90",
+        "rhs": "91"
+    },
+    "90": {
+        "type": 5,
+        "name": "i64"
+    },
+    "91": {
+        "type": 0,
+        "val": 4
+    },
+    "92": {
+        "type": 5,
+        "name": "c"
+    },
+    "93": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "94",
+        "rhs": "95"
+    },
+    "94": {
+        "type": 5,
+        "name": "i4"
+    },
+    "95": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "96",
+        "rhs": "97"
+    },
+    "96": {
+        "type": 0,
+        "val": 3
+    },
+    "97": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "98",
+        "rhs": "101"
+    },
+    "98": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "99",
+        "rhs": "100"
+    },
+    "99": {
+        "type": 5,
+        "name": "i64"
+    },
+    "100": {
+        "type": 0,
+        "val": 2
+    },
+    "101": {
+        "type": 0,
+        "val": 2
+    },
+    "102": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "103",
+        "rhs": "108"
+    },
+    "103": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "104",
+        "rhs": "105"
+    },
+    "104": {
+        "type": 0,
+        "val": 3
+    },
+    "105": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "106",
+        "rhs": "107"
+    },
+    "106": {
+        "type": 5,
+        "name": "i64"
+    },
+    "107": {
+        "type": 0,
+        "val": 2
+    },
+    "108": {
+        "type": 5,
+        "name": "i14"
+    },
+    "109": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "110",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "i64": [
+                0,
+                256
+            ],
+            "i58": [
+                0,
+                226
+            ],
+            "i62": [
+                0,
+                226
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "110": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "111",
+        "rhs": "125"
+    },
+    "111": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "112",
+        "indexes": [
+            "113",
+            "114",
+            "115",
+            "120"
+        ]
+    },
+    "112": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "113": {
+        "type": 5,
+        "name": "n"
+    },
+    "114": {
+        "type": 5,
+        "name": "c"
+    },
+    "115": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "116",
+        "rhs": "119"
+    },
+    "116": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "117",
+        "rhs": "118"
+    },
+    "117": {
+        "type": 5,
+        "name": "i4"
+    },
+    "118": {
+        "type": 5,
+        "name": "i58"
+    },
+    "119": {
+        "type": 0,
+        "val": -2
+    },
+    "120": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "121",
+        "rhs": "124"
+    },
+    "121": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "122",
+        "rhs": "123"
+    },
+    "122": {
+        "type": 5,
+        "name": "i14"
+    },
+    "123": {
+        "type": 5,
+        "name": "i62"
+    },
+    "124": {
+        "type": 0,
+        "val": -2
+    },
+    "125": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "126",
+        "indexes": [
+            "152",
+            "153",
+            "154",
+            "155"
+        ]
+    },
+    "126": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            256,
+            16,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "127"
+    },
+    "127": {
+        "type": 105,
+        "inputs": [
+            "128"
+        ],
+        "expr": "129",
+        "outputShape": [
+            256,
+            16,
+            3,
+            3
+        ]
+    },
+    "128": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "129": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "130",
+        "loopVarRanges": {
+            "i64": [
+                0,
+                256
+            ],
+            "c": [
+                0,
+                16
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "130": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "131",
+        "indexes": [
+            "132",
+            "135",
+            "136",
+            "145"
+        ]
+    },
+    "131": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "132": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "133",
+        "rhs": "134"
+    },
+    "133": {
+        "type": 5,
+        "name": "i64"
+    },
+    "134": {
+        "type": 0,
+        "val": 4
+    },
+    "135": {
+        "type": 5,
+        "name": "c"
+    },
+    "136": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "137",
+        "rhs": "138"
+    },
+    "137": {
+        "type": 5,
+        "name": "i4"
+    },
+    "138": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "139",
+        "rhs": "140"
+    },
+    "139": {
+        "type": 0,
+        "val": 3
+    },
+    "140": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "141",
+        "rhs": "144"
+    },
+    "141": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "142",
+        "rhs": "143"
+    },
+    "142": {
+        "type": 5,
+        "name": "i64"
+    },
+    "143": {
+        "type": 0,
+        "val": 2
+    },
+    "144": {
+        "type": 0,
+        "val": 2
+    },
+    "145": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "146",
+        "rhs": "151"
+    },
+    "146": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "147",
+        "rhs": "148"
+    },
+    "147": {
+        "type": 0,
+        "val": 3
+    },
+    "148": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "149",
+        "rhs": "150"
+    },
+    "149": {
+        "type": 5,
+        "name": "i64"
+    },
+    "150": {
+        "type": 0,
+        "val": 2
+    },
+    "151": {
+        "type": 5,
+        "name": "i14"
+    },
+    "152": {
+        "type": 5,
+        "name": "i64"
+    },
+    "153": {
+        "type": 5,
+        "name": "c"
+    },
+    "154": {
+        "type": 5,
+        "name": "i4"
+    },
+    "155": {
+        "type": 5,
+        "name": "i14"
+    },
+    "156": {
+        "type": 5,
+        "name": "n"
+    },
+    "157": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "158",
+        "rhs": "165"
+    },
+    "158": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "159",
+        "rhs": "162"
+    },
+    "159": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "160",
+        "rhs": "161"
+    },
+    "160": {
+        "type": 0,
+        "val": 4
+    },
+    "161": {
+        "type": 5,
+        "name": "f"
+    },
+    "162": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "163",
+        "rhs": "164"
+    },
+    "163": {
+        "type": 0,
+        "val": 2
+    },
+    "164": {
+        "type": 5,
+        "name": "i3"
+    },
+    "165": {
+        "type": 5,
+        "name": "i13"
+    },
+    "166": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "167",
+        "rhs": "168"
+    },
+    "167": {
+        "type": 5,
+        "name": "h"
+    },
+    "168": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "169",
+        "rhs": "170"
+    },
+    "169": {
+        "type": 0,
+        "val": 3
+    },
+    "170": {
+        "type": 5,
+        "name": "i3"
+    },
+    "171": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "172",
+        "rhs": "173"
+    },
+    "172": {
+        "type": 5,
+        "name": "w"
+    },
+    "173": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "174",
+        "rhs": "175"
+    },
+    "174": {
+        "type": 0,
+        "val": 3
+    },
+    "175": {
+        "type": 5,
+        "name": "i13"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_2.expr b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_2.expr
new file mode 100644
index 00000000..26f87f2a
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_2.expr
@@ -0,0 +1,267 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i14": [
+                0,
+                3
+            ],
+            "i3": [
+                0,
+                2
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "1": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "2",
+        "rhs": "24"
+    },
+    "2": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "3",
+        "indexes": [
+            "4",
+            "5",
+            "6",
+            "15"
+        ]
+    },
+    "3": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "4": {
+        "type": 5,
+        "name": "n"
+    },
+    "5": {
+        "type": 5,
+        "name": "c"
+    },
+    "6": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "7",
+        "rhs": "14"
+    },
+    "7": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "8",
+        "rhs": "9"
+    },
+    "8": {
+        "type": 5,
+        "name": "h"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "13"
+    },
+    "10": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "11",
+        "rhs": "12"
+    },
+    "11": {
+        "type": 0,
+        "val": 3
+    },
+    "12": {
+        "type": 5,
+        "name": "i3"
+    },
+    "13": {
+        "type": 5,
+        "name": "i4"
+    },
+    "14": {
+        "type": 0,
+        "val": -2
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "23"
+    },
+    "16": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "17",
+        "rhs": "18"
+    },
+    "17": {
+        "type": 5,
+        "name": "w"
+    },
+    "18": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "19",
+        "rhs": "22"
+    },
+    "19": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "20",
+        "rhs": "21"
+    },
+    "20": {
+        "type": 0,
+        "val": 3
+    },
+    "21": {
+        "type": 5,
+        "name": "i13"
+    },
+    "22": {
+        "type": 5,
+        "name": "i14"
+    },
+    "23": {
+        "type": 0,
+        "val": -2
+    },
+    "24": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "25",
+        "indexes": [
+            "26",
+            "27",
+            "28",
+            "33"
+        ]
+    },
+    "25": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "26": {
+        "type": 5,
+        "name": "f"
+    },
+    "27": {
+        "type": 5,
+        "name": "c"
+    },
+    "28": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "29",
+        "rhs": "32"
+    },
+    "29": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "30",
+        "rhs": "31"
+    },
+    "30": {
+        "type": 0,
+        "val": 3
+    },
+    "31": {
+        "type": 5,
+        "name": "i3"
+    },
+    "32": {
+        "type": 5,
+        "name": "i4"
+    },
+    "33": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "34",
+        "rhs": "37"
+    },
+    "34": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 0,
+        "val": 3
+    },
+    "36": {
+        "type": 5,
+        "name": "i13"
+    },
+    "37": {
+        "type": 5,
+        "name": "i14"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_3.expr b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_3.expr
new file mode 100644
index 00000000..9b36662a
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_3.expr
@@ -0,0 +1,344 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "40",
+            "41",
+            "42",
+            "43",
+            "44",
+            "45"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "26"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "5",
+        "indexes": [
+            "6",
+            "7",
+            "8",
+            "17"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "6": {
+        "type": 5,
+        "name": "n"
+    },
+    "7": {
+        "type": 5,
+        "name": "c"
+    },
+    "8": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "9",
+        "rhs": "16"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "11"
+    },
+    "10": {
+        "type": 5,
+        "name": "h"
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "15"
+    },
+    "12": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "13",
+        "rhs": "14"
+    },
+    "13": {
+        "type": 0,
+        "val": 3
+    },
+    "14": {
+        "type": 5,
+        "name": "i3"
+    },
+    "15": {
+        "type": 5,
+        "name": "i4"
+    },
+    "16": {
+        "type": 0,
+        "val": -2
+    },
+    "17": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "18",
+        "rhs": "25"
+    },
+    "18": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "19",
+        "rhs": "20"
+    },
+    "19": {
+        "type": 5,
+        "name": "w"
+    },
+    "20": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "21",
+        "rhs": "24"
+    },
+    "21": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "22",
+        "rhs": "23"
+    },
+    "22": {
+        "type": 0,
+        "val": 3
+    },
+    "23": {
+        "type": 5,
+        "name": "i13"
+    },
+    "24": {
+        "type": 5,
+        "name": "i14"
+    },
+    "25": {
+        "type": 0,
+        "val": -2
+    },
+    "26": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "27",
+        "indexes": [
+            "28",
+            "29",
+            "30",
+            "35"
+        ]
+    },
+    "27": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "28": {
+        "type": 5,
+        "name": "f"
+    },
+    "29": {
+        "type": 5,
+        "name": "c"
+    },
+    "30": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "31",
+        "rhs": "34"
+    },
+    "31": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "32",
+        "rhs": "33"
+    },
+    "32": {
+        "type": 0,
+        "val": 3
+    },
+    "33": {
+        "type": 5,
+        "name": "i3"
+    },
+    "34": {
+        "type": 5,
+        "name": "i4"
+    },
+    "35": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "36",
+        "rhs": "39"
+    },
+    "36": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "37",
+        "rhs": "38"
+    },
+    "37": {
+        "type": 0,
+        "val": 3
+    },
+    "38": {
+        "type": 5,
+        "name": "i13"
+    },
+    "39": {
+        "type": 5,
+        "name": "i14"
+    },
+    "40": {
+        "type": 5,
+        "name": "i13"
+    },
+    "41": {
+        "type": 5,
+        "name": "i3"
+    },
+    "42": {
+        "type": 5,
+        "name": "n"
+    },
+    "43": {
+        "type": 5,
+        "name": "f"
+    },
+    "44": {
+        "type": 5,
+        "name": "h"
+    },
+    "45": {
+        "type": 5,
+        "name": "w"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_4.expr b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_4.expr
new file mode 100644
index 00000000..2d3e4a58
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_4.expr
@@ -0,0 +1,419 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "48",
+            "49",
+            "50",
+            "51",
+            "52",
+            "53"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "4",
+        "indexes": [
+            "38",
+            "43",
+            "44",
+            "45",
+            "46",
+            "47"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i58": [
+                0,
+                227
+            ],
+            "i59": [
+                0,
+                2
+            ],
+            "i13": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "24"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "7",
+        "indexes": [
+            "8",
+            "9",
+            "10",
+            "15"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 5,
+        "name": "n"
+    },
+    "9": {
+        "type": 5,
+        "name": "c"
+    },
+    "10": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "11",
+        "rhs": "14"
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "13"
+    },
+    "12": {
+        "type": 5,
+        "name": "i4"
+    },
+    "13": {
+        "type": 5,
+        "name": "i58"
+    },
+    "14": {
+        "type": 0,
+        "val": -2
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "23"
+    },
+    "16": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "17",
+        "rhs": "22"
+    },
+    "17": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "18",
+        "rhs": "21"
+    },
+    "18": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "19",
+        "rhs": "20"
+    },
+    "19": {
+        "type": 0,
+        "val": 3
+    },
+    "20": {
+        "type": 5,
+        "name": "i13"
+    },
+    "21": {
+        "type": 5,
+        "name": "i14"
+    },
+    "22": {
+        "type": 5,
+        "name": "w"
+    },
+    "23": {
+        "type": 0,
+        "val": -2
+    },
+    "24": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "25",
+        "indexes": [
+            "26",
+            "27",
+            "28",
+            "33"
+        ]
+    },
+    "25": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "26": {
+        "type": 5,
+        "name": "f"
+    },
+    "27": {
+        "type": 5,
+        "name": "c"
+    },
+    "28": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "29",
+        "rhs": "30"
+    },
+    "29": {
+        "type": 5,
+        "name": "i4"
+    },
+    "30": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "31",
+        "rhs": "32"
+    },
+    "31": {
+        "type": 0,
+        "val": 3
+    },
+    "32": {
+        "type": 5,
+        "name": "i59"
+    },
+    "33": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "34",
+        "rhs": "37"
+    },
+    "34": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 0,
+        "val": 3
+    },
+    "36": {
+        "type": 5,
+        "name": "i13"
+    },
+    "37": {
+        "type": 5,
+        "name": "i14"
+    },
+    "38": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "39",
+        "rhs": "40"
+    },
+    "39": {
+        "type": 5,
+        "name": "h"
+    },
+    "40": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "41",
+        "rhs": "42"
+    },
+    "41": {
+        "type": 0,
+        "val": 3
+    },
+    "42": {
+        "type": 5,
+        "name": "i3"
+    },
+    "43": {
+        "type": 5,
+        "name": "i3"
+    },
+    "44": {
+        "type": 5,
+        "name": "i13"
+    },
+    "45": {
+        "type": 5,
+        "name": "n"
+    },
+    "46": {
+        "type": 5,
+        "name": "f"
+    },
+    "47": {
+        "type": 5,
+        "name": "w"
+    },
+    "48": {
+        "type": 5,
+        "name": "i13"
+    },
+    "49": {
+        "type": 5,
+        "name": "i3"
+    },
+    "50": {
+        "type": 5,
+        "name": "n"
+    },
+    "51": {
+        "type": 5,
+        "name": "f"
+    },
+    "52": {
+        "type": 5,
+        "name": "h"
+    },
+    "53": {
+        "type": 5,
+        "name": "w"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_5.expr b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_5.expr
new file mode 100644
index 00000000..3189d472
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_5.expr
@@ -0,0 +1,344 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "36",
+            "41",
+            "42",
+            "43",
+            "44",
+            "45"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i58": [
+                0,
+                227
+            ],
+            "i59": [
+                0,
+                2
+            ],
+            "i13": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "22"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "5",
+        "indexes": [
+            "6",
+            "7",
+            "8",
+            "13"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "6": {
+        "type": 5,
+        "name": "n"
+    },
+    "7": {
+        "type": 5,
+        "name": "c"
+    },
+    "8": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "9",
+        "rhs": "12"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "11"
+    },
+    "10": {
+        "type": 5,
+        "name": "i4"
+    },
+    "11": {
+        "type": 5,
+        "name": "i58"
+    },
+    "12": {
+        "type": 0,
+        "val": -2
+    },
+    "13": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "14",
+        "rhs": "21"
+    },
+    "14": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "15",
+        "rhs": "20"
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "19"
+    },
+    "16": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "17",
+        "rhs": "18"
+    },
+    "17": {
+        "type": 0,
+        "val": 3
+    },
+    "18": {
+        "type": 5,
+        "name": "i13"
+    },
+    "19": {
+        "type": 5,
+        "name": "i14"
+    },
+    "20": {
+        "type": 5,
+        "name": "w"
+    },
+    "21": {
+        "type": 0,
+        "val": -2
+    },
+    "22": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "23",
+        "indexes": [
+            "24",
+            "25",
+            "26",
+            "31"
+        ]
+    },
+    "23": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "24": {
+        "type": 5,
+        "name": "f"
+    },
+    "25": {
+        "type": 5,
+        "name": "c"
+    },
+    "26": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "27",
+        "rhs": "28"
+    },
+    "27": {
+        "type": 5,
+        "name": "i4"
+    },
+    "28": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "29",
+        "rhs": "30"
+    },
+    "29": {
+        "type": 0,
+        "val": 3
+    },
+    "30": {
+        "type": 5,
+        "name": "i59"
+    },
+    "31": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "32",
+        "rhs": "35"
+    },
+    "32": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "33",
+        "rhs": "34"
+    },
+    "33": {
+        "type": 0,
+        "val": 3
+    },
+    "34": {
+        "type": 5,
+        "name": "i13"
+    },
+    "35": {
+        "type": 5,
+        "name": "i14"
+    },
+    "36": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "37",
+        "rhs": "38"
+    },
+    "37": {
+        "type": 5,
+        "name": "h"
+    },
+    "38": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "39",
+        "rhs": "40"
+    },
+    "39": {
+        "type": 0,
+        "val": 3
+    },
+    "40": {
+        "type": 5,
+        "name": "i3"
+    },
+    "41": {
+        "type": 5,
+        "name": "i3"
+    },
+    "42": {
+        "type": 5,
+        "name": "i13"
+    },
+    "43": {
+        "type": 5,
+        "name": "n"
+    },
+    "44": {
+        "type": 5,
+        "name": "f"
+    },
+    "45": {
+        "type": 5,
+        "name": "w"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_6.expr b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_6.expr
new file mode 100644
index 00000000..1008a227
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_6.expr
@@ -0,0 +1,419 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "44",
+            "49",
+            "50",
+            "51",
+            "52",
+            "53"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i58": [
+                0,
+                227
+            ],
+            "i59": [
+                0,
+                2
+            ],
+            "i13": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ],
+            "w": [
+                0,
+                224
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "4",
+        "indexes": [
+            "34",
+            "39",
+            "40",
+            "41",
+            "42",
+            "43"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i62": [
+                0,
+                227
+            ],
+            "i63": [
+                0,
+                2
+            ],
+            "i58": [
+                0,
+                227
+            ],
+            "i59": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "20"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "7",
+        "indexes": [
+            "8",
+            "9",
+            "10",
+            "15"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 5,
+        "name": "n"
+    },
+    "9": {
+        "type": 5,
+        "name": "c"
+    },
+    "10": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "11",
+        "rhs": "14"
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "13"
+    },
+    "12": {
+        "type": 5,
+        "name": "i4"
+    },
+    "13": {
+        "type": 5,
+        "name": "i58"
+    },
+    "14": {
+        "type": 0,
+        "val": -2
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "19"
+    },
+    "16": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "17",
+        "rhs": "18"
+    },
+    "17": {
+        "type": 5,
+        "name": "i14"
+    },
+    "18": {
+        "type": 5,
+        "name": "i62"
+    },
+    "19": {
+        "type": 0,
+        "val": -2
+    },
+    "20": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "21",
+        "indexes": [
+            "22",
+            "23",
+            "24",
+            "29"
+        ]
+    },
+    "21": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "22": {
+        "type": 5,
+        "name": "f"
+    },
+    "23": {
+        "type": 5,
+        "name": "c"
+    },
+    "24": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "25",
+        "rhs": "26"
+    },
+    "25": {
+        "type": 5,
+        "name": "i4"
+    },
+    "26": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "27",
+        "rhs": "28"
+    },
+    "27": {
+        "type": 0,
+        "val": 3
+    },
+    "28": {
+        "type": 5,
+        "name": "i59"
+    },
+    "29": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "30",
+        "rhs": "31"
+    },
+    "30": {
+        "type": 5,
+        "name": "i14"
+    },
+    "31": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "32",
+        "rhs": "33"
+    },
+    "32": {
+        "type": 0,
+        "val": 3
+    },
+    "33": {
+        "type": 5,
+        "name": "i63"
+    },
+    "34": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 5,
+        "name": "w"
+    },
+    "36": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "37",
+        "rhs": "38"
+    },
+    "37": {
+        "type": 0,
+        "val": 3
+    },
+    "38": {
+        "type": 5,
+        "name": "i13"
+    },
+    "39": {
+        "type": 5,
+        "name": "i13"
+    },
+    "40": {
+        "type": 5,
+        "name": "i58"
+    },
+    "41": {
+        "type": 5,
+        "name": "i59"
+    },
+    "42": {
+        "type": 5,
+        "name": "n"
+    },
+    "43": {
+        "type": 5,
+        "name": "f"
+    },
+    "44": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "45",
+        "rhs": "46"
+    },
+    "45": {
+        "type": 5,
+        "name": "h"
+    },
+    "46": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "47",
+        "rhs": "48"
+    },
+    "47": {
+        "type": 0,
+        "val": 3
+    },
+    "48": {
+        "type": 5,
+        "name": "i3"
+    },
+    "49": {
+        "type": 5,
+        "name": "i3"
+    },
+    "50": {
+        "type": 5,
+        "name": "i13"
+    },
+    "51": {
+        "type": 5,
+        "name": "n"
+    },
+    "52": {
+        "type": 5,
+        "name": "f"
+    },
+    "53": {
+        "type": 5,
+        "name": "w"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_7.expr b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_7.expr
new file mode 100644
index 00000000..7acc2620
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_7.expr
@@ -0,0 +1,344 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "32",
+            "37",
+            "38",
+            "43",
+            "44",
+            "45"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i62": [
+                0,
+                227
+            ],
+            "i63": [
+                0,
+                2
+            ],
+            "i58": [
+                0,
+                227
+            ],
+            "i59": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "18"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "5",
+        "indexes": [
+            "6",
+            "7",
+            "8",
+            "13"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "6": {
+        "type": 5,
+        "name": "n"
+    },
+    "7": {
+        "type": 5,
+        "name": "c"
+    },
+    "8": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "9",
+        "rhs": "12"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "11"
+    },
+    "10": {
+        "type": 5,
+        "name": "i4"
+    },
+    "11": {
+        "type": 5,
+        "name": "i58"
+    },
+    "12": {
+        "type": 0,
+        "val": -2
+    },
+    "13": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "14",
+        "rhs": "17"
+    },
+    "14": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "15",
+        "rhs": "16"
+    },
+    "15": {
+        "type": 5,
+        "name": "i14"
+    },
+    "16": {
+        "type": 5,
+        "name": "i62"
+    },
+    "17": {
+        "type": 0,
+        "val": -2
+    },
+    "18": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "19",
+        "indexes": [
+            "20",
+            "21",
+            "22",
+            "27"
+        ]
+    },
+    "19": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "20": {
+        "type": 5,
+        "name": "f"
+    },
+    "21": {
+        "type": 5,
+        "name": "c"
+    },
+    "22": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "23",
+        "rhs": "24"
+    },
+    "23": {
+        "type": 5,
+        "name": "i4"
+    },
+    "24": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "25",
+        "rhs": "26"
+    },
+    "25": {
+        "type": 0,
+        "val": 3
+    },
+    "26": {
+        "type": 5,
+        "name": "i59"
+    },
+    "27": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "28",
+        "rhs": "29"
+    },
+    "28": {
+        "type": 5,
+        "name": "i14"
+    },
+    "29": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "30",
+        "rhs": "31"
+    },
+    "30": {
+        "type": 0,
+        "val": 3
+    },
+    "31": {
+        "type": 5,
+        "name": "i63"
+    },
+    "32": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "33",
+        "rhs": "34"
+    },
+    "33": {
+        "type": 5,
+        "name": "w"
+    },
+    "34": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 0,
+        "val": 3
+    },
+    "36": {
+        "type": 5,
+        "name": "i13"
+    },
+    "37": {
+        "type": 5,
+        "name": "i13"
+    },
+    "38": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "39",
+        "rhs": "40"
+    },
+    "39": {
+        "type": 5,
+        "name": "h"
+    },
+    "40": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "41",
+        "rhs": "42"
+    },
+    "41": {
+        "type": 0,
+        "val": 3
+    },
+    "42": {
+        "type": 5,
+        "name": "i3"
+    },
+    "43": {
+        "type": 5,
+        "name": "i3"
+    },
+    "44": {
+        "type": 5,
+        "name": "n"
+    },
+    "45": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_8.expr b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_8.expr
new file mode 100644
index 00000000..ab54148b
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_8.expr
@@ -0,0 +1,344 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "32",
+            "37",
+            "38",
+            "43",
+            "44",
+            "45"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            1,
+            0,
+            1,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i62": [
+                0,
+                226
+            ],
+            "i63": [
+                0,
+                2
+            ],
+            "i58": [
+                0,
+                226
+            ],
+            "i59": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "18"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "5",
+        "indexes": [
+            "6",
+            "7",
+            "8",
+            "13"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "6": {
+        "type": 5,
+        "name": "n"
+    },
+    "7": {
+        "type": 5,
+        "name": "c"
+    },
+    "8": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "9",
+        "rhs": "12"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "11"
+    },
+    "10": {
+        "type": 5,
+        "name": "i4"
+    },
+    "11": {
+        "type": 5,
+        "name": "i58"
+    },
+    "12": {
+        "type": 0,
+        "val": -2
+    },
+    "13": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "14",
+        "rhs": "17"
+    },
+    "14": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "15",
+        "rhs": "16"
+    },
+    "15": {
+        "type": 5,
+        "name": "i14"
+    },
+    "16": {
+        "type": 5,
+        "name": "i62"
+    },
+    "17": {
+        "type": 0,
+        "val": -2
+    },
+    "18": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "19",
+        "indexes": [
+            "20",
+            "21",
+            "22",
+            "27"
+        ]
+    },
+    "19": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "20": {
+        "type": 5,
+        "name": "f"
+    },
+    "21": {
+        "type": 5,
+        "name": "c"
+    },
+    "22": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "23",
+        "rhs": "24"
+    },
+    "23": {
+        "type": 5,
+        "name": "i4"
+    },
+    "24": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "25",
+        "rhs": "26"
+    },
+    "25": {
+        "type": 0,
+        "val": 3
+    },
+    "26": {
+        "type": 5,
+        "name": "i59"
+    },
+    "27": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "28",
+        "rhs": "29"
+    },
+    "28": {
+        "type": 5,
+        "name": "i14"
+    },
+    "29": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "30",
+        "rhs": "31"
+    },
+    "30": {
+        "type": 0,
+        "val": 3
+    },
+    "31": {
+        "type": 5,
+        "name": "i63"
+    },
+    "32": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "33",
+        "rhs": "34"
+    },
+    "33": {
+        "type": 5,
+        "name": "w"
+    },
+    "34": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 0,
+        "val": 3
+    },
+    "36": {
+        "type": 5,
+        "name": "i13"
+    },
+    "37": {
+        "type": 5,
+        "name": "i13"
+    },
+    "38": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "39",
+        "rhs": "40"
+    },
+    "39": {
+        "type": 5,
+        "name": "h"
+    },
+    "40": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "41",
+        "rhs": "42"
+    },
+    "41": {
+        "type": 0,
+        "val": 3
+    },
+    "42": {
+        "type": 5,
+        "name": "i3"
+    },
+    "43": {
+        "type": 5,
+        "name": "i3"
+    },
+    "44": {
+        "type": 5,
+        "name": "n"
+    },
+    "45": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_9.expr b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_9.expr
new file mode 100644
index 00000000..840ea10e
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_9.expr
@@ -0,0 +1,581 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                2
+            ],
+            "i3": [
+                0,
+                2
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "63",
+            "68",
+            "69",
+            "74",
+            "75",
+            "76"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            1,
+            0,
+            1,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i62": [
+                0,
+                226
+            ],
+            "i63": [
+                0,
+                2
+            ],
+            "i58": [
+                0,
+                226
+            ],
+            "i59": [
+                0,
+                2
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                64
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "4",
+        "indexes": [
+            "51",
+            "60",
+            "61",
+            "62"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i64": [
+                0,
+                256
+            ],
+            "i62": [
+                0,
+                226
+            ],
+            "i58": [
+                0,
+                226
+            ],
+            "n": [
+                0,
+                1
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "20"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "7",
+        "indexes": [
+            "8",
+            "9",
+            "10",
+            "15"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            3,
+            3
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 5,
+        "name": "n"
+    },
+    "9": {
+        "type": 5,
+        "name": "c"
+    },
+    "10": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "11",
+        "rhs": "14"
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "13"
+    },
+    "12": {
+        "type": 5,
+        "name": "i4"
+    },
+    "13": {
+        "type": 5,
+        "name": "i58"
+    },
+    "14": {
+        "type": 0,
+        "val": -2
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "19"
+    },
+    "16": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "17",
+        "rhs": "18"
+    },
+    "17": {
+        "type": 5,
+        "name": "i14"
+    },
+    "18": {
+        "type": 5,
+        "name": "i62"
+    },
+    "19": {
+        "type": 0,
+        "val": -2
+    },
+    "20": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "21",
+        "indexes": [
+            "47",
+            "48",
+            "49",
+            "50"
+        ]
+    },
+    "21": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            256,
+            16,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "22"
+    },
+    "22": {
+        "type": 105,
+        "inputs": [
+            "23"
+        ],
+        "expr": "24",
+        "outputShape": [
+            256,
+            16,
+            3,
+            3
+        ]
+    },
+    "23": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "24": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "25",
+        "loopVarRanges": {
+            "i64": [
+                0,
+                256
+            ],
+            "c": [
+                0,
+                16
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "25": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "26",
+        "indexes": [
+            "27",
+            "30",
+            "31",
+            "40"
+        ]
+    },
+    "26": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            64,
+            16,
+            6,
+            6
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "27": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "28",
+        "rhs": "29"
+    },
+    "28": {
+        "type": 5,
+        "name": "i64"
+    },
+    "29": {
+        "type": 0,
+        "val": 4
+    },
+    "30": {
+        "type": 5,
+        "name": "c"
+    },
+    "31": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "32",
+        "rhs": "33"
+    },
+    "32": {
+        "type": 5,
+        "name": "i4"
+    },
+    "33": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "34",
+        "rhs": "35"
+    },
+    "34": {
+        "type": 0,
+        "val": 3
+    },
+    "35": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "36",
+        "rhs": "39"
+    },
+    "36": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "37",
+        "rhs": "38"
+    },
+    "37": {
+        "type": 5,
+        "name": "i64"
+    },
+    "38": {
+        "type": 0,
+        "val": 2
+    },
+    "39": {
+        "type": 0,
+        "val": 2
+    },
+    "40": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "41",
+        "rhs": "46"
+    },
+    "41": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "42",
+        "rhs": "43"
+    },
+    "42": {
+        "type": 0,
+        "val": 3
+    },
+    "43": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "44",
+        "rhs": "45"
+    },
+    "44": {
+        "type": 5,
+        "name": "i64"
+    },
+    "45": {
+        "type": 0,
+        "val": 2
+    },
+    "46": {
+        "type": 5,
+        "name": "i14"
+    },
+    "47": {
+        "type": 5,
+        "name": "i64"
+    },
+    "48": {
+        "type": 5,
+        "name": "c"
+    },
+    "49": {
+        "type": 5,
+        "name": "i4"
+    },
+    "50": {
+        "type": 5,
+        "name": "i14"
+    },
+    "51": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "52",
+        "rhs": "59"
+    },
+    "52": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "53",
+        "rhs": "56"
+    },
+    "53": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "54",
+        "rhs": "55"
+    },
+    "54": {
+        "type": 0,
+        "val": 4
+    },
+    "55": {
+        "type": 5,
+        "name": "f"
+    },
+    "56": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "57",
+        "rhs": "58"
+    },
+    "57": {
+        "type": 0,
+        "val": 2
+    },
+    "58": {
+        "type": 5,
+        "name": "i59"
+    },
+    "59": {
+        "type": 5,
+        "name": "i63"
+    },
+    "60": {
+        "type": 5,
+        "name": "i62"
+    },
+    "61": {
+        "type": 5,
+        "name": "i58"
+    },
+    "62": {
+        "type": 5,
+        "name": "n"
+    },
+    "63": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "64",
+        "rhs": "65"
+    },
+    "64": {
+        "type": 5,
+        "name": "w"
+    },
+    "65": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "66",
+        "rhs": "67"
+    },
+    "66": {
+        "type": 0,
+        "val": 3
+    },
+    "67": {
+        "type": 5,
+        "name": "i13"
+    },
+    "68": {
+        "type": 5,
+        "name": "i13"
+    },
+    "69": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "70",
+        "rhs": "71"
+    },
+    "70": {
+        "type": 5,
+        "name": "h"
+    },
+    "71": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "72",
+        "rhs": "73"
+    },
+    "72": {
+        "type": 0,
+        "val": 3
+    },
+    "73": {
+        "type": 5,
+        "name": "i3"
+    },
+    "74": {
+        "type": 5,
+        "name": "i3"
+    },
+    "75": {
+        "type": 5,
+        "name": "n"
+    },
+    "76": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_0.expr b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_0.expr
new file mode 100644
index 00000000..b37977dc
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_0.expr
@@ -0,0 +1,180 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 0 Init: ruleBasedDFS: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                16
+            ],
+            "r": [
+                0,
+                9
+            ],
+            "s": [
+                0,
+                9
+            ]
+        }
+    },
+    "1": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "2",
+        "rhs": "16"
+    },
+    "2": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "3",
+        "indexes": [
+            "4",
+            "5",
+            "6",
+            "11"
+        ]
+    },
+    "3": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            4,
+            4
+        ],
+        "source": "-1"
+    },
+    "4": {
+        "type": 5,
+        "name": "n"
+    },
+    "5": {
+        "type": 5,
+        "name": "c"
+    },
+    "6": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "7",
+        "rhs": "10"
+    },
+    "7": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "8",
+        "rhs": "9"
+    },
+    "8": {
+        "type": 5,
+        "name": "h"
+    },
+    "9": {
+        "type": 5,
+        "name": "r"
+    },
+    "10": {
+        "type": 0,
+        "val": -4
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "15"
+    },
+    "12": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "13",
+        "rhs": "14"
+    },
+    "13": {
+        "type": 5,
+        "name": "w"
+    },
+    "14": {
+        "type": 5,
+        "name": "s"
+    },
+    "15": {
+        "type": 0,
+        "val": -4
+    },
+    "16": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "17",
+        "indexes": [
+            "18",
+            "19",
+            "20",
+            "21"
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "18": {
+        "type": 5,
+        "name": "f"
+    },
+    "19": {
+        "type": 5,
+        "name": "c"
+    },
+    "20": {
+        "type": 5,
+        "name": "r"
+    },
+    "21": {
+        "type": 5,
+        "name": "s"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_1.expr b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_1.expr
new file mode 100644
index 00000000..5055ff41
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_1.expr
@@ -0,0 +1,224 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 1 Rule1VariableSplit: Old iters: [r], new iters: [i3,i4] phis: [] psis: [((3 * i3) + i4)]",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i3": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ],
+            "s": [
+                0,
+                9
+            ]
+        }
+    },
+    "1": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "2",
+        "rhs": "20"
+    },
+    "2": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "3",
+        "indexes": [
+            "4",
+            "5",
+            "6",
+            "15"
+        ]
+    },
+    "3": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            4,
+            4
+        ],
+        "source": "-1"
+    },
+    "4": {
+        "type": 5,
+        "name": "n"
+    },
+    "5": {
+        "type": 5,
+        "name": "c"
+    },
+    "6": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "7",
+        "rhs": "14"
+    },
+    "7": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "8",
+        "rhs": "9"
+    },
+    "8": {
+        "type": 5,
+        "name": "h"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "13"
+    },
+    "10": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "11",
+        "rhs": "12"
+    },
+    "11": {
+        "type": 0,
+        "val": 3
+    },
+    "12": {
+        "type": 5,
+        "name": "i3"
+    },
+    "13": {
+        "type": 5,
+        "name": "i4"
+    },
+    "14": {
+        "type": 0,
+        "val": -4
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "19"
+    },
+    "16": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "17",
+        "rhs": "18"
+    },
+    "17": {
+        "type": 5,
+        "name": "w"
+    },
+    "18": {
+        "type": 5,
+        "name": "s"
+    },
+    "19": {
+        "type": 0,
+        "val": -4
+    },
+    "20": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "21",
+        "indexes": [
+            "22",
+            "23",
+            "24",
+            "29"
+        ]
+    },
+    "21": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "22": {
+        "type": 5,
+        "name": "f"
+    },
+    "23": {
+        "type": 5,
+        "name": "c"
+    },
+    "24": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "25",
+        "rhs": "28"
+    },
+    "25": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "26",
+        "rhs": "27"
+    },
+    "26": {
+        "type": 0,
+        "val": 3
+    },
+    "27": {
+        "type": 5,
+        "name": "i3"
+    },
+    "28": {
+        "type": 5,
+        "name": "i4"
+    },
+    "29": {
+        "type": 5,
+        "name": "s"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_10.expr b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_10.expr
new file mode 100644
index 00000000..e2cc2918
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_10.expr
@@ -0,0 +1,637 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 10 Rule8GuidedDLT: Toward Conv. guidedDLTDLMismatch ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                3
+            ],
+            "i3": [
+                0,
+                3
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "69",
+            "74",
+            "75",
+            "80",
+            "81",
+            "82"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            2,
+            0,
+            2,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i62": [
+                2,
+                228
+            ],
+            "i63": [
+                0,
+                3
+            ],
+            "i58": [
+                2,
+                228
+            ],
+            "i59": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "4",
+        "indexes": [
+            "57",
+            "66",
+            "67",
+            "68"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i64": [
+                0,
+                288
+            ],
+            "i62": [
+                2,
+                228
+            ],
+            "i58": [
+                2,
+                228
+            ],
+            "n": [
+                0,
+                8
+            ]
+        }
+    },
+    "5": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "6",
+        "indexes": [
+            "53",
+            "54",
+            "55",
+            "56"
+        ]
+    },
+    "6": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "7",
+        "loopVarRanges": {
+            "n": [
+                0,
+                8
+            ],
+            "i64": [
+                0,
+                288
+            ],
+            "i58": [
+                2,
+                228
+            ],
+            "i62": [
+                2,
+                228
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "7": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "8",
+        "rhs": "22"
+    },
+    "8": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "9",
+        "indexes": [
+            "10",
+            "11",
+            "12",
+            "17"
+        ]
+    },
+    "9": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            4,
+            4
+        ],
+        "source": "-1"
+    },
+    "10": {
+        "type": 5,
+        "name": "n"
+    },
+    "11": {
+        "type": 5,
+        "name": "c"
+    },
+    "12": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "13",
+        "rhs": "16"
+    },
+    "13": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "14",
+        "rhs": "15"
+    },
+    "14": {
+        "type": 5,
+        "name": "i4"
+    },
+    "15": {
+        "type": 5,
+        "name": "i58"
+    },
+    "16": {
+        "type": 0,
+        "val": -4
+    },
+    "17": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "18",
+        "rhs": "21"
+    },
+    "18": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "19",
+        "rhs": "20"
+    },
+    "19": {
+        "type": 5,
+        "name": "i14"
+    },
+    "20": {
+        "type": 5,
+        "name": "i62"
+    },
+    "21": {
+        "type": 0,
+        "val": -4
+    },
+    "22": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "23",
+        "indexes": [
+            "49",
+            "50",
+            "51",
+            "52"
+        ]
+    },
+    "23": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            288,
+            16,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "24"
+    },
+    "24": {
+        "type": 105,
+        "inputs": [
+            "25"
+        ],
+        "expr": "26",
+        "outputShape": [
+            288,
+            16,
+            3,
+            3
+        ]
+    },
+    "25": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "26": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "27",
+        "loopVarRanges": {
+            "i64": [
+                0,
+                288
+            ],
+            "c": [
+                0,
+                16
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "27": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "28",
+        "indexes": [
+            "29",
+            "32",
+            "33",
+            "42"
+        ]
+    },
+    "28": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "29": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "30",
+        "rhs": "31"
+    },
+    "30": {
+        "type": 5,
+        "name": "i64"
+    },
+    "31": {
+        "type": 0,
+        "val": 9
+    },
+    "32": {
+        "type": 5,
+        "name": "c"
+    },
+    "33": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "34",
+        "rhs": "35"
+    },
+    "34": {
+        "type": 5,
+        "name": "i4"
+    },
+    "35": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "36",
+        "rhs": "37"
+    },
+    "36": {
+        "type": 0,
+        "val": 3
+    },
+    "37": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "38",
+        "rhs": "41"
+    },
+    "38": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "39",
+        "rhs": "40"
+    },
+    "39": {
+        "type": 5,
+        "name": "i64"
+    },
+    "40": {
+        "type": 0,
+        "val": 3
+    },
+    "41": {
+        "type": 0,
+        "val": 3
+    },
+    "42": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "43",
+        "rhs": "48"
+    },
+    "43": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "44",
+        "rhs": "45"
+    },
+    "44": {
+        "type": 0,
+        "val": 3
+    },
+    "45": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "46",
+        "rhs": "47"
+    },
+    "46": {
+        "type": 5,
+        "name": "i64"
+    },
+    "47": {
+        "type": 0,
+        "val": 3
+    },
+    "48": {
+        "type": 5,
+        "name": "i14"
+    },
+    "49": {
+        "type": 5,
+        "name": "i64"
+    },
+    "50": {
+        "type": 5,
+        "name": "c"
+    },
+    "51": {
+        "type": 5,
+        "name": "i4"
+    },
+    "52": {
+        "type": 5,
+        "name": "i14"
+    },
+    "53": {
+        "type": 5,
+        "name": "n"
+    },
+    "54": {
+        "type": 5,
+        "name": "i64"
+    },
+    "55": {
+        "type": 5,
+        "name": "i58"
+    },
+    "56": {
+        "type": 5,
+        "name": "i62"
+    },
+    "57": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "58",
+        "rhs": "65"
+    },
+    "58": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "59",
+        "rhs": "62"
+    },
+    "59": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "60",
+        "rhs": "61"
+    },
+    "60": {
+        "type": 0,
+        "val": 9
+    },
+    "61": {
+        "type": 5,
+        "name": "f"
+    },
+    "62": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "63",
+        "rhs": "64"
+    },
+    "63": {
+        "type": 0,
+        "val": 3
+    },
+    "64": {
+        "type": 5,
+        "name": "i59"
+    },
+    "65": {
+        "type": 5,
+        "name": "i63"
+    },
+    "66": {
+        "type": 5,
+        "name": "i62"
+    },
+    "67": {
+        "type": 5,
+        "name": "i58"
+    },
+    "68": {
+        "type": 5,
+        "name": "n"
+    },
+    "69": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "70",
+        "rhs": "71"
+    },
+    "70": {
+        "type": 5,
+        "name": "w"
+    },
+    "71": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "72",
+        "rhs": "73"
+    },
+    "72": {
+        "type": 0,
+        "val": 3
+    },
+    "73": {
+        "type": 5,
+        "name": "i13"
+    },
+    "74": {
+        "type": 5,
+        "name": "i13"
+    },
+    "75": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "76",
+        "rhs": "77"
+    },
+    "76": {
+        "type": 5,
+        "name": "h"
+    },
+    "77": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "78",
+        "rhs": "79"
+    },
+    "78": {
+        "type": 0,
+        "val": 3
+    },
+    "79": {
+        "type": 5,
+        "name": "i3"
+    },
+    "80": {
+        "type": 5,
+        "name": "i3"
+    },
+    "81": {
+        "type": 5,
+        "name": "n"
+    },
+    "82": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_11.expr b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_11.expr
new file mode 100644
index 00000000..efa80c56
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_11.expr
@@ -0,0 +1,582 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 11 Rule4StageMerging: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                3
+            ],
+            "i3": [
+                0,
+                3
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "63",
+            "68",
+            "69",
+            "74",
+            "75",
+            "76"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            2,
+            0,
+            2,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i62": [
+                2,
+                228
+            ],
+            "i63": [
+                0,
+                3
+            ],
+            "i58": [
+                2,
+                228
+            ],
+            "i59": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "4",
+        "indexes": [
+            "51",
+            "52",
+            "61",
+            "62"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "n": [
+                0,
+                8
+            ],
+            "i64": [
+                0,
+                288
+            ],
+            "i58": [
+                2,
+                228
+            ],
+            "i62": [
+                2,
+                228
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "20"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "7",
+        "indexes": [
+            "8",
+            "9",
+            "10",
+            "15"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            4,
+            4
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 5,
+        "name": "n"
+    },
+    "9": {
+        "type": 5,
+        "name": "c"
+    },
+    "10": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "11",
+        "rhs": "14"
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "13"
+    },
+    "12": {
+        "type": 5,
+        "name": "i4"
+    },
+    "13": {
+        "type": 5,
+        "name": "i58"
+    },
+    "14": {
+        "type": 0,
+        "val": -4
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "19"
+    },
+    "16": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "17",
+        "rhs": "18"
+    },
+    "17": {
+        "type": 5,
+        "name": "i14"
+    },
+    "18": {
+        "type": 5,
+        "name": "i62"
+    },
+    "19": {
+        "type": 0,
+        "val": -4
+    },
+    "20": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "21",
+        "indexes": [
+            "47",
+            "48",
+            "49",
+            "50"
+        ]
+    },
+    "21": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            288,
+            16,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "22"
+    },
+    "22": {
+        "type": 105,
+        "inputs": [
+            "23"
+        ],
+        "expr": "24",
+        "outputShape": [
+            288,
+            16,
+            3,
+            3
+        ]
+    },
+    "23": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "24": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "25",
+        "loopVarRanges": {
+            "i64": [
+                0,
+                288
+            ],
+            "c": [
+                0,
+                16
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "25": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "26",
+        "indexes": [
+            "27",
+            "30",
+            "31",
+            "40"
+        ]
+    },
+    "26": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "27": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "28",
+        "rhs": "29"
+    },
+    "28": {
+        "type": 5,
+        "name": "i64"
+    },
+    "29": {
+        "type": 0,
+        "val": 9
+    },
+    "30": {
+        "type": 5,
+        "name": "c"
+    },
+    "31": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "32",
+        "rhs": "33"
+    },
+    "32": {
+        "type": 5,
+        "name": "i4"
+    },
+    "33": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "34",
+        "rhs": "35"
+    },
+    "34": {
+        "type": 0,
+        "val": 3
+    },
+    "35": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "36",
+        "rhs": "39"
+    },
+    "36": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "37",
+        "rhs": "38"
+    },
+    "37": {
+        "type": 5,
+        "name": "i64"
+    },
+    "38": {
+        "type": 0,
+        "val": 3
+    },
+    "39": {
+        "type": 0,
+        "val": 3
+    },
+    "40": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "41",
+        "rhs": "46"
+    },
+    "41": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "42",
+        "rhs": "43"
+    },
+    "42": {
+        "type": 0,
+        "val": 3
+    },
+    "43": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "44",
+        "rhs": "45"
+    },
+    "44": {
+        "type": 5,
+        "name": "i64"
+    },
+    "45": {
+        "type": 0,
+        "val": 3
+    },
+    "46": {
+        "type": 5,
+        "name": "i14"
+    },
+    "47": {
+        "type": 5,
+        "name": "i64"
+    },
+    "48": {
+        "type": 5,
+        "name": "c"
+    },
+    "49": {
+        "type": 5,
+        "name": "i4"
+    },
+    "50": {
+        "type": 5,
+        "name": "i14"
+    },
+    "51": {
+        "type": 5,
+        "name": "n"
+    },
+    "52": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "53",
+        "rhs": "60"
+    },
+    "53": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "54",
+        "rhs": "57"
+    },
+    "54": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "55",
+        "rhs": "56"
+    },
+    "55": {
+        "type": 0,
+        "val": 9
+    },
+    "56": {
+        "type": 5,
+        "name": "f"
+    },
+    "57": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "58",
+        "rhs": "59"
+    },
+    "58": {
+        "type": 0,
+        "val": 3
+    },
+    "59": {
+        "type": 5,
+        "name": "i59"
+    },
+    "60": {
+        "type": 5,
+        "name": "i63"
+    },
+    "61": {
+        "type": 5,
+        "name": "i58"
+    },
+    "62": {
+        "type": 5,
+        "name": "i62"
+    },
+    "63": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "64",
+        "rhs": "65"
+    },
+    "64": {
+        "type": 5,
+        "name": "w"
+    },
+    "65": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "66",
+        "rhs": "67"
+    },
+    "66": {
+        "type": 0,
+        "val": 3
+    },
+    "67": {
+        "type": 5,
+        "name": "i13"
+    },
+    "68": {
+        "type": 5,
+        "name": "i13"
+    },
+    "69": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "70",
+        "rhs": "71"
+    },
+    "70": {
+        "type": 5,
+        "name": "h"
+    },
+    "71": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "72",
+        "rhs": "73"
+    },
+    "72": {
+        "type": 0,
+        "val": 3
+    },
+    "73": {
+        "type": 5,
+        "name": "i3"
+    },
+    "74": {
+        "type": 5,
+        "name": "i3"
+    },
+    "75": {
+        "type": 5,
+        "name": "n"
+    },
+    "76": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_12.expr b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_12.expr
new file mode 100644
index 00000000..d4ec9a43
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_12.expr
@@ -0,0 +1,906 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 12 Rule6KenerlMatching: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                3
+            ],
+            "i3": [
+                0,
+                3
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "102",
+            "107",
+            "108",
+            "113",
+            "114",
+            "115"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            2,
+            0,
+            2,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i62": [
+                2,
+                228
+            ],
+            "i63": [
+                0,
+                3
+            ],
+            "i58": [
+                2,
+                228
+            ],
+            "i59": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "4",
+        "indexes": [
+            "90",
+            "91",
+            "100",
+            "101"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "n": [
+                0,
+                8
+            ],
+            "i64": [
+                0,
+                288
+            ],
+            "i58": [
+                2,
+                228
+            ],
+            "i62": [
+                2,
+                228
+            ]
+        }
+    },
+    "5": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "6",
+        "indexes": [
+            "82",
+            "83",
+            "84",
+            "87"
+        ]
+    },
+    "6": {
+        "type": 4,
+        "name": "T6",
+        "shape": [
+            8,
+            288,
+            226,
+            226
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "7"
+    },
+    "7": {
+        "type": 102,
+        "inputs": [
+            "8",
+            "9"
+        ],
+        "expr": "35",
+        "args": [
+            2,
+            2,
+            1,
+            1,
+            1,
+            1
+        ]
+    },
+    "8": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            4,
+            4
+        ],
+        "source": "-1"
+    },
+    "9": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            288,
+            16,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "10"
+    },
+    "10": {
+        "type": 105,
+        "inputs": [
+            "11"
+        ],
+        "expr": "12",
+        "outputShape": [
+            288,
+            16,
+            3,
+            3
+        ]
+    },
+    "11": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "12": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "13",
+        "loopVarRanges": {
+            "i64": [
+                0,
+                288
+            ],
+            "c": [
+                0,
+                16
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "13": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "14",
+        "indexes": [
+            "15",
+            "18",
+            "19",
+            "28"
+        ]
+    },
+    "14": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "15": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "16",
+        "rhs": "17"
+    },
+    "16": {
+        "type": 5,
+        "name": "i64"
+    },
+    "17": {
+        "type": 0,
+        "val": 9
+    },
+    "18": {
+        "type": 5,
+        "name": "c"
+    },
+    "19": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "20",
+        "rhs": "21"
+    },
+    "20": {
+        "type": 5,
+        "name": "i4"
+    },
+    "21": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "22",
+        "rhs": "23"
+    },
+    "22": {
+        "type": 0,
+        "val": 3
+    },
+    "23": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "24",
+        "rhs": "27"
+    },
+    "24": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "25",
+        "rhs": "26"
+    },
+    "25": {
+        "type": 5,
+        "name": "i64"
+    },
+    "26": {
+        "type": 0,
+        "val": 3
+    },
+    "27": {
+        "type": 0,
+        "val": 3
+    },
+    "28": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "29",
+        "rhs": "34"
+    },
+    "29": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "30",
+        "rhs": "31"
+    },
+    "30": {
+        "type": 0,
+        "val": 3
+    },
+    "31": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "32",
+        "rhs": "33"
+    },
+    "32": {
+        "type": 5,
+        "name": "i64"
+    },
+    "33": {
+        "type": 0,
+        "val": 3
+    },
+    "34": {
+        "type": 5,
+        "name": "i14"
+    },
+    "35": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "36",
+        "loopVarRanges": {
+            "n": [
+                0,
+                8
+            ],
+            "i64": [
+                0,
+                288
+            ],
+            "i65": [
+                0,
+                226
+            ],
+            "i66": [
+                0,
+                226
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "36": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "37",
+        "rhs": "51"
+    },
+    "37": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "38",
+        "indexes": [
+            "39",
+            "40",
+            "41",
+            "46"
+        ]
+    },
+    "38": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            4,
+            4
+        ],
+        "source": "-1"
+    },
+    "39": {
+        "type": 5,
+        "name": "n"
+    },
+    "40": {
+        "type": 5,
+        "name": "c"
+    },
+    "41": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "42",
+        "rhs": "45"
+    },
+    "42": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "43",
+        "rhs": "44"
+    },
+    "43": {
+        "type": 5,
+        "name": "i4"
+    },
+    "44": {
+        "type": 5,
+        "name": "i65"
+    },
+    "45": {
+        "type": 0,
+        "val": -2
+    },
+    "46": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "47",
+        "rhs": "50"
+    },
+    "47": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "48",
+        "rhs": "49"
+    },
+    "48": {
+        "type": 5,
+        "name": "i14"
+    },
+    "49": {
+        "type": 5,
+        "name": "i66"
+    },
+    "50": {
+        "type": 0,
+        "val": -2
+    },
+    "51": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "52",
+        "indexes": [
+            "78",
+            "79",
+            "80",
+            "81"
+        ]
+    },
+    "52": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            288,
+            16,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "53"
+    },
+    "53": {
+        "type": 105,
+        "inputs": [
+            "54"
+        ],
+        "expr": "55",
+        "outputShape": [
+            288,
+            16,
+            3,
+            3
+        ]
+    },
+    "54": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "55": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "56",
+        "loopVarRanges": {
+            "i64": [
+                0,
+                288
+            ],
+            "c": [
+                0,
+                16
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "56": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "57",
+        "indexes": [
+            "58",
+            "61",
+            "62",
+            "71"
+        ]
+    },
+    "57": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "58": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "59",
+        "rhs": "60"
+    },
+    "59": {
+        "type": 5,
+        "name": "i64"
+    },
+    "60": {
+        "type": 0,
+        "val": 9
+    },
+    "61": {
+        "type": 5,
+        "name": "c"
+    },
+    "62": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "63",
+        "rhs": "64"
+    },
+    "63": {
+        "type": 5,
+        "name": "i4"
+    },
+    "64": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "65",
+        "rhs": "66"
+    },
+    "65": {
+        "type": 0,
+        "val": 3
+    },
+    "66": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "67",
+        "rhs": "70"
+    },
+    "67": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "68",
+        "rhs": "69"
+    },
+    "68": {
+        "type": 5,
+        "name": "i64"
+    },
+    "69": {
+        "type": 0,
+        "val": 3
+    },
+    "70": {
+        "type": 0,
+        "val": 3
+    },
+    "71": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "72",
+        "rhs": "77"
+    },
+    "72": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "73",
+        "rhs": "74"
+    },
+    "73": {
+        "type": 0,
+        "val": 3
+    },
+    "74": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "75",
+        "rhs": "76"
+    },
+    "75": {
+        "type": 5,
+        "name": "i64"
+    },
+    "76": {
+        "type": 0,
+        "val": 3
+    },
+    "77": {
+        "type": 5,
+        "name": "i14"
+    },
+    "78": {
+        "type": 5,
+        "name": "i64"
+    },
+    "79": {
+        "type": 5,
+        "name": "c"
+    },
+    "80": {
+        "type": 5,
+        "name": "i4"
+    },
+    "81": {
+        "type": 5,
+        "name": "i14"
+    },
+    "82": {
+        "type": 5,
+        "name": "n"
+    },
+    "83": {
+        "type": 5,
+        "name": "i64"
+    },
+    "84": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "85",
+        "rhs": "86"
+    },
+    "85": {
+        "type": 5,
+        "name": "i58"
+    },
+    "86": {
+        "type": 0,
+        "val": -2
+    },
+    "87": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "88",
+        "rhs": "89"
+    },
+    "88": {
+        "type": 5,
+        "name": "i62"
+    },
+    "89": {
+        "type": 0,
+        "val": -2
+    },
+    "90": {
+        "type": 5,
+        "name": "n"
+    },
+    "91": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "92",
+        "rhs": "99"
+    },
+    "92": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "93",
+        "rhs": "96"
+    },
+    "93": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "94",
+        "rhs": "95"
+    },
+    "94": {
+        "type": 0,
+        "val": 9
+    },
+    "95": {
+        "type": 5,
+        "name": "f"
+    },
+    "96": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "97",
+        "rhs": "98"
+    },
+    "97": {
+        "type": 0,
+        "val": 3
+    },
+    "98": {
+        "type": 5,
+        "name": "i59"
+    },
+    "99": {
+        "type": 5,
+        "name": "i63"
+    },
+    "100": {
+        "type": 5,
+        "name": "i58"
+    },
+    "101": {
+        "type": 5,
+        "name": "i62"
+    },
+    "102": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "103",
+        "rhs": "104"
+    },
+    "103": {
+        "type": 5,
+        "name": "w"
+    },
+    "104": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "105",
+        "rhs": "106"
+    },
+    "105": {
+        "type": 0,
+        "val": 3
+    },
+    "106": {
+        "type": 5,
+        "name": "i13"
+    },
+    "107": {
+        "type": 5,
+        "name": "i13"
+    },
+    "108": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "109",
+        "rhs": "110"
+    },
+    "109": {
+        "type": 5,
+        "name": "h"
+    },
+    "110": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "111",
+        "rhs": "112"
+    },
+    "111": {
+        "type": 0,
+        "val": 3
+    },
+    "112": {
+        "type": 5,
+        "name": "i3"
+    },
+    "113": {
+        "type": 5,
+        "name": "i3"
+    },
+    "114": {
+        "type": 5,
+        "name": "n"
+    },
+    "115": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_13.expr b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_13.expr
new file mode 100644
index 00000000..6f155e74
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_13.expr
@@ -0,0 +1,851 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 13 Rule4StageMerging: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                3
+            ],
+            "i3": [
+                0,
+                3
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "96",
+            "101",
+            "102",
+            "107",
+            "108",
+            "109"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            2,
+            0,
+            2,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i62": [
+                2,
+                228
+            ],
+            "i63": [
+                0,
+                3
+            ],
+            "i58": [
+                2,
+                228
+            ],
+            "i59": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "4",
+        "indexes": [
+            "80",
+            "81",
+            "90",
+            "93"
+        ]
+    },
+    "4": {
+        "type": 4,
+        "name": "T6",
+        "shape": [
+            8,
+            288,
+            226,
+            226
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "5"
+    },
+    "5": {
+        "type": 102,
+        "inputs": [
+            "6",
+            "7"
+        ],
+        "expr": "33",
+        "args": [
+            2,
+            2,
+            1,
+            1,
+            1,
+            1
+        ]
+    },
+    "6": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            4,
+            4
+        ],
+        "source": "-1"
+    },
+    "7": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            288,
+            16,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "8"
+    },
+    "8": {
+        "type": 105,
+        "inputs": [
+            "9"
+        ],
+        "expr": "10",
+        "outputShape": [
+            288,
+            16,
+            3,
+            3
+        ]
+    },
+    "9": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "10": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "11",
+        "loopVarRanges": {
+            "i64": [
+                0,
+                288
+            ],
+            "c": [
+                0,
+                16
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "11": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "12",
+        "indexes": [
+            "13",
+            "16",
+            "17",
+            "26"
+        ]
+    },
+    "12": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "13": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "14",
+        "rhs": "15"
+    },
+    "14": {
+        "type": 5,
+        "name": "i64"
+    },
+    "15": {
+        "type": 0,
+        "val": 9
+    },
+    "16": {
+        "type": 5,
+        "name": "c"
+    },
+    "17": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "18",
+        "rhs": "19"
+    },
+    "18": {
+        "type": 5,
+        "name": "i4"
+    },
+    "19": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "20",
+        "rhs": "21"
+    },
+    "20": {
+        "type": 0,
+        "val": 3
+    },
+    "21": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "22",
+        "rhs": "25"
+    },
+    "22": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "23",
+        "rhs": "24"
+    },
+    "23": {
+        "type": 5,
+        "name": "i64"
+    },
+    "24": {
+        "type": 0,
+        "val": 3
+    },
+    "25": {
+        "type": 0,
+        "val": 3
+    },
+    "26": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "27",
+        "rhs": "32"
+    },
+    "27": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "28",
+        "rhs": "29"
+    },
+    "28": {
+        "type": 0,
+        "val": 3
+    },
+    "29": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "30",
+        "rhs": "31"
+    },
+    "30": {
+        "type": 5,
+        "name": "i64"
+    },
+    "31": {
+        "type": 0,
+        "val": 3
+    },
+    "32": {
+        "type": 5,
+        "name": "i14"
+    },
+    "33": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "34",
+        "loopVarRanges": {
+            "n": [
+                0,
+                8
+            ],
+            "i64": [
+                0,
+                288
+            ],
+            "i65": [
+                0,
+                226
+            ],
+            "i66": [
+                0,
+                226
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "34": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "35",
+        "rhs": "49"
+    },
+    "35": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "36",
+        "indexes": [
+            "37",
+            "38",
+            "39",
+            "44"
+        ]
+    },
+    "36": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            4,
+            4
+        ],
+        "source": "-1"
+    },
+    "37": {
+        "type": 5,
+        "name": "n"
+    },
+    "38": {
+        "type": 5,
+        "name": "c"
+    },
+    "39": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "40",
+        "rhs": "43"
+    },
+    "40": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "41",
+        "rhs": "42"
+    },
+    "41": {
+        "type": 5,
+        "name": "i4"
+    },
+    "42": {
+        "type": 5,
+        "name": "i65"
+    },
+    "43": {
+        "type": 0,
+        "val": -2
+    },
+    "44": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "45",
+        "rhs": "48"
+    },
+    "45": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "46",
+        "rhs": "47"
+    },
+    "46": {
+        "type": 5,
+        "name": "i14"
+    },
+    "47": {
+        "type": 5,
+        "name": "i66"
+    },
+    "48": {
+        "type": 0,
+        "val": -2
+    },
+    "49": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "50",
+        "indexes": [
+            "76",
+            "77",
+            "78",
+            "79"
+        ]
+    },
+    "50": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            288,
+            16,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "51"
+    },
+    "51": {
+        "type": 105,
+        "inputs": [
+            "52"
+        ],
+        "expr": "53",
+        "outputShape": [
+            288,
+            16,
+            3,
+            3
+        ]
+    },
+    "52": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "53": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "54",
+        "loopVarRanges": {
+            "i64": [
+                0,
+                288
+            ],
+            "c": [
+                0,
+                16
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "54": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "55",
+        "indexes": [
+            "56",
+            "59",
+            "60",
+            "69"
+        ]
+    },
+    "55": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "56": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "57",
+        "rhs": "58"
+    },
+    "57": {
+        "type": 5,
+        "name": "i64"
+    },
+    "58": {
+        "type": 0,
+        "val": 9
+    },
+    "59": {
+        "type": 5,
+        "name": "c"
+    },
+    "60": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "61",
+        "rhs": "62"
+    },
+    "61": {
+        "type": 5,
+        "name": "i4"
+    },
+    "62": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "63",
+        "rhs": "64"
+    },
+    "63": {
+        "type": 0,
+        "val": 3
+    },
+    "64": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "65",
+        "rhs": "68"
+    },
+    "65": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "66",
+        "rhs": "67"
+    },
+    "66": {
+        "type": 5,
+        "name": "i64"
+    },
+    "67": {
+        "type": 0,
+        "val": 3
+    },
+    "68": {
+        "type": 0,
+        "val": 3
+    },
+    "69": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "70",
+        "rhs": "75"
+    },
+    "70": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "71",
+        "rhs": "72"
+    },
+    "71": {
+        "type": 0,
+        "val": 3
+    },
+    "72": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "73",
+        "rhs": "74"
+    },
+    "73": {
+        "type": 5,
+        "name": "i64"
+    },
+    "74": {
+        "type": 0,
+        "val": 3
+    },
+    "75": {
+        "type": 5,
+        "name": "i14"
+    },
+    "76": {
+        "type": 5,
+        "name": "i64"
+    },
+    "77": {
+        "type": 5,
+        "name": "c"
+    },
+    "78": {
+        "type": 5,
+        "name": "i4"
+    },
+    "79": {
+        "type": 5,
+        "name": "i14"
+    },
+    "80": {
+        "type": 5,
+        "name": "n"
+    },
+    "81": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "82",
+        "rhs": "89"
+    },
+    "82": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "83",
+        "rhs": "86"
+    },
+    "83": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "84",
+        "rhs": "85"
+    },
+    "84": {
+        "type": 0,
+        "val": 9
+    },
+    "85": {
+        "type": 5,
+        "name": "f"
+    },
+    "86": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "87",
+        "rhs": "88"
+    },
+    "87": {
+        "type": 0,
+        "val": 3
+    },
+    "88": {
+        "type": 5,
+        "name": "i59"
+    },
+    "89": {
+        "type": 5,
+        "name": "i63"
+    },
+    "90": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "91",
+        "rhs": "92"
+    },
+    "91": {
+        "type": 5,
+        "name": "i58"
+    },
+    "92": {
+        "type": 0,
+        "val": -2
+    },
+    "93": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "94",
+        "rhs": "95"
+    },
+    "94": {
+        "type": 5,
+        "name": "i62"
+    },
+    "95": {
+        "type": 0,
+        "val": -2
+    },
+    "96": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "97",
+        "rhs": "98"
+    },
+    "97": {
+        "type": 5,
+        "name": "w"
+    },
+    "98": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "99",
+        "rhs": "100"
+    },
+    "99": {
+        "type": 0,
+        "val": 3
+    },
+    "100": {
+        "type": 5,
+        "name": "i13"
+    },
+    "101": {
+        "type": 5,
+        "name": "i13"
+    },
+    "102": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "103",
+        "rhs": "104"
+    },
+    "103": {
+        "type": 5,
+        "name": "h"
+    },
+    "104": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "105",
+        "rhs": "106"
+    },
+    "105": {
+        "type": 0,
+        "val": 3
+    },
+    "106": {
+        "type": 5,
+        "name": "i3"
+    },
+    "107": {
+        "type": 5,
+        "name": "i3"
+    },
+    "108": {
+        "type": 5,
+        "name": "n"
+    },
+    "109": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_14.expr b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_14.expr
new file mode 100644
index 00000000..ed2b12fd
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_14.expr
@@ -0,0 +1,1488 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 14 Rule90TwoStageElementWise: ",
+    "0": {
+        "type": 4,
+        "name": "T7",
+        "shape": [
+            8,
+            32,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "1"
+    },
+    "1": {
+        "type": 105,
+        "inputs": [
+            "2"
+        ],
+        "expr": "78",
+        "outputShape": [
+            8,
+            32,
+            224,
+            224
+        ]
+    },
+    "2": {
+        "type": 4,
+        "name": "T6",
+        "shape": [
+            8,
+            288,
+            226,
+            226
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "3"
+    },
+    "3": {
+        "type": 102,
+        "inputs": [
+            "4",
+            "5"
+        ],
+        "expr": "31",
+        "args": [
+            2,
+            2,
+            1,
+            1,
+            1,
+            1
+        ]
+    },
+    "4": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            4,
+            4
+        ],
+        "source": "-1"
+    },
+    "5": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            288,
+            16,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "6"
+    },
+    "6": {
+        "type": 105,
+        "inputs": [
+            "7"
+        ],
+        "expr": "8",
+        "outputShape": [
+            288,
+            16,
+            3,
+            3
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "9",
+        "loopVarRanges": {
+            "i64": [
+                0,
+                288
+            ],
+            "c": [
+                0,
+                16
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "9": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "10",
+        "indexes": [
+            "11",
+            "14",
+            "15",
+            "24"
+        ]
+    },
+    "10": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "11": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "12",
+        "rhs": "13"
+    },
+    "12": {
+        "type": 5,
+        "name": "i64"
+    },
+    "13": {
+        "type": 0,
+        "val": 9
+    },
+    "14": {
+        "type": 5,
+        "name": "c"
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "17"
+    },
+    "16": {
+        "type": 5,
+        "name": "i4"
+    },
+    "17": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "18",
+        "rhs": "19"
+    },
+    "18": {
+        "type": 0,
+        "val": 3
+    },
+    "19": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "20",
+        "rhs": "23"
+    },
+    "20": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "21",
+        "rhs": "22"
+    },
+    "21": {
+        "type": 5,
+        "name": "i64"
+    },
+    "22": {
+        "type": 0,
+        "val": 3
+    },
+    "23": {
+        "type": 0,
+        "val": 3
+    },
+    "24": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "25",
+        "rhs": "30"
+    },
+    "25": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "26",
+        "rhs": "27"
+    },
+    "26": {
+        "type": 0,
+        "val": 3
+    },
+    "27": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "28",
+        "rhs": "29"
+    },
+    "28": {
+        "type": 5,
+        "name": "i64"
+    },
+    "29": {
+        "type": 0,
+        "val": 3
+    },
+    "30": {
+        "type": 5,
+        "name": "i14"
+    },
+    "31": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "32",
+        "loopVarRanges": {
+            "n": [
+                0,
+                8
+            ],
+            "i64": [
+                0,
+                288
+            ],
+            "i65": [
+                0,
+                226
+            ],
+            "i66": [
+                0,
+                226
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "32": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "33",
+        "rhs": "47"
+    },
+    "33": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "34",
+        "indexes": [
+            "35",
+            "36",
+            "37",
+            "42"
+        ]
+    },
+    "34": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            4,
+            4
+        ],
+        "source": "-1"
+    },
+    "35": {
+        "type": 5,
+        "name": "n"
+    },
+    "36": {
+        "type": 5,
+        "name": "c"
+    },
+    "37": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "38",
+        "rhs": "41"
+    },
+    "38": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "39",
+        "rhs": "40"
+    },
+    "39": {
+        "type": 5,
+        "name": "i4"
+    },
+    "40": {
+        "type": 5,
+        "name": "i65"
+    },
+    "41": {
+        "type": 0,
+        "val": -2
+    },
+    "42": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "43",
+        "rhs": "46"
+    },
+    "43": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "44",
+        "rhs": "45"
+    },
+    "44": {
+        "type": 5,
+        "name": "i14"
+    },
+    "45": {
+        "type": 5,
+        "name": "i66"
+    },
+    "46": {
+        "type": 0,
+        "val": -2
+    },
+    "47": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "48",
+        "indexes": [
+            "74",
+            "75",
+            "76",
+            "77"
+        ]
+    },
+    "48": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            288,
+            16,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "49"
+    },
+    "49": {
+        "type": 105,
+        "inputs": [
+            "50"
+        ],
+        "expr": "51",
+        "outputShape": [
+            288,
+            16,
+            3,
+            3
+        ]
+    },
+    "50": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "51": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "52",
+        "loopVarRanges": {
+            "i64": [
+                0,
+                288
+            ],
+            "c": [
+                0,
+                16
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "52": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "53",
+        "indexes": [
+            "54",
+            "57",
+            "58",
+            "67"
+        ]
+    },
+    "53": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "54": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "55",
+        "rhs": "56"
+    },
+    "55": {
+        "type": 5,
+        "name": "i64"
+    },
+    "56": {
+        "type": 0,
+        "val": 9
+    },
+    "57": {
+        "type": 5,
+        "name": "c"
+    },
+    "58": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "59",
+        "rhs": "60"
+    },
+    "59": {
+        "type": 5,
+        "name": "i4"
+    },
+    "60": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "61",
+        "rhs": "62"
+    },
+    "61": {
+        "type": 0,
+        "val": 3
+    },
+    "62": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "63",
+        "rhs": "66"
+    },
+    "63": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "64",
+        "rhs": "65"
+    },
+    "64": {
+        "type": 5,
+        "name": "i64"
+    },
+    "65": {
+        "type": 0,
+        "val": 3
+    },
+    "66": {
+        "type": 0,
+        "val": 3
+    },
+    "67": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "68",
+        "rhs": "73"
+    },
+    "68": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "69",
+        "rhs": "70"
+    },
+    "69": {
+        "type": 0,
+        "val": 3
+    },
+    "70": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "71",
+        "rhs": "72"
+    },
+    "71": {
+        "type": 5,
+        "name": "i64"
+    },
+    "72": {
+        "type": 0,
+        "val": 3
+    },
+    "73": {
+        "type": 5,
+        "name": "i14"
+    },
+    "74": {
+        "type": 5,
+        "name": "i64"
+    },
+    "75": {
+        "type": 5,
+        "name": "c"
+    },
+    "76": {
+        "type": 5,
+        "name": "i4"
+    },
+    "77": {
+        "type": 5,
+        "name": "i14"
+    },
+    "78": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "79",
+        "loopVarRanges": {
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                3
+            ],
+            "i3": [
+                0,
+                3
+            ]
+        }
+    },
+    "79": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "80",
+        "indexes": [
+            "174",
+            "179",
+            "180",
+            "185",
+            "186",
+            "187"
+        ]
+    },
+    "80": {
+        "type": 2,
+        "paddings": [
+            2,
+            0,
+            2,
+            0,
+            0,
+            0
+        ],
+        "summand": "81",
+        "loopVarRanges": {
+            "i62": [
+                2,
+                228
+            ],
+            "i63": [
+                0,
+                3
+            ],
+            "i58": [
+                2,
+                228
+            ],
+            "i59": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ]
+        }
+    },
+    "81": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "82",
+        "indexes": [
+            "158",
+            "159",
+            "168",
+            "171"
+        ]
+    },
+    "82": {
+        "type": 4,
+        "name": "T6",
+        "shape": [
+            8,
+            288,
+            226,
+            226
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "83"
+    },
+    "83": {
+        "type": 102,
+        "inputs": [
+            "84",
+            "85"
+        ],
+        "expr": "111",
+        "args": [
+            2,
+            2,
+            1,
+            1,
+            1,
+            1
+        ]
+    },
+    "84": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            4,
+            4
+        ],
+        "source": "-1"
+    },
+    "85": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            288,
+            16,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "86"
+    },
+    "86": {
+        "type": 105,
+        "inputs": [
+            "87"
+        ],
+        "expr": "88",
+        "outputShape": [
+            288,
+            16,
+            3,
+            3
+        ]
+    },
+    "87": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "88": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "89",
+        "loopVarRanges": {
+            "i64": [
+                0,
+                288
+            ],
+            "c": [
+                0,
+                16
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "89": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "90",
+        "indexes": [
+            "91",
+            "94",
+            "95",
+            "104"
+        ]
+    },
+    "90": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "91": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "92",
+        "rhs": "93"
+    },
+    "92": {
+        "type": 5,
+        "name": "i64"
+    },
+    "93": {
+        "type": 0,
+        "val": 9
+    },
+    "94": {
+        "type": 5,
+        "name": "c"
+    },
+    "95": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "96",
+        "rhs": "97"
+    },
+    "96": {
+        "type": 5,
+        "name": "i4"
+    },
+    "97": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "98",
+        "rhs": "99"
+    },
+    "98": {
+        "type": 0,
+        "val": 3
+    },
+    "99": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "100",
+        "rhs": "103"
+    },
+    "100": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "101",
+        "rhs": "102"
+    },
+    "101": {
+        "type": 5,
+        "name": "i64"
+    },
+    "102": {
+        "type": 0,
+        "val": 3
+    },
+    "103": {
+        "type": 0,
+        "val": 3
+    },
+    "104": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "105",
+        "rhs": "110"
+    },
+    "105": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "106",
+        "rhs": "107"
+    },
+    "106": {
+        "type": 0,
+        "val": 3
+    },
+    "107": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "108",
+        "rhs": "109"
+    },
+    "108": {
+        "type": 5,
+        "name": "i64"
+    },
+    "109": {
+        "type": 0,
+        "val": 3
+    },
+    "110": {
+        "type": 5,
+        "name": "i14"
+    },
+    "111": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "112",
+        "loopVarRanges": {
+            "n": [
+                0,
+                8
+            ],
+            "i64": [
+                0,
+                288
+            ],
+            "i65": [
+                0,
+                226
+            ],
+            "i66": [
+                0,
+                226
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "112": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "113",
+        "rhs": "127"
+    },
+    "113": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "114",
+        "indexes": [
+            "115",
+            "116",
+            "117",
+            "122"
+        ]
+    },
+    "114": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            4,
+            4
+        ],
+        "source": "-1"
+    },
+    "115": {
+        "type": 5,
+        "name": "n"
+    },
+    "116": {
+        "type": 5,
+        "name": "c"
+    },
+    "117": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "118",
+        "rhs": "121"
+    },
+    "118": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "119",
+        "rhs": "120"
+    },
+    "119": {
+        "type": 5,
+        "name": "i4"
+    },
+    "120": {
+        "type": 5,
+        "name": "i65"
+    },
+    "121": {
+        "type": 0,
+        "val": -2
+    },
+    "122": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "123",
+        "rhs": "126"
+    },
+    "123": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "124",
+        "rhs": "125"
+    },
+    "124": {
+        "type": 5,
+        "name": "i14"
+    },
+    "125": {
+        "type": 5,
+        "name": "i66"
+    },
+    "126": {
+        "type": 0,
+        "val": -2
+    },
+    "127": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "128",
+        "indexes": [
+            "154",
+            "155",
+            "156",
+            "157"
+        ]
+    },
+    "128": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            288,
+            16,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "129"
+    },
+    "129": {
+        "type": 105,
+        "inputs": [
+            "130"
+        ],
+        "expr": "131",
+        "outputShape": [
+            288,
+            16,
+            3,
+            3
+        ]
+    },
+    "130": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "131": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "132",
+        "loopVarRanges": {
+            "i64": [
+                0,
+                288
+            ],
+            "c": [
+                0,
+                16
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "132": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "133",
+        "indexes": [
+            "134",
+            "137",
+            "138",
+            "147"
+        ]
+    },
+    "133": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "134": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "135",
+        "rhs": "136"
+    },
+    "135": {
+        "type": 5,
+        "name": "i64"
+    },
+    "136": {
+        "type": 0,
+        "val": 9
+    },
+    "137": {
+        "type": 5,
+        "name": "c"
+    },
+    "138": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "139",
+        "rhs": "140"
+    },
+    "139": {
+        "type": 5,
+        "name": "i4"
+    },
+    "140": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "141",
+        "rhs": "142"
+    },
+    "141": {
+        "type": 0,
+        "val": 3
+    },
+    "142": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "143",
+        "rhs": "146"
+    },
+    "143": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "144",
+        "rhs": "145"
+    },
+    "144": {
+        "type": 5,
+        "name": "i64"
+    },
+    "145": {
+        "type": 0,
+        "val": 3
+    },
+    "146": {
+        "type": 0,
+        "val": 3
+    },
+    "147": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "148",
+        "rhs": "153"
+    },
+    "148": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "149",
+        "rhs": "150"
+    },
+    "149": {
+        "type": 0,
+        "val": 3
+    },
+    "150": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "151",
+        "rhs": "152"
+    },
+    "151": {
+        "type": 5,
+        "name": "i64"
+    },
+    "152": {
+        "type": 0,
+        "val": 3
+    },
+    "153": {
+        "type": 5,
+        "name": "i14"
+    },
+    "154": {
+        "type": 5,
+        "name": "i64"
+    },
+    "155": {
+        "type": 5,
+        "name": "c"
+    },
+    "156": {
+        "type": 5,
+        "name": "i4"
+    },
+    "157": {
+        "type": 5,
+        "name": "i14"
+    },
+    "158": {
+        "type": 5,
+        "name": "n"
+    },
+    "159": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "160",
+        "rhs": "167"
+    },
+    "160": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "161",
+        "rhs": "164"
+    },
+    "161": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "162",
+        "rhs": "163"
+    },
+    "162": {
+        "type": 0,
+        "val": 9
+    },
+    "163": {
+        "type": 5,
+        "name": "f"
+    },
+    "164": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "165",
+        "rhs": "166"
+    },
+    "165": {
+        "type": 0,
+        "val": 3
+    },
+    "166": {
+        "type": 5,
+        "name": "i59"
+    },
+    "167": {
+        "type": 5,
+        "name": "i63"
+    },
+    "168": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "169",
+        "rhs": "170"
+    },
+    "169": {
+        "type": 5,
+        "name": "i58"
+    },
+    "170": {
+        "type": 0,
+        "val": -2
+    },
+    "171": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "172",
+        "rhs": "173"
+    },
+    "172": {
+        "type": 5,
+        "name": "i62"
+    },
+    "173": {
+        "type": 0,
+        "val": -2
+    },
+    "174": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "175",
+        "rhs": "176"
+    },
+    "175": {
+        "type": 5,
+        "name": "w"
+    },
+    "176": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "177",
+        "rhs": "178"
+    },
+    "177": {
+        "type": 0,
+        "val": 3
+    },
+    "178": {
+        "type": 5,
+        "name": "i13"
+    },
+    "179": {
+        "type": 5,
+        "name": "i13"
+    },
+    "180": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "181",
+        "rhs": "182"
+    },
+    "181": {
+        "type": 5,
+        "name": "h"
+    },
+    "182": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "183",
+        "rhs": "184"
+    },
+    "183": {
+        "type": 0,
+        "val": 3
+    },
+    "184": {
+        "type": 5,
+        "name": "i3"
+    },
+    "185": {
+        "type": 5,
+        "name": "i3"
+    },
+    "186": {
+        "type": 5,
+        "name": "n"
+    },
+    "187": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_2.expr b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_2.expr
new file mode 100644
index 00000000..1699b9aa
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_2.expr
@@ -0,0 +1,268 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 2 Rule1VariableSplit: Old iters: [s], new iters: [i13,i14] phis: [] psis: [((3 * i13) + i14)]",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ],
+            "i3": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "1": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "2",
+        "rhs": "24"
+    },
+    "2": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "3",
+        "indexes": [
+            "4",
+            "5",
+            "6",
+            "15"
+        ]
+    },
+    "3": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            4,
+            4
+        ],
+        "source": "-1"
+    },
+    "4": {
+        "type": 5,
+        "name": "n"
+    },
+    "5": {
+        "type": 5,
+        "name": "c"
+    },
+    "6": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "7",
+        "rhs": "14"
+    },
+    "7": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "8",
+        "rhs": "9"
+    },
+    "8": {
+        "type": 5,
+        "name": "h"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "13"
+    },
+    "10": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "11",
+        "rhs": "12"
+    },
+    "11": {
+        "type": 0,
+        "val": 3
+    },
+    "12": {
+        "type": 5,
+        "name": "i3"
+    },
+    "13": {
+        "type": 5,
+        "name": "i4"
+    },
+    "14": {
+        "type": 0,
+        "val": -4
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "23"
+    },
+    "16": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "17",
+        "rhs": "18"
+    },
+    "17": {
+        "type": 5,
+        "name": "w"
+    },
+    "18": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "19",
+        "rhs": "22"
+    },
+    "19": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "20",
+        "rhs": "21"
+    },
+    "20": {
+        "type": 0,
+        "val": 3
+    },
+    "21": {
+        "type": 5,
+        "name": "i13"
+    },
+    "22": {
+        "type": 5,
+        "name": "i14"
+    },
+    "23": {
+        "type": 0,
+        "val": -4
+    },
+    "24": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "25",
+        "indexes": [
+            "26",
+            "27",
+            "28",
+            "33"
+        ]
+    },
+    "25": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "26": {
+        "type": 5,
+        "name": "f"
+    },
+    "27": {
+        "type": 5,
+        "name": "c"
+    },
+    "28": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "29",
+        "rhs": "32"
+    },
+    "29": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "30",
+        "rhs": "31"
+    },
+    "30": {
+        "type": 0,
+        "val": 3
+    },
+    "31": {
+        "type": 5,
+        "name": "i3"
+    },
+    "32": {
+        "type": 5,
+        "name": "i4"
+    },
+    "33": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "34",
+        "rhs": "37"
+    },
+    "34": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 0,
+        "val": 3
+    },
+    "36": {
+        "type": 5,
+        "name": "i13"
+    },
+    "37": {
+        "type": 5,
+        "name": "i14"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_3.expr b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_3.expr
new file mode 100644
index 00000000..16f9d349
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_3.expr
@@ -0,0 +1,345 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 3 Rule3StageSplit: Separate sum iters: [i13,i3]",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                3
+            ],
+            "i3": [
+                0,
+                3
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "40",
+            "41",
+            "42",
+            "43",
+            "44",
+            "45"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i13": [
+                0,
+                3
+            ],
+            "i3": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "26"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "5",
+        "indexes": [
+            "6",
+            "7",
+            "8",
+            "17"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            4,
+            4
+        ],
+        "source": "-1"
+    },
+    "6": {
+        "type": 5,
+        "name": "n"
+    },
+    "7": {
+        "type": 5,
+        "name": "c"
+    },
+    "8": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "9",
+        "rhs": "16"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "11"
+    },
+    "10": {
+        "type": 5,
+        "name": "h"
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "15"
+    },
+    "12": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "13",
+        "rhs": "14"
+    },
+    "13": {
+        "type": 0,
+        "val": 3
+    },
+    "14": {
+        "type": 5,
+        "name": "i3"
+    },
+    "15": {
+        "type": 5,
+        "name": "i4"
+    },
+    "16": {
+        "type": 0,
+        "val": -4
+    },
+    "17": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "18",
+        "rhs": "25"
+    },
+    "18": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "19",
+        "rhs": "20"
+    },
+    "19": {
+        "type": 5,
+        "name": "w"
+    },
+    "20": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "21",
+        "rhs": "24"
+    },
+    "21": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "22",
+        "rhs": "23"
+    },
+    "22": {
+        "type": 0,
+        "val": 3
+    },
+    "23": {
+        "type": 5,
+        "name": "i13"
+    },
+    "24": {
+        "type": 5,
+        "name": "i14"
+    },
+    "25": {
+        "type": 0,
+        "val": -4
+    },
+    "26": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "27",
+        "indexes": [
+            "28",
+            "29",
+            "30",
+            "35"
+        ]
+    },
+    "27": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "28": {
+        "type": 5,
+        "name": "f"
+    },
+    "29": {
+        "type": 5,
+        "name": "c"
+    },
+    "30": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "31",
+        "rhs": "34"
+    },
+    "31": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "32",
+        "rhs": "33"
+    },
+    "32": {
+        "type": 0,
+        "val": 3
+    },
+    "33": {
+        "type": 5,
+        "name": "i3"
+    },
+    "34": {
+        "type": 5,
+        "name": "i4"
+    },
+    "35": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "36",
+        "rhs": "39"
+    },
+    "36": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "37",
+        "rhs": "38"
+    },
+    "37": {
+        "type": 0,
+        "val": 3
+    },
+    "38": {
+        "type": 5,
+        "name": "i13"
+    },
+    "39": {
+        "type": 5,
+        "name": "i14"
+    },
+    "40": {
+        "type": 5,
+        "name": "i13"
+    },
+    "41": {
+        "type": 5,
+        "name": "i3"
+    },
+    "42": {
+        "type": 5,
+        "name": "n"
+    },
+    "43": {
+        "type": 5,
+        "name": "f"
+    },
+    "44": {
+        "type": 5,
+        "name": "h"
+    },
+    "45": {
+        "type": 5,
+        "name": "w"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_4.expr b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_4.expr
new file mode 100644
index 00000000..16fb5e10
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_4.expr
@@ -0,0 +1,420 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 4 Rule2VariableMerging: Old iters: [h,i3], new iters: [i58,i59] phis: [(h + (3 * i3)),i3] psis: [(i58 - (3 * i59)),i59]",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                3
+            ],
+            "i3": [
+                0,
+                3
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "48",
+            "49",
+            "50",
+            "51",
+            "52",
+            "53"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i13": [
+                0,
+                3
+            ],
+            "i3": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "4",
+        "indexes": [
+            "38",
+            "43",
+            "44",
+            "45",
+            "46",
+            "47"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i58": [
+                0,
+                230
+            ],
+            "i59": [
+                0,
+                3
+            ],
+            "i13": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "24"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "7",
+        "indexes": [
+            "8",
+            "9",
+            "10",
+            "15"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            4,
+            4
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 5,
+        "name": "n"
+    },
+    "9": {
+        "type": 5,
+        "name": "c"
+    },
+    "10": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "11",
+        "rhs": "14"
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "13"
+    },
+    "12": {
+        "type": 5,
+        "name": "i4"
+    },
+    "13": {
+        "type": 5,
+        "name": "i58"
+    },
+    "14": {
+        "type": 0,
+        "val": -4
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "23"
+    },
+    "16": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "17",
+        "rhs": "22"
+    },
+    "17": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "18",
+        "rhs": "21"
+    },
+    "18": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "19",
+        "rhs": "20"
+    },
+    "19": {
+        "type": 0,
+        "val": 3
+    },
+    "20": {
+        "type": 5,
+        "name": "i13"
+    },
+    "21": {
+        "type": 5,
+        "name": "i14"
+    },
+    "22": {
+        "type": 5,
+        "name": "w"
+    },
+    "23": {
+        "type": 0,
+        "val": -4
+    },
+    "24": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "25",
+        "indexes": [
+            "26",
+            "27",
+            "28",
+            "33"
+        ]
+    },
+    "25": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "26": {
+        "type": 5,
+        "name": "f"
+    },
+    "27": {
+        "type": 5,
+        "name": "c"
+    },
+    "28": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "29",
+        "rhs": "30"
+    },
+    "29": {
+        "type": 5,
+        "name": "i4"
+    },
+    "30": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "31",
+        "rhs": "32"
+    },
+    "31": {
+        "type": 0,
+        "val": 3
+    },
+    "32": {
+        "type": 5,
+        "name": "i59"
+    },
+    "33": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "34",
+        "rhs": "37"
+    },
+    "34": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 0,
+        "val": 3
+    },
+    "36": {
+        "type": 5,
+        "name": "i13"
+    },
+    "37": {
+        "type": 5,
+        "name": "i14"
+    },
+    "38": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "39",
+        "rhs": "40"
+    },
+    "39": {
+        "type": 5,
+        "name": "h"
+    },
+    "40": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "41",
+        "rhs": "42"
+    },
+    "41": {
+        "type": 0,
+        "val": 3
+    },
+    "42": {
+        "type": 5,
+        "name": "i3"
+    },
+    "43": {
+        "type": 5,
+        "name": "i3"
+    },
+    "44": {
+        "type": 5,
+        "name": "i13"
+    },
+    "45": {
+        "type": 5,
+        "name": "n"
+    },
+    "46": {
+        "type": 5,
+        "name": "f"
+    },
+    "47": {
+        "type": 5,
+        "name": "w"
+    },
+    "48": {
+        "type": 5,
+        "name": "i13"
+    },
+    "49": {
+        "type": 5,
+        "name": "i3"
+    },
+    "50": {
+        "type": 5,
+        "name": "n"
+    },
+    "51": {
+        "type": 5,
+        "name": "f"
+    },
+    "52": {
+        "type": 5,
+        "name": "h"
+    },
+    "53": {
+        "type": 5,
+        "name": "w"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_5.expr b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_5.expr
new file mode 100644
index 00000000..4be0cf8e
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_5.expr
@@ -0,0 +1,345 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 5 Rule4StageMerging: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                3
+            ],
+            "i3": [
+                0,
+                3
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "36",
+            "41",
+            "42",
+            "43",
+            "44",
+            "45"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i58": [
+                0,
+                230
+            ],
+            "i59": [
+                0,
+                3
+            ],
+            "i13": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "22"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "5",
+        "indexes": [
+            "6",
+            "7",
+            "8",
+            "13"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            4,
+            4
+        ],
+        "source": "-1"
+    },
+    "6": {
+        "type": 5,
+        "name": "n"
+    },
+    "7": {
+        "type": 5,
+        "name": "c"
+    },
+    "8": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "9",
+        "rhs": "12"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "11"
+    },
+    "10": {
+        "type": 5,
+        "name": "i4"
+    },
+    "11": {
+        "type": 5,
+        "name": "i58"
+    },
+    "12": {
+        "type": 0,
+        "val": -4
+    },
+    "13": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "14",
+        "rhs": "21"
+    },
+    "14": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "15",
+        "rhs": "20"
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "19"
+    },
+    "16": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "17",
+        "rhs": "18"
+    },
+    "17": {
+        "type": 0,
+        "val": 3
+    },
+    "18": {
+        "type": 5,
+        "name": "i13"
+    },
+    "19": {
+        "type": 5,
+        "name": "i14"
+    },
+    "20": {
+        "type": 5,
+        "name": "w"
+    },
+    "21": {
+        "type": 0,
+        "val": -4
+    },
+    "22": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "23",
+        "indexes": [
+            "24",
+            "25",
+            "26",
+            "31"
+        ]
+    },
+    "23": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "24": {
+        "type": 5,
+        "name": "f"
+    },
+    "25": {
+        "type": 5,
+        "name": "c"
+    },
+    "26": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "27",
+        "rhs": "28"
+    },
+    "27": {
+        "type": 5,
+        "name": "i4"
+    },
+    "28": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "29",
+        "rhs": "30"
+    },
+    "29": {
+        "type": 0,
+        "val": 3
+    },
+    "30": {
+        "type": 5,
+        "name": "i59"
+    },
+    "31": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "32",
+        "rhs": "35"
+    },
+    "32": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "33",
+        "rhs": "34"
+    },
+    "33": {
+        "type": 0,
+        "val": 3
+    },
+    "34": {
+        "type": 5,
+        "name": "i13"
+    },
+    "35": {
+        "type": 5,
+        "name": "i14"
+    },
+    "36": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "37",
+        "rhs": "38"
+    },
+    "37": {
+        "type": 5,
+        "name": "h"
+    },
+    "38": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "39",
+        "rhs": "40"
+    },
+    "39": {
+        "type": 0,
+        "val": 3
+    },
+    "40": {
+        "type": 5,
+        "name": "i3"
+    },
+    "41": {
+        "type": 5,
+        "name": "i3"
+    },
+    "42": {
+        "type": 5,
+        "name": "i13"
+    },
+    "43": {
+        "type": 5,
+        "name": "n"
+    },
+    "44": {
+        "type": 5,
+        "name": "f"
+    },
+    "45": {
+        "type": 5,
+        "name": "w"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_6.expr b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_6.expr
new file mode 100644
index 00000000..6abaecf3
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_6.expr
@@ -0,0 +1,420 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 6 Rule2VariableMerging: Old iters: [w,i13], new iters: [i62,i63] phis: [(w + (3 * i13)),i13] psis: [(i62 - (3 * i63)),i63]",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                3
+            ],
+            "i3": [
+                0,
+                3
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "44",
+            "49",
+            "50",
+            "51",
+            "52",
+            "53"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i58": [
+                0,
+                230
+            ],
+            "i59": [
+                0,
+                3
+            ],
+            "i13": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ],
+            "w": [
+                0,
+                224
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "4",
+        "indexes": [
+            "34",
+            "39",
+            "40",
+            "41",
+            "42",
+            "43"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i62": [
+                0,
+                230
+            ],
+            "i63": [
+                0,
+                3
+            ],
+            "i58": [
+                0,
+                230
+            ],
+            "i59": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "20"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "7",
+        "indexes": [
+            "8",
+            "9",
+            "10",
+            "15"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            4,
+            4
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 5,
+        "name": "n"
+    },
+    "9": {
+        "type": 5,
+        "name": "c"
+    },
+    "10": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "11",
+        "rhs": "14"
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "13"
+    },
+    "12": {
+        "type": 5,
+        "name": "i4"
+    },
+    "13": {
+        "type": 5,
+        "name": "i58"
+    },
+    "14": {
+        "type": 0,
+        "val": -4
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "19"
+    },
+    "16": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "17",
+        "rhs": "18"
+    },
+    "17": {
+        "type": 5,
+        "name": "i14"
+    },
+    "18": {
+        "type": 5,
+        "name": "i62"
+    },
+    "19": {
+        "type": 0,
+        "val": -4
+    },
+    "20": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "21",
+        "indexes": [
+            "22",
+            "23",
+            "24",
+            "29"
+        ]
+    },
+    "21": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "22": {
+        "type": 5,
+        "name": "f"
+    },
+    "23": {
+        "type": 5,
+        "name": "c"
+    },
+    "24": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "25",
+        "rhs": "26"
+    },
+    "25": {
+        "type": 5,
+        "name": "i4"
+    },
+    "26": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "27",
+        "rhs": "28"
+    },
+    "27": {
+        "type": 0,
+        "val": 3
+    },
+    "28": {
+        "type": 5,
+        "name": "i59"
+    },
+    "29": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "30",
+        "rhs": "31"
+    },
+    "30": {
+        "type": 5,
+        "name": "i14"
+    },
+    "31": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "32",
+        "rhs": "33"
+    },
+    "32": {
+        "type": 0,
+        "val": 3
+    },
+    "33": {
+        "type": 5,
+        "name": "i63"
+    },
+    "34": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 5,
+        "name": "w"
+    },
+    "36": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "37",
+        "rhs": "38"
+    },
+    "37": {
+        "type": 0,
+        "val": 3
+    },
+    "38": {
+        "type": 5,
+        "name": "i13"
+    },
+    "39": {
+        "type": 5,
+        "name": "i13"
+    },
+    "40": {
+        "type": 5,
+        "name": "i58"
+    },
+    "41": {
+        "type": 5,
+        "name": "i59"
+    },
+    "42": {
+        "type": 5,
+        "name": "n"
+    },
+    "43": {
+        "type": 5,
+        "name": "f"
+    },
+    "44": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "45",
+        "rhs": "46"
+    },
+    "45": {
+        "type": 5,
+        "name": "h"
+    },
+    "46": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "47",
+        "rhs": "48"
+    },
+    "47": {
+        "type": 0,
+        "val": 3
+    },
+    "48": {
+        "type": 5,
+        "name": "i3"
+    },
+    "49": {
+        "type": 5,
+        "name": "i3"
+    },
+    "50": {
+        "type": 5,
+        "name": "i13"
+    },
+    "51": {
+        "type": 5,
+        "name": "n"
+    },
+    "52": {
+        "type": 5,
+        "name": "f"
+    },
+    "53": {
+        "type": 5,
+        "name": "w"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_7.expr b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_7.expr
new file mode 100644
index 00000000..765ba191
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_7.expr
@@ -0,0 +1,345 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 7 Rule4StageMerging: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                3
+            ],
+            "i3": [
+                0,
+                3
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "32",
+            "37",
+            "38",
+            "43",
+            "44",
+            "45"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i62": [
+                0,
+                230
+            ],
+            "i63": [
+                0,
+                3
+            ],
+            "i58": [
+                0,
+                230
+            ],
+            "i59": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "18"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "5",
+        "indexes": [
+            "6",
+            "7",
+            "8",
+            "13"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            4,
+            4
+        ],
+        "source": "-1"
+    },
+    "6": {
+        "type": 5,
+        "name": "n"
+    },
+    "7": {
+        "type": 5,
+        "name": "c"
+    },
+    "8": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "9",
+        "rhs": "12"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "11"
+    },
+    "10": {
+        "type": 5,
+        "name": "i4"
+    },
+    "11": {
+        "type": 5,
+        "name": "i58"
+    },
+    "12": {
+        "type": 0,
+        "val": -4
+    },
+    "13": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "14",
+        "rhs": "17"
+    },
+    "14": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "15",
+        "rhs": "16"
+    },
+    "15": {
+        "type": 5,
+        "name": "i14"
+    },
+    "16": {
+        "type": 5,
+        "name": "i62"
+    },
+    "17": {
+        "type": 0,
+        "val": -4
+    },
+    "18": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "19",
+        "indexes": [
+            "20",
+            "21",
+            "22",
+            "27"
+        ]
+    },
+    "19": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "20": {
+        "type": 5,
+        "name": "f"
+    },
+    "21": {
+        "type": 5,
+        "name": "c"
+    },
+    "22": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "23",
+        "rhs": "24"
+    },
+    "23": {
+        "type": 5,
+        "name": "i4"
+    },
+    "24": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "25",
+        "rhs": "26"
+    },
+    "25": {
+        "type": 0,
+        "val": 3
+    },
+    "26": {
+        "type": 5,
+        "name": "i59"
+    },
+    "27": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "28",
+        "rhs": "29"
+    },
+    "28": {
+        "type": 5,
+        "name": "i14"
+    },
+    "29": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "30",
+        "rhs": "31"
+    },
+    "30": {
+        "type": 0,
+        "val": 3
+    },
+    "31": {
+        "type": 5,
+        "name": "i63"
+    },
+    "32": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "33",
+        "rhs": "34"
+    },
+    "33": {
+        "type": 5,
+        "name": "w"
+    },
+    "34": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 0,
+        "val": 3
+    },
+    "36": {
+        "type": 5,
+        "name": "i13"
+    },
+    "37": {
+        "type": 5,
+        "name": "i13"
+    },
+    "38": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "39",
+        "rhs": "40"
+    },
+    "39": {
+        "type": 5,
+        "name": "h"
+    },
+    "40": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "41",
+        "rhs": "42"
+    },
+    "41": {
+        "type": 0,
+        "val": 3
+    },
+    "42": {
+        "type": 5,
+        "name": "i3"
+    },
+    "43": {
+        "type": 5,
+        "name": "i3"
+    },
+    "44": {
+        "type": 5,
+        "name": "n"
+    },
+    "45": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_8.expr b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_8.expr
new file mode 100644
index 00000000..914cd639
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_8.expr
@@ -0,0 +1,345 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 8 Rule5RangeRelaxation: i62 (0,230) to (2,228),i58 (0,230) to (2,228),",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                3
+            ],
+            "i3": [
+                0,
+                3
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "32",
+            "37",
+            "38",
+            "43",
+            "44",
+            "45"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            2,
+            0,
+            2,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i62": [
+                2,
+                228
+            ],
+            "i63": [
+                0,
+                3
+            ],
+            "i58": [
+                2,
+                228
+            ],
+            "i59": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "18"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "5",
+        "indexes": [
+            "6",
+            "7",
+            "8",
+            "13"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            4,
+            4
+        ],
+        "source": "-1"
+    },
+    "6": {
+        "type": 5,
+        "name": "n"
+    },
+    "7": {
+        "type": 5,
+        "name": "c"
+    },
+    "8": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "9",
+        "rhs": "12"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "11"
+    },
+    "10": {
+        "type": 5,
+        "name": "i4"
+    },
+    "11": {
+        "type": 5,
+        "name": "i58"
+    },
+    "12": {
+        "type": 0,
+        "val": -4
+    },
+    "13": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "14",
+        "rhs": "17"
+    },
+    "14": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "15",
+        "rhs": "16"
+    },
+    "15": {
+        "type": 5,
+        "name": "i14"
+    },
+    "16": {
+        "type": 5,
+        "name": "i62"
+    },
+    "17": {
+        "type": 0,
+        "val": -4
+    },
+    "18": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "19",
+        "indexes": [
+            "20",
+            "21",
+            "22",
+            "27"
+        ]
+    },
+    "19": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "20": {
+        "type": 5,
+        "name": "f"
+    },
+    "21": {
+        "type": 5,
+        "name": "c"
+    },
+    "22": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "23",
+        "rhs": "24"
+    },
+    "23": {
+        "type": 5,
+        "name": "i4"
+    },
+    "24": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "25",
+        "rhs": "26"
+    },
+    "25": {
+        "type": 0,
+        "val": 3
+    },
+    "26": {
+        "type": 5,
+        "name": "i59"
+    },
+    "27": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "28",
+        "rhs": "29"
+    },
+    "28": {
+        "type": 5,
+        "name": "i14"
+    },
+    "29": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "30",
+        "rhs": "31"
+    },
+    "30": {
+        "type": 0,
+        "val": 3
+    },
+    "31": {
+        "type": 5,
+        "name": "i63"
+    },
+    "32": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "33",
+        "rhs": "34"
+    },
+    "33": {
+        "type": 5,
+        "name": "w"
+    },
+    "34": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 0,
+        "val": 3
+    },
+    "36": {
+        "type": 5,
+        "name": "i13"
+    },
+    "37": {
+        "type": 5,
+        "name": "i13"
+    },
+    "38": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "39",
+        "rhs": "40"
+    },
+    "39": {
+        "type": 5,
+        "name": "h"
+    },
+    "40": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "41",
+        "rhs": "42"
+    },
+    "41": {
+        "type": 0,
+        "val": 3
+    },
+    "42": {
+        "type": 5,
+        "name": "i3"
+    },
+    "43": {
+        "type": 5,
+        "name": "i3"
+    },
+    "44": {
+        "type": 5,
+        "name": "n"
+    },
+    "45": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_9.expr b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_9.expr
new file mode 100644
index 00000000..6744dcbc
--- /dev/null
+++ b/test/nnet/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_9.expr
@@ -0,0 +1,582 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 9 Rule8GuidedDLT: Toward Conv. guidedDLTMoreVar2 ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ],
+            "h": [
+                0,
+                224
+            ],
+            "w": [
+                0,
+                224
+            ]
+        },
+        "sumVarRanges": {
+            "i13": [
+                0,
+                3
+            ],
+            "i3": [
+                0,
+                3
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "63",
+            "68",
+            "69",
+            "74",
+            "75",
+            "76"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            2,
+            0,
+            2,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i62": [
+                2,
+                228
+            ],
+            "i63": [
+                0,
+                3
+            ],
+            "i58": [
+                2,
+                228
+            ],
+            "i59": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                8
+            ],
+            "f": [
+                0,
+                32
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "4",
+        "indexes": [
+            "51",
+            "60",
+            "61",
+            "62"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i64": [
+                0,
+                288
+            ],
+            "i62": [
+                2,
+                228
+            ],
+            "i58": [
+                2,
+                228
+            ],
+            "n": [
+                0,
+                8
+            ]
+        },
+        "sumVarRanges": {
+            "i14": [
+                0,
+                3
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "c": [
+                0,
+                16
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "20"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "7",
+        "indexes": [
+            "8",
+            "9",
+            "10",
+            "15"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            16,
+            224,
+            224
+        ],
+        "paddings": [
+            0,
+            0,
+            4,
+            4
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 5,
+        "name": "n"
+    },
+    "9": {
+        "type": 5,
+        "name": "c"
+    },
+    "10": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "11",
+        "rhs": "14"
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "13"
+    },
+    "12": {
+        "type": 5,
+        "name": "i4"
+    },
+    "13": {
+        "type": 5,
+        "name": "i58"
+    },
+    "14": {
+        "type": 0,
+        "val": -4
+    },
+    "15": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "16",
+        "rhs": "19"
+    },
+    "16": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "17",
+        "rhs": "18"
+    },
+    "17": {
+        "type": 5,
+        "name": "i14"
+    },
+    "18": {
+        "type": 5,
+        "name": "i62"
+    },
+    "19": {
+        "type": 0,
+        "val": -4
+    },
+    "20": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "21",
+        "indexes": [
+            "47",
+            "48",
+            "49",
+            "50"
+        ]
+    },
+    "21": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            288,
+            16,
+            3,
+            3
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "22"
+    },
+    "22": {
+        "type": 105,
+        "inputs": [
+            "23"
+        ],
+        "expr": "24",
+        "outputShape": [
+            288,
+            16,
+            3,
+            3
+        ]
+    },
+    "23": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "24": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "25",
+        "loopVarRanges": {
+            "i64": [
+                0,
+                288
+            ],
+            "c": [
+                0,
+                16
+            ],
+            "i4": [
+                0,
+                3
+            ],
+            "i14": [
+                0,
+                3
+            ]
+        }
+    },
+    "25": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "26",
+        "indexes": [
+            "27",
+            "30",
+            "31",
+            "40"
+        ]
+    },
+    "26": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            32,
+            16,
+            9,
+            9
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "27": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "28",
+        "rhs": "29"
+    },
+    "28": {
+        "type": 5,
+        "name": "i64"
+    },
+    "29": {
+        "type": 0,
+        "val": 9
+    },
+    "30": {
+        "type": 5,
+        "name": "c"
+    },
+    "31": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "32",
+        "rhs": "33"
+    },
+    "32": {
+        "type": 5,
+        "name": "i4"
+    },
+    "33": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "34",
+        "rhs": "35"
+    },
+    "34": {
+        "type": 0,
+        "val": 3
+    },
+    "35": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "36",
+        "rhs": "39"
+    },
+    "36": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "37",
+        "rhs": "38"
+    },
+    "37": {
+        "type": 5,
+        "name": "i64"
+    },
+    "38": {
+        "type": 0,
+        "val": 3
+    },
+    "39": {
+        "type": 0,
+        "val": 3
+    },
+    "40": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "41",
+        "rhs": "46"
+    },
+    "41": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "42",
+        "rhs": "43"
+    },
+    "42": {
+        "type": 0,
+        "val": 3
+    },
+    "43": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "44",
+        "rhs": "45"
+    },
+    "44": {
+        "type": 5,
+        "name": "i64"
+    },
+    "45": {
+        "type": 0,
+        "val": 3
+    },
+    "46": {
+        "type": 5,
+        "name": "i14"
+    },
+    "47": {
+        "type": 5,
+        "name": "i64"
+    },
+    "48": {
+        "type": 5,
+        "name": "c"
+    },
+    "49": {
+        "type": 5,
+        "name": "i4"
+    },
+    "50": {
+        "type": 5,
+        "name": "i14"
+    },
+    "51": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "52",
+        "rhs": "59"
+    },
+    "52": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "53",
+        "rhs": "56"
+    },
+    "53": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "54",
+        "rhs": "55"
+    },
+    "54": {
+        "type": 0,
+        "val": 9
+    },
+    "55": {
+        "type": 5,
+        "name": "f"
+    },
+    "56": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "57",
+        "rhs": "58"
+    },
+    "57": {
+        "type": 0,
+        "val": 3
+    },
+    "58": {
+        "type": 5,
+        "name": "i59"
+    },
+    "59": {
+        "type": 5,
+        "name": "i63"
+    },
+    "60": {
+        "type": 5,
+        "name": "i62"
+    },
+    "61": {
+        "type": 5,
+        "name": "i58"
+    },
+    "62": {
+        "type": 5,
+        "name": "n"
+    },
+    "63": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "64",
+        "rhs": "65"
+    },
+    "64": {
+        "type": 5,
+        "name": "w"
+    },
+    "65": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "66",
+        "rhs": "67"
+    },
+    "66": {
+        "type": 0,
+        "val": 3
+    },
+    "67": {
+        "type": 5,
+        "name": "i13"
+    },
+    "68": {
+        "type": 5,
+        "name": "i13"
+    },
+    "69": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "70",
+        "rhs": "71"
+    },
+    "70": {
+        "type": 5,
+        "name": "h"
+    },
+    "71": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "72",
+        "rhs": "73"
+    },
+    "72": {
+        "type": 0,
+        "val": 3
+    },
+    "73": {
+        "type": 5,
+        "name": "i3"
+    },
+    "74": {
+        "type": 5,
+        "name": "i3"
+    },
+    "75": {
+        "type": 5,
+        "name": "n"
+    },
+    "76": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_0.expr b/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_0.expr
new file mode 100644
index 00000000..642b8e3a
--- /dev/null
+++ b/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_0.expr
@@ -0,0 +1,179 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ],
+            "f": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                512
+            ],
+            "r": [
+                0,
+                3
+            ],
+            "s": [
+                0,
+                3
+            ]
+        }
+    },
+    "1": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "2",
+        "rhs": "16"
+    },
+    "2": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "3",
+        "indexes": [
+            "4",
+            "5",
+            "10",
+            "15"
+        ]
+    },
+    "3": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            7,
+            7,
+            512
+        ],
+        "paddings": [
+            0,
+            1,
+            1,
+            0
+        ],
+        "source": "-1"
+    },
+    "4": {
+        "type": 5,
+        "name": "n"
+    },
+    "5": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "6",
+        "rhs": "9"
+    },
+    "6": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "7",
+        "rhs": "8"
+    },
+    "7": {
+        "type": 5,
+        "name": "h"
+    },
+    "8": {
+        "type": 5,
+        "name": "r"
+    },
+    "9": {
+        "type": 0,
+        "val": -1
+    },
+    "10": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "11",
+        "rhs": "14"
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "13"
+    },
+    "12": {
+        "type": 5,
+        "name": "w"
+    },
+    "13": {
+        "type": 5,
+        "name": "s"
+    },
+    "14": {
+        "type": 0,
+        "val": -1
+    },
+    "15": {
+        "type": 5,
+        "name": "c"
+    },
+    "16": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "17",
+        "indexes": [
+            "18",
+            "19",
+            "20",
+            "21"
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            3,
+            3,
+            512,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "18": {
+        "type": 5,
+        "name": "r"
+    },
+    "19": {
+        "type": 5,
+        "name": "s"
+    },
+    "20": {
+        "type": 5,
+        "name": "f"
+    },
+    "21": {
+        "type": 5,
+        "name": "c"
+    }
+}
diff --git a/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_1.expr b/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_1.expr
new file mode 100644
index 00000000..e67afd83
--- /dev/null
+++ b/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_1.expr
@@ -0,0 +1,256 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ],
+            "f": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                3
+            ],
+            "s": [
+                0,
+                3
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "24",
+            "25",
+            "26",
+            "27",
+            "28",
+            "29"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "r": [
+                0,
+                3
+            ],
+            "s": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ],
+            "f": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "18"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "5",
+        "indexes": [
+            "6",
+            "7",
+            "12",
+            "17"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            7,
+            7,
+            512
+        ],
+        "paddings": [
+            0,
+            1,
+            1,
+            0
+        ],
+        "source": "-1"
+    },
+    "6": {
+        "type": 5,
+        "name": "n"
+    },
+    "7": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "8",
+        "rhs": "11"
+    },
+    "8": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "9",
+        "rhs": "10"
+    },
+    "9": {
+        "type": 5,
+        "name": "h"
+    },
+    "10": {
+        "type": 5,
+        "name": "r"
+    },
+    "11": {
+        "type": 0,
+        "val": -1
+    },
+    "12": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "13",
+        "rhs": "16"
+    },
+    "13": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "14",
+        "rhs": "15"
+    },
+    "14": {
+        "type": 5,
+        "name": "w"
+    },
+    "15": {
+        "type": 5,
+        "name": "s"
+    },
+    "16": {
+        "type": 0,
+        "val": -1
+    },
+    "17": {
+        "type": 5,
+        "name": "c"
+    },
+    "18": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "19",
+        "indexes": [
+            "20",
+            "21",
+            "22",
+            "23"
+        ]
+    },
+    "19": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            3,
+            3,
+            512,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "20": {
+        "type": 5,
+        "name": "r"
+    },
+    "21": {
+        "type": 5,
+        "name": "s"
+    },
+    "22": {
+        "type": 5,
+        "name": "f"
+    },
+    "23": {
+        "type": 5,
+        "name": "c"
+    },
+    "24": {
+        "type": 5,
+        "name": "r"
+    },
+    "25": {
+        "type": 5,
+        "name": "s"
+    },
+    "26": {
+        "type": 5,
+        "name": "n"
+    },
+    "27": {
+        "type": 5,
+        "name": "h"
+    },
+    "28": {
+        "type": 5,
+        "name": "w"
+    },
+    "29": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_10.expr b/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_10.expr
new file mode 100644
index 00000000..ff9edf63
--- /dev/null
+++ b/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_10.expr
@@ -0,0 +1,898 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ],
+            "f": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                3
+            ],
+            "s": [
+                0,
+                3
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "104",
+            "107",
+            "108",
+            "111",
+            "112",
+            "113"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            1,
+            0,
+            1,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i27": [
+                1,
+                8
+            ],
+            "i28": [
+                0,
+                3
+            ],
+            "i17": [
+                1,
+                8
+            ],
+            "i18": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                512
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "4",
+        "indexes": [
+            "82",
+            "95"
+        ]
+    },
+    "4": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            49,
+            4608
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "5"
+    },
+    "5": {
+        "type": 101,
+        "inputs": [
+            "6",
+            "22"
+        ],
+        "expr": "40",
+        "args": [
+            1,
+            49,
+            4608,
+            512,
+            false,
+            true
+        ]
+    },
+    "6": {
+        "type": 4,
+        "name": "T4",
+        "shape": [
+            49,
+            512
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "7"
+    },
+    "7": {
+        "type": 105,
+        "inputs": [
+            "8"
+        ],
+        "expr": "9",
+        "outputShape": [
+            49,
+            512
+        ]
+    },
+    "8": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            7,
+            7,
+            512
+        ],
+        "paddings": [
+            0,
+            1,
+            1,
+            0
+        ],
+        "source": "-1"
+    },
+    "9": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "10",
+        "loopVarRanges": {
+            "i32": [
+                0,
+                49
+            ],
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "10": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "11",
+        "indexes": [
+            "12",
+            "15",
+            "18",
+            "21"
+        ]
+    },
+    "11": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            7,
+            7,
+            512
+        ],
+        "paddings": [
+            0,
+            1,
+            1,
+            0
+        ],
+        "source": "-1"
+    },
+    "12": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "13",
+        "rhs": "14"
+    },
+    "13": {
+        "type": 5,
+        "name": "i32"
+    },
+    "14": {
+        "type": 0,
+        "val": 49
+    },
+    "15": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "16",
+        "rhs": "17"
+    },
+    "16": {
+        "type": 5,
+        "name": "i32"
+    },
+    "17": {
+        "type": 0,
+        "val": 7
+    },
+    "18": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "19",
+        "rhs": "20"
+    },
+    "19": {
+        "type": 5,
+        "name": "i32"
+    },
+    "20": {
+        "type": 0,
+        "val": 7
+    },
+    "21": {
+        "type": 5,
+        "name": "c"
+    },
+    "22": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            4608,
+            512
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "23"
+    },
+    "23": {
+        "type": 105,
+        "inputs": [
+            "24"
+        ],
+        "expr": "25",
+        "outputShape": [
+            4608,
+            512
+        ]
+    },
+    "24": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            3,
+            3,
+            512,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "25": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "26",
+        "loopVarRanges": {
+            "i31": [
+                0,
+                4608
+            ],
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "26": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "27",
+        "indexes": [
+            "28",
+            "31",
+            "36",
+            "39"
+        ]
+    },
+    "27": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            3,
+            3,
+            512,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "28": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "29",
+        "rhs": "30"
+    },
+    "29": {
+        "type": 5,
+        "name": "i31"
+    },
+    "30": {
+        "type": 0,
+        "val": 1536
+    },
+    "31": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "32",
+        "rhs": "35"
+    },
+    "32": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "33",
+        "rhs": "34"
+    },
+    "33": {
+        "type": 5,
+        "name": "i31"
+    },
+    "34": {
+        "type": 0,
+        "val": 512
+    },
+    "35": {
+        "type": 0,
+        "val": 3
+    },
+    "36": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "37",
+        "rhs": "38"
+    },
+    "37": {
+        "type": 5,
+        "name": "i31"
+    },
+    "38": {
+        "type": 0,
+        "val": 512
+    },
+    "39": {
+        "type": 5,
+        "name": "c"
+    },
+    "40": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "41",
+        "loopVarRanges": {
+            "i32": [
+                0,
+                49
+            ],
+            "i31": [
+                0,
+                4608
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "41": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "42",
+        "rhs": "61"
+    },
+    "42": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "43",
+        "indexes": [
+            "59",
+            "60"
+        ]
+    },
+    "43": {
+        "type": 4,
+        "name": "T4",
+        "shape": [
+            49,
+            512
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "44"
+    },
+    "44": {
+        "type": 105,
+        "inputs": [
+            "45"
+        ],
+        "expr": "46",
+        "outputShape": [
+            49,
+            512
+        ]
+    },
+    "45": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            7,
+            7,
+            512
+        ],
+        "paddings": [
+            0,
+            1,
+            1,
+            0
+        ],
+        "source": "-1"
+    },
+    "46": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "47",
+        "loopVarRanges": {
+            "i32": [
+                0,
+                49
+            ],
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "47": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "48",
+        "indexes": [
+            "49",
+            "52",
+            "55",
+            "58"
+        ]
+    },
+    "48": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            7,
+            7,
+            512
+        ],
+        "paddings": [
+            0,
+            1,
+            1,
+            0
+        ],
+        "source": "-1"
+    },
+    "49": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "50",
+        "rhs": "51"
+    },
+    "50": {
+        "type": 5,
+        "name": "i32"
+    },
+    "51": {
+        "type": 0,
+        "val": 49
+    },
+    "52": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "53",
+        "rhs": "54"
+    },
+    "53": {
+        "type": 5,
+        "name": "i32"
+    },
+    "54": {
+        "type": 0,
+        "val": 7
+    },
+    "55": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "56",
+        "rhs": "57"
+    },
+    "56": {
+        "type": 5,
+        "name": "i32"
+    },
+    "57": {
+        "type": 0,
+        "val": 7
+    },
+    "58": {
+        "type": 5,
+        "name": "c"
+    },
+    "59": {
+        "type": 5,
+        "name": "i32"
+    },
+    "60": {
+        "type": 5,
+        "name": "c"
+    },
+    "61": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "62",
+        "indexes": [
+            "80",
+            "81"
+        ]
+    },
+    "62": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            4608,
+            512
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "63"
+    },
+    "63": {
+        "type": 105,
+        "inputs": [
+            "64"
+        ],
+        "expr": "65",
+        "outputShape": [
+            4608,
+            512
+        ]
+    },
+    "64": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            3,
+            3,
+            512,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "65": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "66",
+        "loopVarRanges": {
+            "i31": [
+                0,
+                4608
+            ],
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "66": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "67",
+        "indexes": [
+            "68",
+            "71",
+            "76",
+            "79"
+        ]
+    },
+    "67": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            3,
+            3,
+            512,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "68": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "69",
+        "rhs": "70"
+    },
+    "69": {
+        "type": 5,
+        "name": "i31"
+    },
+    "70": {
+        "type": 0,
+        "val": 1536
+    },
+    "71": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "72",
+        "rhs": "75"
+    },
+    "72": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "73",
+        "rhs": "74"
+    },
+    "73": {
+        "type": 5,
+        "name": "i31"
+    },
+    "74": {
+        "type": 0,
+        "val": 512
+    },
+    "75": {
+        "type": 0,
+        "val": 3
+    },
+    "76": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "77",
+        "rhs": "78"
+    },
+    "77": {
+        "type": 5,
+        "name": "i31"
+    },
+    "78": {
+        "type": 0,
+        "val": 512
+    },
+    "79": {
+        "type": 5,
+        "name": "c"
+    },
+    "80": {
+        "type": 5,
+        "name": "i31"
+    },
+    "81": {
+        "type": 5,
+        "name": "c"
+    },
+    "82": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "83",
+        "rhs": "92"
+    },
+    "83": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "84",
+        "rhs": "87"
+    },
+    "84": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "85",
+        "rhs": "86"
+    },
+    "85": {
+        "type": 0,
+        "val": 49
+    },
+    "86": {
+        "type": 5,
+        "name": "n"
+    },
+    "87": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "88",
+        "rhs": "89"
+    },
+    "88": {
+        "type": 0,
+        "val": 7
+    },
+    "89": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "90",
+        "rhs": "91"
+    },
+    "90": {
+        "type": 5,
+        "name": "i17"
+    },
+    "91": {
+        "type": 0,
+        "val": -1
+    },
+    "92": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "93",
+        "rhs": "94"
+    },
+    "93": {
+        "type": 5,
+        "name": "i27"
+    },
+    "94": {
+        "type": 0,
+        "val": -1
+    },
+    "95": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "96",
+        "rhs": "103"
+    },
+    "96": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "97",
+        "rhs": "100"
+    },
+    "97": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "98",
+        "rhs": "99"
+    },
+    "98": {
+        "type": 0,
+        "val": 1536
+    },
+    "99": {
+        "type": 5,
+        "name": "i18"
+    },
+    "100": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "101",
+        "rhs": "102"
+    },
+    "101": {
+        "type": 0,
+        "val": 512
+    },
+    "102": {
+        "type": 5,
+        "name": "i28"
+    },
+    "103": {
+        "type": 5,
+        "name": "f"
+    },
+    "104": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "105",
+        "rhs": "106"
+    },
+    "105": {
+        "type": 5,
+        "name": "w"
+    },
+    "106": {
+        "type": 5,
+        "name": "s"
+    },
+    "107": {
+        "type": 5,
+        "name": "s"
+    },
+    "108": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "109",
+        "rhs": "110"
+    },
+    "109": {
+        "type": 5,
+        "name": "h"
+    },
+    "110": {
+        "type": 5,
+        "name": "r"
+    },
+    "111": {
+        "type": 5,
+        "name": "r"
+    },
+    "112": {
+        "type": 5,
+        "name": "n"
+    },
+    "113": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_11.expr b/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_11.expr
new file mode 100644
index 00000000..b47e1444
--- /dev/null
+++ b/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_11.expr
@@ -0,0 +1,1573 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 4,
+        "name": "T21",
+        "shape": [
+            1,
+            7,
+            7,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "1"
+    },
+    "1": {
+        "type": 105,
+        "inputs": [
+            "2"
+        ],
+        "expr": "80",
+        "outputShape": [
+            1,
+            7,
+            7,
+            512
+        ]
+    },
+    "2": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            49,
+            4608
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "3"
+    },
+    "3": {
+        "type": 101,
+        "inputs": [
+            "4",
+            "20"
+        ],
+        "expr": "38",
+        "args": [
+            1,
+            49,
+            4608,
+            512,
+            false,
+            true
+        ]
+    },
+    "4": {
+        "type": 4,
+        "name": "T4",
+        "shape": [
+            49,
+            512
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "5"
+    },
+    "5": {
+        "type": 105,
+        "inputs": [
+            "6"
+        ],
+        "expr": "7",
+        "outputShape": [
+            49,
+            512
+        ]
+    },
+    "6": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            7,
+            7,
+            512
+        ],
+        "paddings": [
+            0,
+            1,
+            1,
+            0
+        ],
+        "source": "-1"
+    },
+    "7": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "8",
+        "loopVarRanges": {
+            "i32": [
+                0,
+                49
+            ],
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "8": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "9",
+        "indexes": [
+            "10",
+            "13",
+            "16",
+            "19"
+        ]
+    },
+    "9": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            7,
+            7,
+            512
+        ],
+        "paddings": [
+            0,
+            1,
+            1,
+            0
+        ],
+        "source": "-1"
+    },
+    "10": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "11",
+        "rhs": "12"
+    },
+    "11": {
+        "type": 5,
+        "name": "i32"
+    },
+    "12": {
+        "type": 0,
+        "val": 49
+    },
+    "13": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "14",
+        "rhs": "15"
+    },
+    "14": {
+        "type": 5,
+        "name": "i32"
+    },
+    "15": {
+        "type": 0,
+        "val": 7
+    },
+    "16": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "17",
+        "rhs": "18"
+    },
+    "17": {
+        "type": 5,
+        "name": "i32"
+    },
+    "18": {
+        "type": 0,
+        "val": 7
+    },
+    "19": {
+        "type": 5,
+        "name": "c"
+    },
+    "20": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            4608,
+            512
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "21"
+    },
+    "21": {
+        "type": 105,
+        "inputs": [
+            "22"
+        ],
+        "expr": "23",
+        "outputShape": [
+            4608,
+            512
+        ]
+    },
+    "22": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            3,
+            3,
+            512,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "23": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "24",
+        "loopVarRanges": {
+            "i31": [
+                0,
+                4608
+            ],
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "24": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "25",
+        "indexes": [
+            "26",
+            "29",
+            "34",
+            "37"
+        ]
+    },
+    "25": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            3,
+            3,
+            512,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "26": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "27",
+        "rhs": "28"
+    },
+    "27": {
+        "type": 5,
+        "name": "i31"
+    },
+    "28": {
+        "type": 0,
+        "val": 1536
+    },
+    "29": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "30",
+        "rhs": "33"
+    },
+    "30": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "31",
+        "rhs": "32"
+    },
+    "31": {
+        "type": 5,
+        "name": "i31"
+    },
+    "32": {
+        "type": 0,
+        "val": 512
+    },
+    "33": {
+        "type": 0,
+        "val": 3
+    },
+    "34": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 5,
+        "name": "i31"
+    },
+    "36": {
+        "type": 0,
+        "val": 512
+    },
+    "37": {
+        "type": 5,
+        "name": "c"
+    },
+    "38": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "39",
+        "loopVarRanges": {
+            "i32": [
+                0,
+                49
+            ],
+            "i31": [
+                0,
+                4608
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "39": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "40",
+        "rhs": "59"
+    },
+    "40": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "41",
+        "indexes": [
+            "57",
+            "58"
+        ]
+    },
+    "41": {
+        "type": 4,
+        "name": "T4",
+        "shape": [
+            49,
+            512
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "42"
+    },
+    "42": {
+        "type": 105,
+        "inputs": [
+            "43"
+        ],
+        "expr": "44",
+        "outputShape": [
+            49,
+            512
+        ]
+    },
+    "43": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            7,
+            7,
+            512
+        ],
+        "paddings": [
+            0,
+            1,
+            1,
+            0
+        ],
+        "source": "-1"
+    },
+    "44": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "45",
+        "loopVarRanges": {
+            "i32": [
+                0,
+                49
+            ],
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "45": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "46",
+        "indexes": [
+            "47",
+            "50",
+            "53",
+            "56"
+        ]
+    },
+    "46": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            7,
+            7,
+            512
+        ],
+        "paddings": [
+            0,
+            1,
+            1,
+            0
+        ],
+        "source": "-1"
+    },
+    "47": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "48",
+        "rhs": "49"
+    },
+    "48": {
+        "type": 5,
+        "name": "i32"
+    },
+    "49": {
+        "type": 0,
+        "val": 49
+    },
+    "50": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "51",
+        "rhs": "52"
+    },
+    "51": {
+        "type": 5,
+        "name": "i32"
+    },
+    "52": {
+        "type": 0,
+        "val": 7
+    },
+    "53": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "54",
+        "rhs": "55"
+    },
+    "54": {
+        "type": 5,
+        "name": "i32"
+    },
+    "55": {
+        "type": 0,
+        "val": 7
+    },
+    "56": {
+        "type": 5,
+        "name": "c"
+    },
+    "57": {
+        "type": 5,
+        "name": "i32"
+    },
+    "58": {
+        "type": 5,
+        "name": "c"
+    },
+    "59": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "60",
+        "indexes": [
+            "78",
+            "79"
+        ]
+    },
+    "60": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            4608,
+            512
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "61"
+    },
+    "61": {
+        "type": 105,
+        "inputs": [
+            "62"
+        ],
+        "expr": "63",
+        "outputShape": [
+            4608,
+            512
+        ]
+    },
+    "62": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            3,
+            3,
+            512,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "63": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "64",
+        "loopVarRanges": {
+            "i31": [
+                0,
+                4608
+            ],
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "64": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "65",
+        "indexes": [
+            "66",
+            "69",
+            "74",
+            "77"
+        ]
+    },
+    "65": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            3,
+            3,
+            512,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "66": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "67",
+        "rhs": "68"
+    },
+    "67": {
+        "type": 5,
+        "name": "i31"
+    },
+    "68": {
+        "type": 0,
+        "val": 1536
+    },
+    "69": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "70",
+        "rhs": "73"
+    },
+    "70": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "71",
+        "rhs": "72"
+    },
+    "71": {
+        "type": 5,
+        "name": "i31"
+    },
+    "72": {
+        "type": 0,
+        "val": 512
+    },
+    "73": {
+        "type": 0,
+        "val": 3
+    },
+    "74": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "75",
+        "rhs": "76"
+    },
+    "75": {
+        "type": 5,
+        "name": "i31"
+    },
+    "76": {
+        "type": 0,
+        "val": 512
+    },
+    "77": {
+        "type": 5,
+        "name": "c"
+    },
+    "78": {
+        "type": 5,
+        "name": "i31"
+    },
+    "79": {
+        "type": 5,
+        "name": "c"
+    },
+    "80": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "81",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ],
+            "f": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                3
+            ],
+            "s": [
+                0,
+                3
+            ]
+        }
+    },
+    "81": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "82",
+        "indexes": [
+            "184",
+            "187",
+            "188",
+            "191",
+            "192",
+            "193"
+        ]
+    },
+    "82": {
+        "type": 2,
+        "paddings": [
+            1,
+            0,
+            1,
+            0,
+            0,
+            0
+        ],
+        "summand": "83",
+        "loopVarRanges": {
+            "i27": [
+                1,
+                8
+            ],
+            "i28": [
+                0,
+                3
+            ],
+            "i17": [
+                1,
+                8
+            ],
+            "i18": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                512
+            ]
+        }
+    },
+    "83": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "84",
+        "indexes": [
+            "162",
+            "175"
+        ]
+    },
+    "84": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            49,
+            4608
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "85"
+    },
+    "85": {
+        "type": 101,
+        "inputs": [
+            "86",
+            "102"
+        ],
+        "expr": "120",
+        "args": [
+            1,
+            49,
+            4608,
+            512,
+            false,
+            true
+        ]
+    },
+    "86": {
+        "type": 4,
+        "name": "T4",
+        "shape": [
+            49,
+            512
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "87"
+    },
+    "87": {
+        "type": 105,
+        "inputs": [
+            "88"
+        ],
+        "expr": "89",
+        "outputShape": [
+            49,
+            512
+        ]
+    },
+    "88": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            7,
+            7,
+            512
+        ],
+        "paddings": [
+            0,
+            1,
+            1,
+            0
+        ],
+        "source": "-1"
+    },
+    "89": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "90",
+        "loopVarRanges": {
+            "i32": [
+                0,
+                49
+            ],
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "90": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "91",
+        "indexes": [
+            "92",
+            "95",
+            "98",
+            "101"
+        ]
+    },
+    "91": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            7,
+            7,
+            512
+        ],
+        "paddings": [
+            0,
+            1,
+            1,
+            0
+        ],
+        "source": "-1"
+    },
+    "92": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "93",
+        "rhs": "94"
+    },
+    "93": {
+        "type": 5,
+        "name": "i32"
+    },
+    "94": {
+        "type": 0,
+        "val": 49
+    },
+    "95": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "96",
+        "rhs": "97"
+    },
+    "96": {
+        "type": 5,
+        "name": "i32"
+    },
+    "97": {
+        "type": 0,
+        "val": 7
+    },
+    "98": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "99",
+        "rhs": "100"
+    },
+    "99": {
+        "type": 5,
+        "name": "i32"
+    },
+    "100": {
+        "type": 0,
+        "val": 7
+    },
+    "101": {
+        "type": 5,
+        "name": "c"
+    },
+    "102": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            4608,
+            512
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "103"
+    },
+    "103": {
+        "type": 105,
+        "inputs": [
+            "104"
+        ],
+        "expr": "105",
+        "outputShape": [
+            4608,
+            512
+        ]
+    },
+    "104": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            3,
+            3,
+            512,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "105": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "106",
+        "loopVarRanges": {
+            "i31": [
+                0,
+                4608
+            ],
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "106": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "107",
+        "indexes": [
+            "108",
+            "111",
+            "116",
+            "119"
+        ]
+    },
+    "107": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            3,
+            3,
+            512,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "108": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "109",
+        "rhs": "110"
+    },
+    "109": {
+        "type": 5,
+        "name": "i31"
+    },
+    "110": {
+        "type": 0,
+        "val": 1536
+    },
+    "111": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "112",
+        "rhs": "115"
+    },
+    "112": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "113",
+        "rhs": "114"
+    },
+    "113": {
+        "type": 5,
+        "name": "i31"
+    },
+    "114": {
+        "type": 0,
+        "val": 512
+    },
+    "115": {
+        "type": 0,
+        "val": 3
+    },
+    "116": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "117",
+        "rhs": "118"
+    },
+    "117": {
+        "type": 5,
+        "name": "i31"
+    },
+    "118": {
+        "type": 0,
+        "val": 512
+    },
+    "119": {
+        "type": 5,
+        "name": "c"
+    },
+    "120": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "121",
+        "loopVarRanges": {
+            "i32": [
+                0,
+                49
+            ],
+            "i31": [
+                0,
+                4608
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "121": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "122",
+        "rhs": "141"
+    },
+    "122": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "123",
+        "indexes": [
+            "139",
+            "140"
+        ]
+    },
+    "123": {
+        "type": 4,
+        "name": "T4",
+        "shape": [
+            49,
+            512
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "124"
+    },
+    "124": {
+        "type": 105,
+        "inputs": [
+            "125"
+        ],
+        "expr": "126",
+        "outputShape": [
+            49,
+            512
+        ]
+    },
+    "125": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            7,
+            7,
+            512
+        ],
+        "paddings": [
+            0,
+            1,
+            1,
+            0
+        ],
+        "source": "-1"
+    },
+    "126": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "127",
+        "loopVarRanges": {
+            "i32": [
+                0,
+                49
+            ],
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "127": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "128",
+        "indexes": [
+            "129",
+            "132",
+            "135",
+            "138"
+        ]
+    },
+    "128": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            7,
+            7,
+            512
+        ],
+        "paddings": [
+            0,
+            1,
+            1,
+            0
+        ],
+        "source": "-1"
+    },
+    "129": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "130",
+        "rhs": "131"
+    },
+    "130": {
+        "type": 5,
+        "name": "i32"
+    },
+    "131": {
+        "type": 0,
+        "val": 49
+    },
+    "132": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "133",
+        "rhs": "134"
+    },
+    "133": {
+        "type": 5,
+        "name": "i32"
+    },
+    "134": {
+        "type": 0,
+        "val": 7
+    },
+    "135": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "136",
+        "rhs": "137"
+    },
+    "136": {
+        "type": 5,
+        "name": "i32"
+    },
+    "137": {
+        "type": 0,
+        "val": 7
+    },
+    "138": {
+        "type": 5,
+        "name": "c"
+    },
+    "139": {
+        "type": 5,
+        "name": "i32"
+    },
+    "140": {
+        "type": 5,
+        "name": "c"
+    },
+    "141": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "142",
+        "indexes": [
+            "160",
+            "161"
+        ]
+    },
+    "142": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            4608,
+            512
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "143"
+    },
+    "143": {
+        "type": 105,
+        "inputs": [
+            "144"
+        ],
+        "expr": "145",
+        "outputShape": [
+            4608,
+            512
+        ]
+    },
+    "144": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            3,
+            3,
+            512,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "145": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "146",
+        "loopVarRanges": {
+            "i31": [
+                0,
+                4608
+            ],
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "146": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "147",
+        "indexes": [
+            "148",
+            "151",
+            "156",
+            "159"
+        ]
+    },
+    "147": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            3,
+            3,
+            512,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "148": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "149",
+        "rhs": "150"
+    },
+    "149": {
+        "type": 5,
+        "name": "i31"
+    },
+    "150": {
+        "type": 0,
+        "val": 1536
+    },
+    "151": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "152",
+        "rhs": "155"
+    },
+    "152": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "153",
+        "rhs": "154"
+    },
+    "153": {
+        "type": 5,
+        "name": "i31"
+    },
+    "154": {
+        "type": 0,
+        "val": 512
+    },
+    "155": {
+        "type": 0,
+        "val": 3
+    },
+    "156": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "157",
+        "rhs": "158"
+    },
+    "157": {
+        "type": 5,
+        "name": "i31"
+    },
+    "158": {
+        "type": 0,
+        "val": 512
+    },
+    "159": {
+        "type": 5,
+        "name": "c"
+    },
+    "160": {
+        "type": 5,
+        "name": "i31"
+    },
+    "161": {
+        "type": 5,
+        "name": "c"
+    },
+    "162": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "163",
+        "rhs": "172"
+    },
+    "163": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "164",
+        "rhs": "167"
+    },
+    "164": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "165",
+        "rhs": "166"
+    },
+    "165": {
+        "type": 0,
+        "val": 49
+    },
+    "166": {
+        "type": 5,
+        "name": "n"
+    },
+    "167": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "168",
+        "rhs": "169"
+    },
+    "168": {
+        "type": 0,
+        "val": 7
+    },
+    "169": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "170",
+        "rhs": "171"
+    },
+    "170": {
+        "type": 5,
+        "name": "i17"
+    },
+    "171": {
+        "type": 0,
+        "val": -1
+    },
+    "172": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "173",
+        "rhs": "174"
+    },
+    "173": {
+        "type": 5,
+        "name": "i27"
+    },
+    "174": {
+        "type": 0,
+        "val": -1
+    },
+    "175": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "176",
+        "rhs": "183"
+    },
+    "176": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "177",
+        "rhs": "180"
+    },
+    "177": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "178",
+        "rhs": "179"
+    },
+    "178": {
+        "type": 0,
+        "val": 1536
+    },
+    "179": {
+        "type": 5,
+        "name": "i18"
+    },
+    "180": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "181",
+        "rhs": "182"
+    },
+    "181": {
+        "type": 0,
+        "val": 512
+    },
+    "182": {
+        "type": 5,
+        "name": "i28"
+    },
+    "183": {
+        "type": 5,
+        "name": "f"
+    },
+    "184": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "185",
+        "rhs": "186"
+    },
+    "185": {
+        "type": 5,
+        "name": "w"
+    },
+    "186": {
+        "type": 5,
+        "name": "s"
+    },
+    "187": {
+        "type": 5,
+        "name": "s"
+    },
+    "188": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "189",
+        "rhs": "190"
+    },
+    "189": {
+        "type": 5,
+        "name": "h"
+    },
+    "190": {
+        "type": 5,
+        "name": "r"
+    },
+    "191": {
+        "type": 5,
+        "name": "r"
+    },
+    "192": {
+        "type": 5,
+        "name": "n"
+    },
+    "193": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_2.expr b/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_2.expr
new file mode 100644
index 00000000..fa7ac5d4
--- /dev/null
+++ b/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_2.expr
@@ -0,0 +1,331 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ],
+            "f": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                3
+            ],
+            "s": [
+                0,
+                3
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "32",
+            "33",
+            "34",
+            "35",
+            "36",
+            "37"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "r": [
+                0,
+                3
+            ],
+            "s": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ],
+            "f": [
+                0,
+                512
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "4",
+        "indexes": [
+            "24",
+            "27",
+            "28",
+            "29",
+            "30",
+            "31"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i17": [
+                0,
+                9
+            ],
+            "i18": [
+                0,
+                3
+            ],
+            "s": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "w": [
+                0,
+                7
+            ],
+            "f": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "18"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "7",
+        "indexes": [
+            "8",
+            "9",
+            "12",
+            "17"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            7,
+            7,
+            512
+        ],
+        "paddings": [
+            0,
+            1,
+            1,
+            0
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 5,
+        "name": "n"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "11"
+    },
+    "10": {
+        "type": 5,
+        "name": "i17"
+    },
+    "11": {
+        "type": 0,
+        "val": -1
+    },
+    "12": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "13",
+        "rhs": "16"
+    },
+    "13": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "14",
+        "rhs": "15"
+    },
+    "14": {
+        "type": 5,
+        "name": "s"
+    },
+    "15": {
+        "type": 5,
+        "name": "w"
+    },
+    "16": {
+        "type": 0,
+        "val": -1
+    },
+    "17": {
+        "type": 5,
+        "name": "c"
+    },
+    "18": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "19",
+        "indexes": [
+            "20",
+            "21",
+            "22",
+            "23"
+        ]
+    },
+    "19": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            3,
+            3,
+            512,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "20": {
+        "type": 5,
+        "name": "i18"
+    },
+    "21": {
+        "type": 5,
+        "name": "s"
+    },
+    "22": {
+        "type": 5,
+        "name": "f"
+    },
+    "23": {
+        "type": 5,
+        "name": "c"
+    },
+    "24": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "25",
+        "rhs": "26"
+    },
+    "25": {
+        "type": 5,
+        "name": "h"
+    },
+    "26": {
+        "type": 5,
+        "name": "r"
+    },
+    "27": {
+        "type": 5,
+        "name": "r"
+    },
+    "28": {
+        "type": 5,
+        "name": "s"
+    },
+    "29": {
+        "type": 5,
+        "name": "n"
+    },
+    "30": {
+        "type": 5,
+        "name": "w"
+    },
+    "31": {
+        "type": 5,
+        "name": "f"
+    },
+    "32": {
+        "type": 5,
+        "name": "r"
+    },
+    "33": {
+        "type": 5,
+        "name": "s"
+    },
+    "34": {
+        "type": 5,
+        "name": "n"
+    },
+    "35": {
+        "type": 5,
+        "name": "h"
+    },
+    "36": {
+        "type": 5,
+        "name": "w"
+    },
+    "37": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_3.expr b/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_3.expr
new file mode 100644
index 00000000..f8b2a513
--- /dev/null
+++ b/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_3.expr
@@ -0,0 +1,256 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ],
+            "f": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                3
+            ],
+            "s": [
+                0,
+                3
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "22",
+            "25",
+            "26",
+            "27",
+            "28",
+            "29"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i17": [
+                0,
+                9
+            ],
+            "i18": [
+                0,
+                3
+            ],
+            "s": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "w": [
+                0,
+                7
+            ],
+            "f": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "16"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "5",
+        "indexes": [
+            "6",
+            "7",
+            "10",
+            "15"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            7,
+            7,
+            512
+        ],
+        "paddings": [
+            0,
+            1,
+            1,
+            0
+        ],
+        "source": "-1"
+    },
+    "6": {
+        "type": 5,
+        "name": "n"
+    },
+    "7": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "8",
+        "rhs": "9"
+    },
+    "8": {
+        "type": 5,
+        "name": "i17"
+    },
+    "9": {
+        "type": 0,
+        "val": -1
+    },
+    "10": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "11",
+        "rhs": "14"
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "13"
+    },
+    "12": {
+        "type": 5,
+        "name": "s"
+    },
+    "13": {
+        "type": 5,
+        "name": "w"
+    },
+    "14": {
+        "type": 0,
+        "val": -1
+    },
+    "15": {
+        "type": 5,
+        "name": "c"
+    },
+    "16": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "17",
+        "indexes": [
+            "18",
+            "19",
+            "20",
+            "21"
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            3,
+            3,
+            512,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "18": {
+        "type": 5,
+        "name": "i18"
+    },
+    "19": {
+        "type": 5,
+        "name": "s"
+    },
+    "20": {
+        "type": 5,
+        "name": "f"
+    },
+    "21": {
+        "type": 5,
+        "name": "c"
+    },
+    "22": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "23",
+        "rhs": "24"
+    },
+    "23": {
+        "type": 5,
+        "name": "h"
+    },
+    "24": {
+        "type": 5,
+        "name": "r"
+    },
+    "25": {
+        "type": 5,
+        "name": "r"
+    },
+    "26": {
+        "type": 5,
+        "name": "s"
+    },
+    "27": {
+        "type": 5,
+        "name": "n"
+    },
+    "28": {
+        "type": 5,
+        "name": "w"
+    },
+    "29": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_4.expr b/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_4.expr
new file mode 100644
index 00000000..f8d37bba
--- /dev/null
+++ b/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_4.expr
@@ -0,0 +1,331 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ],
+            "f": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                3
+            ],
+            "s": [
+                0,
+                3
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "30",
+            "33",
+            "34",
+            "35",
+            "36",
+            "37"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i17": [
+                0,
+                9
+            ],
+            "i18": [
+                0,
+                3
+            ],
+            "s": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "w": [
+                0,
+                7
+            ],
+            "f": [
+                0,
+                512
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "4",
+        "indexes": [
+            "22",
+            "25",
+            "26",
+            "27",
+            "28",
+            "29"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i27": [
+                0,
+                9
+            ],
+            "i28": [
+                0,
+                3
+            ],
+            "i17": [
+                0,
+                9
+            ],
+            "i18": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "16"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "7",
+        "indexes": [
+            "8",
+            "9",
+            "12",
+            "15"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            7,
+            7,
+            512
+        ],
+        "paddings": [
+            0,
+            1,
+            1,
+            0
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 5,
+        "name": "n"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "11"
+    },
+    "10": {
+        "type": 5,
+        "name": "i17"
+    },
+    "11": {
+        "type": 0,
+        "val": -1
+    },
+    "12": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "13",
+        "rhs": "14"
+    },
+    "13": {
+        "type": 5,
+        "name": "i27"
+    },
+    "14": {
+        "type": 0,
+        "val": -1
+    },
+    "15": {
+        "type": 5,
+        "name": "c"
+    },
+    "16": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "17",
+        "indexes": [
+            "18",
+            "19",
+            "20",
+            "21"
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            3,
+            3,
+            512,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "18": {
+        "type": 5,
+        "name": "i18"
+    },
+    "19": {
+        "type": 5,
+        "name": "i28"
+    },
+    "20": {
+        "type": 5,
+        "name": "f"
+    },
+    "21": {
+        "type": 5,
+        "name": "c"
+    },
+    "22": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "23",
+        "rhs": "24"
+    },
+    "23": {
+        "type": 5,
+        "name": "w"
+    },
+    "24": {
+        "type": 5,
+        "name": "s"
+    },
+    "25": {
+        "type": 5,
+        "name": "s"
+    },
+    "26": {
+        "type": 5,
+        "name": "i17"
+    },
+    "27": {
+        "type": 5,
+        "name": "i18"
+    },
+    "28": {
+        "type": 5,
+        "name": "n"
+    },
+    "29": {
+        "type": 5,
+        "name": "f"
+    },
+    "30": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "31",
+        "rhs": "32"
+    },
+    "31": {
+        "type": 5,
+        "name": "h"
+    },
+    "32": {
+        "type": 5,
+        "name": "r"
+    },
+    "33": {
+        "type": 5,
+        "name": "r"
+    },
+    "34": {
+        "type": 5,
+        "name": "s"
+    },
+    "35": {
+        "type": 5,
+        "name": "n"
+    },
+    "36": {
+        "type": 5,
+        "name": "w"
+    },
+    "37": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_5.expr b/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_5.expr
new file mode 100644
index 00000000..4a30b329
--- /dev/null
+++ b/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_5.expr
@@ -0,0 +1,256 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ],
+            "f": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                3
+            ],
+            "s": [
+                0,
+                3
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "20",
+            "23",
+            "24",
+            "27",
+            "28",
+            "29"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i27": [
+                0,
+                9
+            ],
+            "i28": [
+                0,
+                3
+            ],
+            "i17": [
+                0,
+                9
+            ],
+            "i18": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "14"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "5",
+        "indexes": [
+            "6",
+            "7",
+            "10",
+            "13"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            7,
+            7,
+            512
+        ],
+        "paddings": [
+            0,
+            1,
+            1,
+            0
+        ],
+        "source": "-1"
+    },
+    "6": {
+        "type": 5,
+        "name": "n"
+    },
+    "7": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "8",
+        "rhs": "9"
+    },
+    "8": {
+        "type": 5,
+        "name": "i17"
+    },
+    "9": {
+        "type": 0,
+        "val": -1
+    },
+    "10": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "11",
+        "rhs": "12"
+    },
+    "11": {
+        "type": 5,
+        "name": "i27"
+    },
+    "12": {
+        "type": 0,
+        "val": -1
+    },
+    "13": {
+        "type": 5,
+        "name": "c"
+    },
+    "14": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "15",
+        "indexes": [
+            "16",
+            "17",
+            "18",
+            "19"
+        ]
+    },
+    "15": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            3,
+            3,
+            512,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "16": {
+        "type": 5,
+        "name": "i18"
+    },
+    "17": {
+        "type": 5,
+        "name": "i28"
+    },
+    "18": {
+        "type": 5,
+        "name": "f"
+    },
+    "19": {
+        "type": 5,
+        "name": "c"
+    },
+    "20": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "21",
+        "rhs": "22"
+    },
+    "21": {
+        "type": 5,
+        "name": "w"
+    },
+    "22": {
+        "type": 5,
+        "name": "s"
+    },
+    "23": {
+        "type": 5,
+        "name": "s"
+    },
+    "24": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "25",
+        "rhs": "26"
+    },
+    "25": {
+        "type": 5,
+        "name": "h"
+    },
+    "26": {
+        "type": 5,
+        "name": "r"
+    },
+    "27": {
+        "type": 5,
+        "name": "r"
+    },
+    "28": {
+        "type": 5,
+        "name": "n"
+    },
+    "29": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_6.expr b/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_6.expr
new file mode 100644
index 00000000..03c4b510
--- /dev/null
+++ b/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_6.expr
@@ -0,0 +1,256 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ],
+            "f": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                3
+            ],
+            "s": [
+                0,
+                3
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "20",
+            "23",
+            "24",
+            "27",
+            "28",
+            "29"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            1,
+            0,
+            1,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i27": [
+                1,
+                8
+            ],
+            "i28": [
+                0,
+                3
+            ],
+            "i17": [
+                1,
+                8
+            ],
+            "i18": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "14"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "5",
+        "indexes": [
+            "6",
+            "7",
+            "10",
+            "13"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            7,
+            7,
+            512
+        ],
+        "paddings": [
+            0,
+            1,
+            1,
+            0
+        ],
+        "source": "-1"
+    },
+    "6": {
+        "type": 5,
+        "name": "n"
+    },
+    "7": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "8",
+        "rhs": "9"
+    },
+    "8": {
+        "type": 5,
+        "name": "i17"
+    },
+    "9": {
+        "type": 0,
+        "val": -1
+    },
+    "10": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "11",
+        "rhs": "12"
+    },
+    "11": {
+        "type": 5,
+        "name": "i27"
+    },
+    "12": {
+        "type": 0,
+        "val": -1
+    },
+    "13": {
+        "type": 5,
+        "name": "c"
+    },
+    "14": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "15",
+        "indexes": [
+            "16",
+            "17",
+            "18",
+            "19"
+        ]
+    },
+    "15": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            3,
+            3,
+            512,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "16": {
+        "type": 5,
+        "name": "i18"
+    },
+    "17": {
+        "type": 5,
+        "name": "i28"
+    },
+    "18": {
+        "type": 5,
+        "name": "f"
+    },
+    "19": {
+        "type": 5,
+        "name": "c"
+    },
+    "20": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "21",
+        "rhs": "22"
+    },
+    "21": {
+        "type": 5,
+        "name": "w"
+    },
+    "22": {
+        "type": 5,
+        "name": "s"
+    },
+    "23": {
+        "type": 5,
+        "name": "s"
+    },
+    "24": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "25",
+        "rhs": "26"
+    },
+    "25": {
+        "type": 5,
+        "name": "h"
+    },
+    "26": {
+        "type": 5,
+        "name": "r"
+    },
+    "27": {
+        "type": 5,
+        "name": "r"
+    },
+    "28": {
+        "type": 5,
+        "name": "n"
+    },
+    "29": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_7.expr b/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_7.expr
new file mode 100644
index 00000000..ffbfcf7c
--- /dev/null
+++ b/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_7.expr
@@ -0,0 +1,467 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ],
+            "f": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                3
+            ],
+            "s": [
+                0,
+                3
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "49",
+            "52",
+            "53",
+            "56",
+            "57",
+            "58"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            1,
+            0,
+            1,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i27": [
+                1,
+                8
+            ],
+            "i28": [
+                0,
+                3
+            ],
+            "i17": [
+                1,
+                8
+            ],
+            "i18": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                512
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "4",
+        "indexes": [
+            "37",
+            "46",
+            "47",
+            "48"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i31": [
+                0,
+                4608
+            ],
+            "i27": [
+                1,
+                8
+            ],
+            "i17": [
+                1,
+                8
+            ],
+            "n": [
+                0,
+                1
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "16"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "7",
+        "indexes": [
+            "8",
+            "9",
+            "12",
+            "15"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            7,
+            7,
+            512
+        ],
+        "paddings": [
+            0,
+            1,
+            1,
+            0
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 5,
+        "name": "n"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "11"
+    },
+    "10": {
+        "type": 5,
+        "name": "i17"
+    },
+    "11": {
+        "type": 0,
+        "val": -1
+    },
+    "12": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "13",
+        "rhs": "14"
+    },
+    "13": {
+        "type": 5,
+        "name": "i27"
+    },
+    "14": {
+        "type": 0,
+        "val": -1
+    },
+    "15": {
+        "type": 5,
+        "name": "c"
+    },
+    "16": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "17",
+        "indexes": [
+            "35",
+            "36"
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            4608,
+            512
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "18"
+    },
+    "18": {
+        "type": 105,
+        "inputs": [
+            "19"
+        ],
+        "expr": "20",
+        "outputShape": [
+            4608,
+            512
+        ]
+    },
+    "19": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            3,
+            3,
+            512,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "20": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "21",
+        "loopVarRanges": {
+            "i31": [
+                0,
+                4608
+            ],
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "21": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "22",
+        "indexes": [
+            "23",
+            "26",
+            "31",
+            "34"
+        ]
+    },
+    "22": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            3,
+            3,
+            512,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "23": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "24",
+        "rhs": "25"
+    },
+    "24": {
+        "type": 5,
+        "name": "i31"
+    },
+    "25": {
+        "type": 0,
+        "val": 1536
+    },
+    "26": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "27",
+        "rhs": "30"
+    },
+    "27": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "28",
+        "rhs": "29"
+    },
+    "28": {
+        "type": 5,
+        "name": "i31"
+    },
+    "29": {
+        "type": 0,
+        "val": 512
+    },
+    "30": {
+        "type": 0,
+        "val": 3
+    },
+    "31": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "32",
+        "rhs": "33"
+    },
+    "32": {
+        "type": 5,
+        "name": "i31"
+    },
+    "33": {
+        "type": 0,
+        "val": 512
+    },
+    "34": {
+        "type": 5,
+        "name": "c"
+    },
+    "35": {
+        "type": 5,
+        "name": "i31"
+    },
+    "36": {
+        "type": 5,
+        "name": "c"
+    },
+    "37": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "38",
+        "rhs": "45"
+    },
+    "38": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "39",
+        "rhs": "42"
+    },
+    "39": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "40",
+        "rhs": "41"
+    },
+    "40": {
+        "type": 0,
+        "val": 1536
+    },
+    "41": {
+        "type": 5,
+        "name": "i18"
+    },
+    "42": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "43",
+        "rhs": "44"
+    },
+    "43": {
+        "type": 0,
+        "val": 512
+    },
+    "44": {
+        "type": 5,
+        "name": "i28"
+    },
+    "45": {
+        "type": 5,
+        "name": "f"
+    },
+    "46": {
+        "type": 5,
+        "name": "i27"
+    },
+    "47": {
+        "type": 5,
+        "name": "i17"
+    },
+    "48": {
+        "type": 5,
+        "name": "n"
+    },
+    "49": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "50",
+        "rhs": "51"
+    },
+    "50": {
+        "type": 5,
+        "name": "w"
+    },
+    "51": {
+        "type": 5,
+        "name": "s"
+    },
+    "52": {
+        "type": 5,
+        "name": "s"
+    },
+    "53": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "54",
+        "rhs": "55"
+    },
+    "54": {
+        "type": 5,
+        "name": "h"
+    },
+    "55": {
+        "type": 5,
+        "name": "r"
+    },
+    "56": {
+        "type": 5,
+        "name": "r"
+    },
+    "57": {
+        "type": 5,
+        "name": "n"
+    },
+    "58": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_8.expr b/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_8.expr
new file mode 100644
index 00000000..acf9bf4d
--- /dev/null
+++ b/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_8.expr
@@ -0,0 +1,648 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ],
+            "f": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                3
+            ],
+            "s": [
+                0,
+                3
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "74",
+            "77",
+            "78",
+            "81",
+            "82",
+            "83"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            1,
+            0,
+            1,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i27": [
+                1,
+                8
+            ],
+            "i28": [
+                0,
+                3
+            ],
+            "i17": [
+                1,
+                8
+            ],
+            "i18": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                512
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "4",
+        "indexes": [
+            "62",
+            "71",
+            "72",
+            "73"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i31": [
+                0,
+                4608
+            ],
+            "i27": [
+                1,
+                8
+            ],
+            "i17": [
+                1,
+                8
+            ],
+            "n": [
+                0,
+                1
+            ]
+        }
+    },
+    "5": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "6",
+        "indexes": [
+            "48",
+            "61"
+        ]
+    },
+    "6": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "7",
+        "loopVarRanges": {
+            "i32": [
+                0,
+                49
+            ],
+            "i31": [
+                0,
+                4608
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "7": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "8",
+        "rhs": "27"
+    },
+    "8": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "9",
+        "indexes": [
+            "25",
+            "26"
+        ]
+    },
+    "9": {
+        "type": 4,
+        "name": "T4",
+        "shape": [
+            49,
+            512
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "10"
+    },
+    "10": {
+        "type": 105,
+        "inputs": [
+            "11"
+        ],
+        "expr": "12",
+        "outputShape": [
+            49,
+            512
+        ]
+    },
+    "11": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            7,
+            7,
+            512
+        ],
+        "paddings": [
+            0,
+            1,
+            1,
+            0
+        ],
+        "source": "-1"
+    },
+    "12": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "13",
+        "loopVarRanges": {
+            "i32": [
+                0,
+                49
+            ],
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "13": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "14",
+        "indexes": [
+            "15",
+            "18",
+            "21",
+            "24"
+        ]
+    },
+    "14": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            7,
+            7,
+            512
+        ],
+        "paddings": [
+            0,
+            1,
+            1,
+            0
+        ],
+        "source": "-1"
+    },
+    "15": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "16",
+        "rhs": "17"
+    },
+    "16": {
+        "type": 5,
+        "name": "i32"
+    },
+    "17": {
+        "type": 0,
+        "val": 49
+    },
+    "18": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "19",
+        "rhs": "20"
+    },
+    "19": {
+        "type": 5,
+        "name": "i32"
+    },
+    "20": {
+        "type": 0,
+        "val": 7
+    },
+    "21": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "22",
+        "rhs": "23"
+    },
+    "22": {
+        "type": 5,
+        "name": "i32"
+    },
+    "23": {
+        "type": 0,
+        "val": 7
+    },
+    "24": {
+        "type": 5,
+        "name": "c"
+    },
+    "25": {
+        "type": 5,
+        "name": "i32"
+    },
+    "26": {
+        "type": 5,
+        "name": "c"
+    },
+    "27": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "28",
+        "indexes": [
+            "46",
+            "47"
+        ]
+    },
+    "28": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            4608,
+            512
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "29"
+    },
+    "29": {
+        "type": 105,
+        "inputs": [
+            "30"
+        ],
+        "expr": "31",
+        "outputShape": [
+            4608,
+            512
+        ]
+    },
+    "30": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            3,
+            3,
+            512,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "31": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "32",
+        "loopVarRanges": {
+            "i31": [
+                0,
+                4608
+            ],
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "32": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "33",
+        "indexes": [
+            "34",
+            "37",
+            "42",
+            "45"
+        ]
+    },
+    "33": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            3,
+            3,
+            512,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "34": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 5,
+        "name": "i31"
+    },
+    "36": {
+        "type": 0,
+        "val": 1536
+    },
+    "37": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "38",
+        "rhs": "41"
+    },
+    "38": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "39",
+        "rhs": "40"
+    },
+    "39": {
+        "type": 5,
+        "name": "i31"
+    },
+    "40": {
+        "type": 0,
+        "val": 512
+    },
+    "41": {
+        "type": 0,
+        "val": 3
+    },
+    "42": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "43",
+        "rhs": "44"
+    },
+    "43": {
+        "type": 5,
+        "name": "i31"
+    },
+    "44": {
+        "type": 0,
+        "val": 512
+    },
+    "45": {
+        "type": 5,
+        "name": "c"
+    },
+    "46": {
+        "type": 5,
+        "name": "i31"
+    },
+    "47": {
+        "type": 5,
+        "name": "c"
+    },
+    "48": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "49",
+        "rhs": "58"
+    },
+    "49": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "50",
+        "rhs": "53"
+    },
+    "50": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "51",
+        "rhs": "52"
+    },
+    "51": {
+        "type": 0,
+        "val": 49
+    },
+    "52": {
+        "type": 5,
+        "name": "n"
+    },
+    "53": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "54",
+        "rhs": "55"
+    },
+    "54": {
+        "type": 0,
+        "val": 7
+    },
+    "55": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "56",
+        "rhs": "57"
+    },
+    "56": {
+        "type": 5,
+        "name": "i17"
+    },
+    "57": {
+        "type": 0,
+        "val": -1
+    },
+    "58": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "59",
+        "rhs": "60"
+    },
+    "59": {
+        "type": 5,
+        "name": "i27"
+    },
+    "60": {
+        "type": 0,
+        "val": -1
+    },
+    "61": {
+        "type": 5,
+        "name": "i31"
+    },
+    "62": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "63",
+        "rhs": "70"
+    },
+    "63": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "64",
+        "rhs": "67"
+    },
+    "64": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "65",
+        "rhs": "66"
+    },
+    "65": {
+        "type": 0,
+        "val": 1536
+    },
+    "66": {
+        "type": 5,
+        "name": "i18"
+    },
+    "67": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "68",
+        "rhs": "69"
+    },
+    "68": {
+        "type": 0,
+        "val": 512
+    },
+    "69": {
+        "type": 5,
+        "name": "i28"
+    },
+    "70": {
+        "type": 5,
+        "name": "f"
+    },
+    "71": {
+        "type": 5,
+        "name": "i27"
+    },
+    "72": {
+        "type": 5,
+        "name": "i17"
+    },
+    "73": {
+        "type": 5,
+        "name": "n"
+    },
+    "74": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "75",
+        "rhs": "76"
+    },
+    "75": {
+        "type": 5,
+        "name": "w"
+    },
+    "76": {
+        "type": 5,
+        "name": "s"
+    },
+    "77": {
+        "type": 5,
+        "name": "s"
+    },
+    "78": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "79",
+        "rhs": "80"
+    },
+    "79": {
+        "type": 5,
+        "name": "h"
+    },
+    "80": {
+        "type": 5,
+        "name": "r"
+    },
+    "81": {
+        "type": 5,
+        "name": "r"
+    },
+    "82": {
+        "type": 5,
+        "name": "n"
+    },
+    "83": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_9.expr b/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_9.expr
new file mode 100644
index 00000000..23590f31
--- /dev/null
+++ b/test/nnet/log/conv2gemm/Conv2gemm_NCHW_RSFC_9.expr
@@ -0,0 +1,593 @@
+{
+    "Version": 1,
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ],
+            "f": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                3
+            ],
+            "s": [
+                0,
+                3
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "68",
+            "71",
+            "72",
+            "75",
+            "76",
+            "77"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            1,
+            0,
+            1,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i27": [
+                1,
+                8
+            ],
+            "i28": [
+                0,
+                3
+            ],
+            "i17": [
+                1,
+                8
+            ],
+            "i18": [
+                0,
+                3
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                512
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "4",
+        "indexes": [
+            "46",
+            "59"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i32": [
+                0,
+                49
+            ],
+            "i31": [
+                0,
+                4608
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "25"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "7",
+        "indexes": [
+            "23",
+            "24"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "T4",
+        "shape": [
+            49,
+            512
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "8"
+    },
+    "8": {
+        "type": 105,
+        "inputs": [
+            "9"
+        ],
+        "expr": "10",
+        "outputShape": [
+            49,
+            512
+        ]
+    },
+    "9": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            7,
+            7,
+            512
+        ],
+        "paddings": [
+            0,
+            1,
+            1,
+            0
+        ],
+        "source": "-1"
+    },
+    "10": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "11",
+        "loopVarRanges": {
+            "i32": [
+                0,
+                49
+            ],
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "11": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "12",
+        "indexes": [
+            "13",
+            "16",
+            "19",
+            "22"
+        ]
+    },
+    "12": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            7,
+            7,
+            512
+        ],
+        "paddings": [
+            0,
+            1,
+            1,
+            0
+        ],
+        "source": "-1"
+    },
+    "13": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "14",
+        "rhs": "15"
+    },
+    "14": {
+        "type": 5,
+        "name": "i32"
+    },
+    "15": {
+        "type": 0,
+        "val": 49
+    },
+    "16": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "17",
+        "rhs": "18"
+    },
+    "17": {
+        "type": 5,
+        "name": "i32"
+    },
+    "18": {
+        "type": 0,
+        "val": 7
+    },
+    "19": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "20",
+        "rhs": "21"
+    },
+    "20": {
+        "type": 5,
+        "name": "i32"
+    },
+    "21": {
+        "type": 0,
+        "val": 7
+    },
+    "22": {
+        "type": 5,
+        "name": "c"
+    },
+    "23": {
+        "type": 5,
+        "name": "i32"
+    },
+    "24": {
+        "type": 5,
+        "name": "c"
+    },
+    "25": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "26",
+        "indexes": [
+            "44",
+            "45"
+        ]
+    },
+    "26": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            4608,
+            512
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "27"
+    },
+    "27": {
+        "type": 105,
+        "inputs": [
+            "28"
+        ],
+        "expr": "29",
+        "outputShape": [
+            4608,
+            512
+        ]
+    },
+    "28": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            3,
+            3,
+            512,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "29": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "30",
+        "loopVarRanges": {
+            "i31": [
+                0,
+                4608
+            ],
+            "c": [
+                0,
+                512
+            ]
+        }
+    },
+    "30": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "31",
+        "indexes": [
+            "32",
+            "35",
+            "40",
+            "43"
+        ]
+    },
+    "31": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            3,
+            3,
+            512,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "32": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "33",
+        "rhs": "34"
+    },
+    "33": {
+        "type": 5,
+        "name": "i31"
+    },
+    "34": {
+        "type": 0,
+        "val": 1536
+    },
+    "35": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "36",
+        "rhs": "39"
+    },
+    "36": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "37",
+        "rhs": "38"
+    },
+    "37": {
+        "type": 5,
+        "name": "i31"
+    },
+    "38": {
+        "type": 0,
+        "val": 512
+    },
+    "39": {
+        "type": 0,
+        "val": 3
+    },
+    "40": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "41",
+        "rhs": "42"
+    },
+    "41": {
+        "type": 5,
+        "name": "i31"
+    },
+    "42": {
+        "type": 0,
+        "val": 512
+    },
+    "43": {
+        "type": 5,
+        "name": "c"
+    },
+    "44": {
+        "type": 5,
+        "name": "i31"
+    },
+    "45": {
+        "type": 5,
+        "name": "c"
+    },
+    "46": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "47",
+        "rhs": "56"
+    },
+    "47": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "48",
+        "rhs": "51"
+    },
+    "48": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "49",
+        "rhs": "50"
+    },
+    "49": {
+        "type": 0,
+        "val": 49
+    },
+    "50": {
+        "type": 5,
+        "name": "n"
+    },
+    "51": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "52",
+        "rhs": "53"
+    },
+    "52": {
+        "type": 0,
+        "val": 7
+    },
+    "53": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "54",
+        "rhs": "55"
+    },
+    "54": {
+        "type": 5,
+        "name": "i17"
+    },
+    "55": {
+        "type": 0,
+        "val": -1
+    },
+    "56": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "57",
+        "rhs": "58"
+    },
+    "57": {
+        "type": 5,
+        "name": "i27"
+    },
+    "58": {
+        "type": 0,
+        "val": -1
+    },
+    "59": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "60",
+        "rhs": "67"
+    },
+    "60": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "61",
+        "rhs": "64"
+    },
+    "61": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "62",
+        "rhs": "63"
+    },
+    "62": {
+        "type": 0,
+        "val": 1536
+    },
+    "63": {
+        "type": 5,
+        "name": "i18"
+    },
+    "64": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "65",
+        "rhs": "66"
+    },
+    "65": {
+        "type": 0,
+        "val": 512
+    },
+    "66": {
+        "type": 5,
+        "name": "i28"
+    },
+    "67": {
+        "type": 5,
+        "name": "f"
+    },
+    "68": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "69",
+        "rhs": "70"
+    },
+    "69": {
+        "type": 5,
+        "name": "w"
+    },
+    "70": {
+        "type": 5,
+        "name": "s"
+    },
+    "71": {
+        "type": 5,
+        "name": "s"
+    },
+    "72": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "73",
+        "rhs": "74"
+    },
+    "73": {
+        "type": 5,
+        "name": "h"
+    },
+    "74": {
+        "type": 5,
+        "name": "r"
+    },
+    "75": {
+        "type": 5,
+        "name": "r"
+    },
+    "76": {
+        "type": 5,
+        "name": "n"
+    },
+    "77": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_0.expr b/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_0.expr
new file mode 100644
index 00000000..1aa153bf
--- /dev/null
+++ b/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_0.expr
@@ -0,0 +1,170 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 0 Init: ruleBasedDFS: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                128
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                2048
+            ],
+            "r": [
+                0,
+                1
+            ],
+            "s": [
+                0,
+                7
+            ]
+        }
+    },
+    "1": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "2",
+        "rhs": "14"
+    },
+    "2": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "3",
+        "indexes": [
+            "4",
+            "5",
+            "6",
+            "9"
+        ]
+    },
+    "3": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            2048,
+            7,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            3
+        ],
+        "source": "-1"
+    },
+    "4": {
+        "type": 5,
+        "name": "n"
+    },
+    "5": {
+        "type": 5,
+        "name": "c"
+    },
+    "6": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "7",
+        "rhs": "8"
+    },
+    "7": {
+        "type": 5,
+        "name": "h"
+    },
+    "8": {
+        "type": 5,
+        "name": "r"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "13"
+    },
+    "10": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "11",
+        "rhs": "12"
+    },
+    "11": {
+        "type": 5,
+        "name": "w"
+    },
+    "12": {
+        "type": 5,
+        "name": "s"
+    },
+    "13": {
+        "type": 0,
+        "val": -3
+    },
+    "14": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "15",
+        "indexes": [
+            "16",
+            "17",
+            "18",
+            "19"
+        ]
+    },
+    "15": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            128,
+            2048,
+            1,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "16": {
+        "type": 5,
+        "name": "f"
+    },
+    "17": {
+        "type": 5,
+        "name": "c"
+    },
+    "18": {
+        "type": 5,
+        "name": "r"
+    },
+    "19": {
+        "type": 5,
+        "name": "s"
+    }
+}
diff --git a/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_1.expr b/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_1.expr
new file mode 100644
index 00000000..14e130e6
--- /dev/null
+++ b/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_1.expr
@@ -0,0 +1,247 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 1 Rule3StageSplit: Separate sum iters: [r,s]",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                128
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                1
+            ],
+            "s": [
+                0,
+                7
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "22",
+            "23",
+            "24",
+            "25",
+            "26",
+            "27"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "r": [
+                0,
+                1
+            ],
+            "s": [
+                0,
+                7
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                128
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "16"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "5",
+        "indexes": [
+            "6",
+            "7",
+            "8",
+            "11"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            2048,
+            7,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            3
+        ],
+        "source": "-1"
+    },
+    "6": {
+        "type": 5,
+        "name": "n"
+    },
+    "7": {
+        "type": 5,
+        "name": "c"
+    },
+    "8": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "9",
+        "rhs": "10"
+    },
+    "9": {
+        "type": 5,
+        "name": "h"
+    },
+    "10": {
+        "type": 5,
+        "name": "r"
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "15"
+    },
+    "12": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "13",
+        "rhs": "14"
+    },
+    "13": {
+        "type": 5,
+        "name": "w"
+    },
+    "14": {
+        "type": 5,
+        "name": "s"
+    },
+    "15": {
+        "type": 0,
+        "val": -3
+    },
+    "16": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "17",
+        "indexes": [
+            "18",
+            "19",
+            "20",
+            "21"
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            128,
+            2048,
+            1,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "18": {
+        "type": 5,
+        "name": "f"
+    },
+    "19": {
+        "type": 5,
+        "name": "c"
+    },
+    "20": {
+        "type": 5,
+        "name": "r"
+    },
+    "21": {
+        "type": 5,
+        "name": "s"
+    },
+    "22": {
+        "type": 5,
+        "name": "r"
+    },
+    "23": {
+        "type": 5,
+        "name": "s"
+    },
+    "24": {
+        "type": 5,
+        "name": "n"
+    },
+    "25": {
+        "type": 5,
+        "name": "f"
+    },
+    "26": {
+        "type": 5,
+        "name": "h"
+    },
+    "27": {
+        "type": 5,
+        "name": "w"
+    }
+}
diff --git a/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_10.expr b/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_10.expr
new file mode 100644
index 00000000..3cfa1fbd
--- /dev/null
+++ b/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_10.expr
@@ -0,0 +1,889 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 10 Rule6KenerlMatching: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                128
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                1
+            ],
+            "s": [
+                0,
+                7
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "102",
+            "105",
+            "106",
+            "109",
+            "110",
+            "111"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            3,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i30": [
+                3,
+                10
+            ],
+            "i31": [
+                0,
+                7
+            ],
+            "i19": [
+                0,
+                7
+            ],
+            "i20": [
+                0,
+                1
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                128
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "4",
+        "indexes": [
+            "82",
+            "93"
+        ]
+    },
+    "4": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            49,
+            896
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "5"
+    },
+    "5": {
+        "type": 101,
+        "inputs": [
+            "6",
+            "22"
+        ],
+        "expr": "40",
+        "args": [
+            1,
+            49,
+            896,
+            2048,
+            false,
+            true
+        ]
+    },
+    "6": {
+        "type": 4,
+        "name": "T4",
+        "shape": [
+            49,
+            2048
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "7"
+    },
+    "7": {
+        "type": 105,
+        "inputs": [
+            "8"
+        ],
+        "expr": "9",
+        "outputShape": [
+            49,
+            2048
+        ]
+    },
+    "8": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            2048,
+            7,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            3
+        ],
+        "source": "-1"
+    },
+    "9": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "10",
+        "loopVarRanges": {
+            "i35": [
+                0,
+                49
+            ],
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "10": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "11",
+        "indexes": [
+            "12",
+            "15",
+            "16",
+            "19"
+        ]
+    },
+    "11": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            2048,
+            7,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            3
+        ],
+        "source": "-1"
+    },
+    "12": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "13",
+        "rhs": "14"
+    },
+    "13": {
+        "type": 5,
+        "name": "i35"
+    },
+    "14": {
+        "type": 0,
+        "val": 49
+    },
+    "15": {
+        "type": 5,
+        "name": "c"
+    },
+    "16": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "17",
+        "rhs": "18"
+    },
+    "17": {
+        "type": 5,
+        "name": "i35"
+    },
+    "18": {
+        "type": 0,
+        "val": 7
+    },
+    "19": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "20",
+        "rhs": "21"
+    },
+    "20": {
+        "type": 5,
+        "name": "i35"
+    },
+    "21": {
+        "type": 0,
+        "val": 7
+    },
+    "22": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            896,
+            2048
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "23"
+    },
+    "23": {
+        "type": 105,
+        "inputs": [
+            "24"
+        ],
+        "expr": "25",
+        "outputShape": [
+            896,
+            2048
+        ]
+    },
+    "24": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            128,
+            2048,
+            1,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "25": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "26",
+        "loopVarRanges": {
+            "i34": [
+                0,
+                896
+            ],
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "26": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "27",
+        "indexes": [
+            "28",
+            "31",
+            "32",
+            "37"
+        ]
+    },
+    "27": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            128,
+            2048,
+            1,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "28": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "29",
+        "rhs": "30"
+    },
+    "29": {
+        "type": 5,
+        "name": "i34"
+    },
+    "30": {
+        "type": 0,
+        "val": 7
+    },
+    "31": {
+        "type": 5,
+        "name": "c"
+    },
+    "32": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "33",
+        "rhs": "36"
+    },
+    "33": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "34",
+        "rhs": "35"
+    },
+    "34": {
+        "type": 5,
+        "name": "i34"
+    },
+    "35": {
+        "type": 0,
+        "val": 7
+    },
+    "36": {
+        "type": 0,
+        "val": 1
+    },
+    "37": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "38",
+        "rhs": "39"
+    },
+    "38": {
+        "type": 5,
+        "name": "i34"
+    },
+    "39": {
+        "type": 0,
+        "val": 7
+    },
+    "40": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "41",
+        "loopVarRanges": {
+            "i35": [
+                0,
+                49
+            ],
+            "i34": [
+                0,
+                896
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "41": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "42",
+        "rhs": "61"
+    },
+    "42": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "43",
+        "indexes": [
+            "59",
+            "60"
+        ]
+    },
+    "43": {
+        "type": 4,
+        "name": "T4",
+        "shape": [
+            49,
+            2048
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "44"
+    },
+    "44": {
+        "type": 105,
+        "inputs": [
+            "45"
+        ],
+        "expr": "46",
+        "outputShape": [
+            49,
+            2048
+        ]
+    },
+    "45": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            2048,
+            7,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            3
+        ],
+        "source": "-1"
+    },
+    "46": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "47",
+        "loopVarRanges": {
+            "i35": [
+                0,
+                49
+            ],
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "47": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "48",
+        "indexes": [
+            "49",
+            "52",
+            "53",
+            "56"
+        ]
+    },
+    "48": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            2048,
+            7,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            3
+        ],
+        "source": "-1"
+    },
+    "49": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "50",
+        "rhs": "51"
+    },
+    "50": {
+        "type": 5,
+        "name": "i35"
+    },
+    "51": {
+        "type": 0,
+        "val": 49
+    },
+    "52": {
+        "type": 5,
+        "name": "c"
+    },
+    "53": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "54",
+        "rhs": "55"
+    },
+    "54": {
+        "type": 5,
+        "name": "i35"
+    },
+    "55": {
+        "type": 0,
+        "val": 7
+    },
+    "56": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "57",
+        "rhs": "58"
+    },
+    "57": {
+        "type": 5,
+        "name": "i35"
+    },
+    "58": {
+        "type": 0,
+        "val": 7
+    },
+    "59": {
+        "type": 5,
+        "name": "i35"
+    },
+    "60": {
+        "type": 5,
+        "name": "c"
+    },
+    "61": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "62",
+        "indexes": [
+            "80",
+            "81"
+        ]
+    },
+    "62": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            896,
+            2048
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "63"
+    },
+    "63": {
+        "type": 105,
+        "inputs": [
+            "64"
+        ],
+        "expr": "65",
+        "outputShape": [
+            896,
+            2048
+        ]
+    },
+    "64": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            128,
+            2048,
+            1,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "65": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "66",
+        "loopVarRanges": {
+            "i34": [
+                0,
+                896
+            ],
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "66": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "67",
+        "indexes": [
+            "68",
+            "71",
+            "72",
+            "77"
+        ]
+    },
+    "67": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            128,
+            2048,
+            1,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "68": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "69",
+        "rhs": "70"
+    },
+    "69": {
+        "type": 5,
+        "name": "i34"
+    },
+    "70": {
+        "type": 0,
+        "val": 7
+    },
+    "71": {
+        "type": 5,
+        "name": "c"
+    },
+    "72": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "73",
+        "rhs": "76"
+    },
+    "73": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "74",
+        "rhs": "75"
+    },
+    "74": {
+        "type": 5,
+        "name": "i34"
+    },
+    "75": {
+        "type": 0,
+        "val": 7
+    },
+    "76": {
+        "type": 0,
+        "val": 1
+    },
+    "77": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "78",
+        "rhs": "79"
+    },
+    "78": {
+        "type": 5,
+        "name": "i34"
+    },
+    "79": {
+        "type": 0,
+        "val": 7
+    },
+    "80": {
+        "type": 5,
+        "name": "i34"
+    },
+    "81": {
+        "type": 5,
+        "name": "c"
+    },
+    "82": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "83",
+        "rhs": "90"
+    },
+    "83": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "84",
+        "rhs": "87"
+    },
+    "84": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "85",
+        "rhs": "86"
+    },
+    "85": {
+        "type": 0,
+        "val": 49
+    },
+    "86": {
+        "type": 5,
+        "name": "n"
+    },
+    "87": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "88",
+        "rhs": "89"
+    },
+    "88": {
+        "type": 0,
+        "val": 7
+    },
+    "89": {
+        "type": 5,
+        "name": "i19"
+    },
+    "90": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "91",
+        "rhs": "92"
+    },
+    "91": {
+        "type": 5,
+        "name": "i30"
+    },
+    "92": {
+        "type": 0,
+        "val": -3
+    },
+    "93": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "94",
+        "rhs": "101"
+    },
+    "94": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "95",
+        "rhs": "98"
+    },
+    "95": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "96",
+        "rhs": "97"
+    },
+    "96": {
+        "type": 0,
+        "val": 7
+    },
+    "97": {
+        "type": 5,
+        "name": "f"
+    },
+    "98": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "99",
+        "rhs": "100"
+    },
+    "99": {
+        "type": 0,
+        "val": 7
+    },
+    "100": {
+        "type": 5,
+        "name": "i20"
+    },
+    "101": {
+        "type": 5,
+        "name": "i31"
+    },
+    "102": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "103",
+        "rhs": "104"
+    },
+    "103": {
+        "type": 5,
+        "name": "w"
+    },
+    "104": {
+        "type": 5,
+        "name": "s"
+    },
+    "105": {
+        "type": 5,
+        "name": "s"
+    },
+    "106": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "107",
+        "rhs": "108"
+    },
+    "107": {
+        "type": 5,
+        "name": "h"
+    },
+    "108": {
+        "type": 5,
+        "name": "r"
+    },
+    "109": {
+        "type": 5,
+        "name": "r"
+    },
+    "110": {
+        "type": 5,
+        "name": "n"
+    },
+    "111": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_11.expr b/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_11.expr
new file mode 100644
index 00000000..39d1ea69
--- /dev/null
+++ b/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_11.expr
@@ -0,0 +1,1564 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 11 Rule90TwoStageElementWise: ",
+    "0": {
+        "type": 4,
+        "name": "T21",
+        "shape": [
+            1,
+            128,
+            7,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "1"
+    },
+    "1": {
+        "type": 105,
+        "inputs": [
+            "2"
+        ],
+        "expr": "80",
+        "outputShape": [
+            1,
+            128,
+            7,
+            7
+        ]
+    },
+    "2": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            49,
+            896
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "3"
+    },
+    "3": {
+        "type": 101,
+        "inputs": [
+            "4",
+            "20"
+        ],
+        "expr": "38",
+        "args": [
+            1,
+            49,
+            896,
+            2048,
+            false,
+            true
+        ]
+    },
+    "4": {
+        "type": 4,
+        "name": "T4",
+        "shape": [
+            49,
+            2048
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "5"
+    },
+    "5": {
+        "type": 105,
+        "inputs": [
+            "6"
+        ],
+        "expr": "7",
+        "outputShape": [
+            49,
+            2048
+        ]
+    },
+    "6": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            2048,
+            7,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            3
+        ],
+        "source": "-1"
+    },
+    "7": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "8",
+        "loopVarRanges": {
+            "i35": [
+                0,
+                49
+            ],
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "8": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "9",
+        "indexes": [
+            "10",
+            "13",
+            "14",
+            "17"
+        ]
+    },
+    "9": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            2048,
+            7,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            3
+        ],
+        "source": "-1"
+    },
+    "10": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "11",
+        "rhs": "12"
+    },
+    "11": {
+        "type": 5,
+        "name": "i35"
+    },
+    "12": {
+        "type": 0,
+        "val": 49
+    },
+    "13": {
+        "type": 5,
+        "name": "c"
+    },
+    "14": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "15",
+        "rhs": "16"
+    },
+    "15": {
+        "type": 5,
+        "name": "i35"
+    },
+    "16": {
+        "type": 0,
+        "val": 7
+    },
+    "17": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "18",
+        "rhs": "19"
+    },
+    "18": {
+        "type": 5,
+        "name": "i35"
+    },
+    "19": {
+        "type": 0,
+        "val": 7
+    },
+    "20": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            896,
+            2048
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "21"
+    },
+    "21": {
+        "type": 105,
+        "inputs": [
+            "22"
+        ],
+        "expr": "23",
+        "outputShape": [
+            896,
+            2048
+        ]
+    },
+    "22": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            128,
+            2048,
+            1,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "23": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "24",
+        "loopVarRanges": {
+            "i34": [
+                0,
+                896
+            ],
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "24": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "25",
+        "indexes": [
+            "26",
+            "29",
+            "30",
+            "35"
+        ]
+    },
+    "25": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            128,
+            2048,
+            1,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "26": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "27",
+        "rhs": "28"
+    },
+    "27": {
+        "type": 5,
+        "name": "i34"
+    },
+    "28": {
+        "type": 0,
+        "val": 7
+    },
+    "29": {
+        "type": 5,
+        "name": "c"
+    },
+    "30": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "31",
+        "rhs": "34"
+    },
+    "31": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "32",
+        "rhs": "33"
+    },
+    "32": {
+        "type": 5,
+        "name": "i34"
+    },
+    "33": {
+        "type": 0,
+        "val": 7
+    },
+    "34": {
+        "type": 0,
+        "val": 1
+    },
+    "35": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "36",
+        "rhs": "37"
+    },
+    "36": {
+        "type": 5,
+        "name": "i34"
+    },
+    "37": {
+        "type": 0,
+        "val": 7
+    },
+    "38": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "39",
+        "loopVarRanges": {
+            "i35": [
+                0,
+                49
+            ],
+            "i34": [
+                0,
+                896
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "39": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "40",
+        "rhs": "59"
+    },
+    "40": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "41",
+        "indexes": [
+            "57",
+            "58"
+        ]
+    },
+    "41": {
+        "type": 4,
+        "name": "T4",
+        "shape": [
+            49,
+            2048
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "42"
+    },
+    "42": {
+        "type": 105,
+        "inputs": [
+            "43"
+        ],
+        "expr": "44",
+        "outputShape": [
+            49,
+            2048
+        ]
+    },
+    "43": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            2048,
+            7,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            3
+        ],
+        "source": "-1"
+    },
+    "44": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "45",
+        "loopVarRanges": {
+            "i35": [
+                0,
+                49
+            ],
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "45": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "46",
+        "indexes": [
+            "47",
+            "50",
+            "51",
+            "54"
+        ]
+    },
+    "46": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            2048,
+            7,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            3
+        ],
+        "source": "-1"
+    },
+    "47": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "48",
+        "rhs": "49"
+    },
+    "48": {
+        "type": 5,
+        "name": "i35"
+    },
+    "49": {
+        "type": 0,
+        "val": 49
+    },
+    "50": {
+        "type": 5,
+        "name": "c"
+    },
+    "51": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "52",
+        "rhs": "53"
+    },
+    "52": {
+        "type": 5,
+        "name": "i35"
+    },
+    "53": {
+        "type": 0,
+        "val": 7
+    },
+    "54": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "55",
+        "rhs": "56"
+    },
+    "55": {
+        "type": 5,
+        "name": "i35"
+    },
+    "56": {
+        "type": 0,
+        "val": 7
+    },
+    "57": {
+        "type": 5,
+        "name": "i35"
+    },
+    "58": {
+        "type": 5,
+        "name": "c"
+    },
+    "59": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "60",
+        "indexes": [
+            "78",
+            "79"
+        ]
+    },
+    "60": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            896,
+            2048
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "61"
+    },
+    "61": {
+        "type": 105,
+        "inputs": [
+            "62"
+        ],
+        "expr": "63",
+        "outputShape": [
+            896,
+            2048
+        ]
+    },
+    "62": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            128,
+            2048,
+            1,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "63": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "64",
+        "loopVarRanges": {
+            "i34": [
+                0,
+                896
+            ],
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "64": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "65",
+        "indexes": [
+            "66",
+            "69",
+            "70",
+            "75"
+        ]
+    },
+    "65": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            128,
+            2048,
+            1,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "66": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "67",
+        "rhs": "68"
+    },
+    "67": {
+        "type": 5,
+        "name": "i34"
+    },
+    "68": {
+        "type": 0,
+        "val": 7
+    },
+    "69": {
+        "type": 5,
+        "name": "c"
+    },
+    "70": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "71",
+        "rhs": "74"
+    },
+    "71": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "72",
+        "rhs": "73"
+    },
+    "72": {
+        "type": 5,
+        "name": "i34"
+    },
+    "73": {
+        "type": 0,
+        "val": 7
+    },
+    "74": {
+        "type": 0,
+        "val": 1
+    },
+    "75": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "76",
+        "rhs": "77"
+    },
+    "76": {
+        "type": 5,
+        "name": "i34"
+    },
+    "77": {
+        "type": 0,
+        "val": 7
+    },
+    "78": {
+        "type": 5,
+        "name": "i34"
+    },
+    "79": {
+        "type": 5,
+        "name": "c"
+    },
+    "80": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "81",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                128
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                1
+            ],
+            "s": [
+                0,
+                7
+            ]
+        }
+    },
+    "81": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "82",
+        "indexes": [
+            "182",
+            "185",
+            "186",
+            "189",
+            "190",
+            "191"
+        ]
+    },
+    "82": {
+        "type": 2,
+        "paddings": [
+            3,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "83",
+        "loopVarRanges": {
+            "i30": [
+                3,
+                10
+            ],
+            "i31": [
+                0,
+                7
+            ],
+            "i19": [
+                0,
+                7
+            ],
+            "i20": [
+                0,
+                1
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                128
+            ]
+        }
+    },
+    "83": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "84",
+        "indexes": [
+            "162",
+            "173"
+        ]
+    },
+    "84": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            49,
+            896
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "85"
+    },
+    "85": {
+        "type": 101,
+        "inputs": [
+            "86",
+            "102"
+        ],
+        "expr": "120",
+        "args": [
+            1,
+            49,
+            896,
+            2048,
+            false,
+            true
+        ]
+    },
+    "86": {
+        "type": 4,
+        "name": "T4",
+        "shape": [
+            49,
+            2048
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "87"
+    },
+    "87": {
+        "type": 105,
+        "inputs": [
+            "88"
+        ],
+        "expr": "89",
+        "outputShape": [
+            49,
+            2048
+        ]
+    },
+    "88": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            2048,
+            7,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            3
+        ],
+        "source": "-1"
+    },
+    "89": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "90",
+        "loopVarRanges": {
+            "i35": [
+                0,
+                49
+            ],
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "90": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "91",
+        "indexes": [
+            "92",
+            "95",
+            "96",
+            "99"
+        ]
+    },
+    "91": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            2048,
+            7,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            3
+        ],
+        "source": "-1"
+    },
+    "92": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "93",
+        "rhs": "94"
+    },
+    "93": {
+        "type": 5,
+        "name": "i35"
+    },
+    "94": {
+        "type": 0,
+        "val": 49
+    },
+    "95": {
+        "type": 5,
+        "name": "c"
+    },
+    "96": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "97",
+        "rhs": "98"
+    },
+    "97": {
+        "type": 5,
+        "name": "i35"
+    },
+    "98": {
+        "type": 0,
+        "val": 7
+    },
+    "99": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "100",
+        "rhs": "101"
+    },
+    "100": {
+        "type": 5,
+        "name": "i35"
+    },
+    "101": {
+        "type": 0,
+        "val": 7
+    },
+    "102": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            896,
+            2048
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "103"
+    },
+    "103": {
+        "type": 105,
+        "inputs": [
+            "104"
+        ],
+        "expr": "105",
+        "outputShape": [
+            896,
+            2048
+        ]
+    },
+    "104": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            128,
+            2048,
+            1,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "105": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "106",
+        "loopVarRanges": {
+            "i34": [
+                0,
+                896
+            ],
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "106": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "107",
+        "indexes": [
+            "108",
+            "111",
+            "112",
+            "117"
+        ]
+    },
+    "107": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            128,
+            2048,
+            1,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "108": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "109",
+        "rhs": "110"
+    },
+    "109": {
+        "type": 5,
+        "name": "i34"
+    },
+    "110": {
+        "type": 0,
+        "val": 7
+    },
+    "111": {
+        "type": 5,
+        "name": "c"
+    },
+    "112": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "113",
+        "rhs": "116"
+    },
+    "113": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "114",
+        "rhs": "115"
+    },
+    "114": {
+        "type": 5,
+        "name": "i34"
+    },
+    "115": {
+        "type": 0,
+        "val": 7
+    },
+    "116": {
+        "type": 0,
+        "val": 1
+    },
+    "117": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "118",
+        "rhs": "119"
+    },
+    "118": {
+        "type": 5,
+        "name": "i34"
+    },
+    "119": {
+        "type": 0,
+        "val": 7
+    },
+    "120": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "121",
+        "loopVarRanges": {
+            "i35": [
+                0,
+                49
+            ],
+            "i34": [
+                0,
+                896
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "121": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "122",
+        "rhs": "141"
+    },
+    "122": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "123",
+        "indexes": [
+            "139",
+            "140"
+        ]
+    },
+    "123": {
+        "type": 4,
+        "name": "T4",
+        "shape": [
+            49,
+            2048
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "124"
+    },
+    "124": {
+        "type": 105,
+        "inputs": [
+            "125"
+        ],
+        "expr": "126",
+        "outputShape": [
+            49,
+            2048
+        ]
+    },
+    "125": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            2048,
+            7,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            3
+        ],
+        "source": "-1"
+    },
+    "126": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "127",
+        "loopVarRanges": {
+            "i35": [
+                0,
+                49
+            ],
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "127": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "128",
+        "indexes": [
+            "129",
+            "132",
+            "133",
+            "136"
+        ]
+    },
+    "128": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            2048,
+            7,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            3
+        ],
+        "source": "-1"
+    },
+    "129": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "130",
+        "rhs": "131"
+    },
+    "130": {
+        "type": 5,
+        "name": "i35"
+    },
+    "131": {
+        "type": 0,
+        "val": 49
+    },
+    "132": {
+        "type": 5,
+        "name": "c"
+    },
+    "133": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "134",
+        "rhs": "135"
+    },
+    "134": {
+        "type": 5,
+        "name": "i35"
+    },
+    "135": {
+        "type": 0,
+        "val": 7
+    },
+    "136": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "137",
+        "rhs": "138"
+    },
+    "137": {
+        "type": 5,
+        "name": "i35"
+    },
+    "138": {
+        "type": 0,
+        "val": 7
+    },
+    "139": {
+        "type": 5,
+        "name": "i35"
+    },
+    "140": {
+        "type": 5,
+        "name": "c"
+    },
+    "141": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "142",
+        "indexes": [
+            "160",
+            "161"
+        ]
+    },
+    "142": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            896,
+            2048
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "143"
+    },
+    "143": {
+        "type": 105,
+        "inputs": [
+            "144"
+        ],
+        "expr": "145",
+        "outputShape": [
+            896,
+            2048
+        ]
+    },
+    "144": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            128,
+            2048,
+            1,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "145": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "146",
+        "loopVarRanges": {
+            "i34": [
+                0,
+                896
+            ],
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "146": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "147",
+        "indexes": [
+            "148",
+            "151",
+            "152",
+            "157"
+        ]
+    },
+    "147": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            128,
+            2048,
+            1,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "148": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "149",
+        "rhs": "150"
+    },
+    "149": {
+        "type": 5,
+        "name": "i34"
+    },
+    "150": {
+        "type": 0,
+        "val": 7
+    },
+    "151": {
+        "type": 5,
+        "name": "c"
+    },
+    "152": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "153",
+        "rhs": "156"
+    },
+    "153": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "154",
+        "rhs": "155"
+    },
+    "154": {
+        "type": 5,
+        "name": "i34"
+    },
+    "155": {
+        "type": 0,
+        "val": 7
+    },
+    "156": {
+        "type": 0,
+        "val": 1
+    },
+    "157": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "158",
+        "rhs": "159"
+    },
+    "158": {
+        "type": 5,
+        "name": "i34"
+    },
+    "159": {
+        "type": 0,
+        "val": 7
+    },
+    "160": {
+        "type": 5,
+        "name": "i34"
+    },
+    "161": {
+        "type": 5,
+        "name": "c"
+    },
+    "162": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "163",
+        "rhs": "170"
+    },
+    "163": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "164",
+        "rhs": "167"
+    },
+    "164": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "165",
+        "rhs": "166"
+    },
+    "165": {
+        "type": 0,
+        "val": 49
+    },
+    "166": {
+        "type": 5,
+        "name": "n"
+    },
+    "167": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "168",
+        "rhs": "169"
+    },
+    "168": {
+        "type": 0,
+        "val": 7
+    },
+    "169": {
+        "type": 5,
+        "name": "i19"
+    },
+    "170": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "171",
+        "rhs": "172"
+    },
+    "171": {
+        "type": 5,
+        "name": "i30"
+    },
+    "172": {
+        "type": 0,
+        "val": -3
+    },
+    "173": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "174",
+        "rhs": "181"
+    },
+    "174": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "175",
+        "rhs": "178"
+    },
+    "175": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "176",
+        "rhs": "177"
+    },
+    "176": {
+        "type": 0,
+        "val": 7
+    },
+    "177": {
+        "type": 5,
+        "name": "f"
+    },
+    "178": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "179",
+        "rhs": "180"
+    },
+    "179": {
+        "type": 0,
+        "val": 7
+    },
+    "180": {
+        "type": 5,
+        "name": "i20"
+    },
+    "181": {
+        "type": 5,
+        "name": "i31"
+    },
+    "182": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "183",
+        "rhs": "184"
+    },
+    "183": {
+        "type": 5,
+        "name": "w"
+    },
+    "184": {
+        "type": 5,
+        "name": "s"
+    },
+    "185": {
+        "type": 5,
+        "name": "s"
+    },
+    "186": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "187",
+        "rhs": "188"
+    },
+    "187": {
+        "type": 5,
+        "name": "h"
+    },
+    "188": {
+        "type": 5,
+        "name": "r"
+    },
+    "189": {
+        "type": 5,
+        "name": "r"
+    },
+    "190": {
+        "type": 5,
+        "name": "n"
+    },
+    "191": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_2.expr b/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_2.expr
new file mode 100644
index 00000000..53e2135a
--- /dev/null
+++ b/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_2.expr
@@ -0,0 +1,322 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 2 Rule2VariableMerging: Old iters: [h,r], new iters: [i19,i20] phis: [(h + r),r] psis: [(i19 - i20),i20]",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                128
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                1
+            ],
+            "s": [
+                0,
+                7
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "30",
+            "31",
+            "32",
+            "33",
+            "34",
+            "35"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "r": [
+                0,
+                1
+            ],
+            "s": [
+                0,
+                7
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                128
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "4",
+        "indexes": [
+            "22",
+            "25",
+            "26",
+            "27",
+            "28",
+            "29"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i19": [
+                0,
+                7
+            ],
+            "i20": [
+                0,
+                1
+            ],
+            "s": [
+                0,
+                7
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                128
+            ],
+            "w": [
+                0,
+                7
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "16"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "7",
+        "indexes": [
+            "8",
+            "9",
+            "10",
+            "11"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            2048,
+            7,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            3
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 5,
+        "name": "n"
+    },
+    "9": {
+        "type": 5,
+        "name": "c"
+    },
+    "10": {
+        "type": 5,
+        "name": "i19"
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "15"
+    },
+    "12": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "13",
+        "rhs": "14"
+    },
+    "13": {
+        "type": 5,
+        "name": "s"
+    },
+    "14": {
+        "type": 5,
+        "name": "w"
+    },
+    "15": {
+        "type": 0,
+        "val": -3
+    },
+    "16": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "17",
+        "indexes": [
+            "18",
+            "19",
+            "20",
+            "21"
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            128,
+            2048,
+            1,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "18": {
+        "type": 5,
+        "name": "f"
+    },
+    "19": {
+        "type": 5,
+        "name": "c"
+    },
+    "20": {
+        "type": 5,
+        "name": "i20"
+    },
+    "21": {
+        "type": 5,
+        "name": "s"
+    },
+    "22": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "23",
+        "rhs": "24"
+    },
+    "23": {
+        "type": 5,
+        "name": "h"
+    },
+    "24": {
+        "type": 5,
+        "name": "r"
+    },
+    "25": {
+        "type": 5,
+        "name": "r"
+    },
+    "26": {
+        "type": 5,
+        "name": "s"
+    },
+    "27": {
+        "type": 5,
+        "name": "n"
+    },
+    "28": {
+        "type": 5,
+        "name": "f"
+    },
+    "29": {
+        "type": 5,
+        "name": "w"
+    },
+    "30": {
+        "type": 5,
+        "name": "r"
+    },
+    "31": {
+        "type": 5,
+        "name": "s"
+    },
+    "32": {
+        "type": 5,
+        "name": "n"
+    },
+    "33": {
+        "type": 5,
+        "name": "f"
+    },
+    "34": {
+        "type": 5,
+        "name": "h"
+    },
+    "35": {
+        "type": 5,
+        "name": "w"
+    }
+}
diff --git a/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_3.expr b/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_3.expr
new file mode 100644
index 00000000..43a0cfe0
--- /dev/null
+++ b/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_3.expr
@@ -0,0 +1,247 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 3 Rule4StageMerging: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                128
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                1
+            ],
+            "s": [
+                0,
+                7
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "20",
+            "23",
+            "24",
+            "25",
+            "26",
+            "27"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i19": [
+                0,
+                7
+            ],
+            "i20": [
+                0,
+                1
+            ],
+            "s": [
+                0,
+                7
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                128
+            ],
+            "w": [
+                0,
+                7
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "14"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "5",
+        "indexes": [
+            "6",
+            "7",
+            "8",
+            "9"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            2048,
+            7,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            3
+        ],
+        "source": "-1"
+    },
+    "6": {
+        "type": 5,
+        "name": "n"
+    },
+    "7": {
+        "type": 5,
+        "name": "c"
+    },
+    "8": {
+        "type": 5,
+        "name": "i19"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "13"
+    },
+    "10": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "11",
+        "rhs": "12"
+    },
+    "11": {
+        "type": 5,
+        "name": "s"
+    },
+    "12": {
+        "type": 5,
+        "name": "w"
+    },
+    "13": {
+        "type": 0,
+        "val": -3
+    },
+    "14": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "15",
+        "indexes": [
+            "16",
+            "17",
+            "18",
+            "19"
+        ]
+    },
+    "15": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            128,
+            2048,
+            1,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "16": {
+        "type": 5,
+        "name": "f"
+    },
+    "17": {
+        "type": 5,
+        "name": "c"
+    },
+    "18": {
+        "type": 5,
+        "name": "i20"
+    },
+    "19": {
+        "type": 5,
+        "name": "s"
+    },
+    "20": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "21",
+        "rhs": "22"
+    },
+    "21": {
+        "type": 5,
+        "name": "h"
+    },
+    "22": {
+        "type": 5,
+        "name": "r"
+    },
+    "23": {
+        "type": 5,
+        "name": "r"
+    },
+    "24": {
+        "type": 5,
+        "name": "s"
+    },
+    "25": {
+        "type": 5,
+        "name": "n"
+    },
+    "26": {
+        "type": 5,
+        "name": "f"
+    },
+    "27": {
+        "type": 5,
+        "name": "w"
+    }
+}
diff --git a/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_4.expr b/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_4.expr
new file mode 100644
index 00000000..7bd3e7d3
--- /dev/null
+++ b/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_4.expr
@@ -0,0 +1,322 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 4 Rule2VariableMerging: Old iters: [w,s], new iters: [i30,i31] phis: [(w + s),s] psis: [(i30 - i31),i31]",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                128
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                1
+            ],
+            "s": [
+                0,
+                7
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "28",
+            "31",
+            "32",
+            "33",
+            "34",
+            "35"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i19": [
+                0,
+                7
+            ],
+            "i20": [
+                0,
+                1
+            ],
+            "s": [
+                0,
+                7
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                128
+            ],
+            "w": [
+                0,
+                7
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "4",
+        "indexes": [
+            "20",
+            "23",
+            "24",
+            "25",
+            "26",
+            "27"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i30": [
+                0,
+                13
+            ],
+            "i31": [
+                0,
+                7
+            ],
+            "i19": [
+                0,
+                7
+            ],
+            "i20": [
+                0,
+                1
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                128
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "14"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "7",
+        "indexes": [
+            "8",
+            "9",
+            "10",
+            "11"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            2048,
+            7,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            3
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 5,
+        "name": "n"
+    },
+    "9": {
+        "type": 5,
+        "name": "c"
+    },
+    "10": {
+        "type": 5,
+        "name": "i19"
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "13"
+    },
+    "12": {
+        "type": 5,
+        "name": "i30"
+    },
+    "13": {
+        "type": 0,
+        "val": -3
+    },
+    "14": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "15",
+        "indexes": [
+            "16",
+            "17",
+            "18",
+            "19"
+        ]
+    },
+    "15": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            128,
+            2048,
+            1,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "16": {
+        "type": 5,
+        "name": "f"
+    },
+    "17": {
+        "type": 5,
+        "name": "c"
+    },
+    "18": {
+        "type": 5,
+        "name": "i20"
+    },
+    "19": {
+        "type": 5,
+        "name": "i31"
+    },
+    "20": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "21",
+        "rhs": "22"
+    },
+    "21": {
+        "type": 5,
+        "name": "w"
+    },
+    "22": {
+        "type": 5,
+        "name": "s"
+    },
+    "23": {
+        "type": 5,
+        "name": "s"
+    },
+    "24": {
+        "type": 5,
+        "name": "i19"
+    },
+    "25": {
+        "type": 5,
+        "name": "i20"
+    },
+    "26": {
+        "type": 5,
+        "name": "n"
+    },
+    "27": {
+        "type": 5,
+        "name": "f"
+    },
+    "28": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "29",
+        "rhs": "30"
+    },
+    "29": {
+        "type": 5,
+        "name": "h"
+    },
+    "30": {
+        "type": 5,
+        "name": "r"
+    },
+    "31": {
+        "type": 5,
+        "name": "r"
+    },
+    "32": {
+        "type": 5,
+        "name": "s"
+    },
+    "33": {
+        "type": 5,
+        "name": "n"
+    },
+    "34": {
+        "type": 5,
+        "name": "f"
+    },
+    "35": {
+        "type": 5,
+        "name": "w"
+    }
+}
diff --git a/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_5.expr b/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_5.expr
new file mode 100644
index 00000000..7e947b2a
--- /dev/null
+++ b/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_5.expr
@@ -0,0 +1,247 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 5 Rule4StageMerging: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                128
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                1
+            ],
+            "s": [
+                0,
+                7
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "18",
+            "21",
+            "22",
+            "25",
+            "26",
+            "27"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i30": [
+                0,
+                13
+            ],
+            "i31": [
+                0,
+                7
+            ],
+            "i19": [
+                0,
+                7
+            ],
+            "i20": [
+                0,
+                1
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                128
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "12"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "5",
+        "indexes": [
+            "6",
+            "7",
+            "8",
+            "9"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            2048,
+            7,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            3
+        ],
+        "source": "-1"
+    },
+    "6": {
+        "type": 5,
+        "name": "n"
+    },
+    "7": {
+        "type": 5,
+        "name": "c"
+    },
+    "8": {
+        "type": 5,
+        "name": "i19"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "11"
+    },
+    "10": {
+        "type": 5,
+        "name": "i30"
+    },
+    "11": {
+        "type": 0,
+        "val": -3
+    },
+    "12": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "13",
+        "indexes": [
+            "14",
+            "15",
+            "16",
+            "17"
+        ]
+    },
+    "13": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            128,
+            2048,
+            1,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "14": {
+        "type": 5,
+        "name": "f"
+    },
+    "15": {
+        "type": 5,
+        "name": "c"
+    },
+    "16": {
+        "type": 5,
+        "name": "i20"
+    },
+    "17": {
+        "type": 5,
+        "name": "i31"
+    },
+    "18": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "19",
+        "rhs": "20"
+    },
+    "19": {
+        "type": 5,
+        "name": "w"
+    },
+    "20": {
+        "type": 5,
+        "name": "s"
+    },
+    "21": {
+        "type": 5,
+        "name": "s"
+    },
+    "22": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "23",
+        "rhs": "24"
+    },
+    "23": {
+        "type": 5,
+        "name": "h"
+    },
+    "24": {
+        "type": 5,
+        "name": "r"
+    },
+    "25": {
+        "type": 5,
+        "name": "r"
+    },
+    "26": {
+        "type": 5,
+        "name": "n"
+    },
+    "27": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_6.expr b/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_6.expr
new file mode 100644
index 00000000..a999fb61
--- /dev/null
+++ b/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_6.expr
@@ -0,0 +1,247 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 6 Rule5RangeRelaxation: i30 (0,13) to (3,10),",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                128
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                1
+            ],
+            "s": [
+                0,
+                7
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "18",
+            "21",
+            "22",
+            "25",
+            "26",
+            "27"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            3,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i30": [
+                3,
+                10
+            ],
+            "i31": [
+                0,
+                7
+            ],
+            "i19": [
+                0,
+                7
+            ],
+            "i20": [
+                0,
+                1
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                128
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "12"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "5",
+        "indexes": [
+            "6",
+            "7",
+            "8",
+            "9"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            2048,
+            7,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            3
+        ],
+        "source": "-1"
+    },
+    "6": {
+        "type": 5,
+        "name": "n"
+    },
+    "7": {
+        "type": 5,
+        "name": "c"
+    },
+    "8": {
+        "type": 5,
+        "name": "i19"
+    },
+    "9": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "10",
+        "rhs": "11"
+    },
+    "10": {
+        "type": 5,
+        "name": "i30"
+    },
+    "11": {
+        "type": 0,
+        "val": -3
+    },
+    "12": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "13",
+        "indexes": [
+            "14",
+            "15",
+            "16",
+            "17"
+        ]
+    },
+    "13": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            128,
+            2048,
+            1,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "14": {
+        "type": 5,
+        "name": "f"
+    },
+    "15": {
+        "type": 5,
+        "name": "c"
+    },
+    "16": {
+        "type": 5,
+        "name": "i20"
+    },
+    "17": {
+        "type": 5,
+        "name": "i31"
+    },
+    "18": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "19",
+        "rhs": "20"
+    },
+    "19": {
+        "type": 5,
+        "name": "w"
+    },
+    "20": {
+        "type": 5,
+        "name": "s"
+    },
+    "21": {
+        "type": 5,
+        "name": "s"
+    },
+    "22": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "23",
+        "rhs": "24"
+    },
+    "23": {
+        "type": 5,
+        "name": "h"
+    },
+    "24": {
+        "type": 5,
+        "name": "r"
+    },
+    "25": {
+        "type": 5,
+        "name": "r"
+    },
+    "26": {
+        "type": 5,
+        "name": "n"
+    },
+    "27": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_7.expr b/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_7.expr
new file mode 100644
index 00000000..cf51ed3c
--- /dev/null
+++ b/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_7.expr
@@ -0,0 +1,458 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 7 Rule8GuidedDLT: Toward Matmul. Toward Matmul. guidedDLTMoreVar2 guidedDLTMoreVar2 ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                128
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                1
+            ],
+            "s": [
+                0,
+                7
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "47",
+            "50",
+            "51",
+            "54",
+            "55",
+            "56"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            3,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i30": [
+                3,
+                10
+            ],
+            "i31": [
+                0,
+                7
+            ],
+            "i19": [
+                0,
+                7
+            ],
+            "i20": [
+                0,
+                1
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                128
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "4",
+        "indexes": [
+            "35",
+            "44",
+            "45",
+            "46"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i34": [
+                0,
+                896
+            ],
+            "i30": [
+                3,
+                10
+            ],
+            "i19": [
+                0,
+                7
+            ],
+            "n": [
+                0,
+                1
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "14"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "7",
+        "indexes": [
+            "8",
+            "9",
+            "10",
+            "11"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            2048,
+            7,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            3
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 5,
+        "name": "n"
+    },
+    "9": {
+        "type": 5,
+        "name": "c"
+    },
+    "10": {
+        "type": 5,
+        "name": "i19"
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "13"
+    },
+    "12": {
+        "type": 5,
+        "name": "i30"
+    },
+    "13": {
+        "type": 0,
+        "val": -3
+    },
+    "14": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "15",
+        "indexes": [
+            "33",
+            "34"
+        ]
+    },
+    "15": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            896,
+            2048
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "16"
+    },
+    "16": {
+        "type": 105,
+        "inputs": [
+            "17"
+        ],
+        "expr": "18",
+        "outputShape": [
+            896,
+            2048
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            128,
+            2048,
+            1,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "18": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "19",
+        "loopVarRanges": {
+            "i34": [
+                0,
+                896
+            ],
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "19": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "20",
+        "indexes": [
+            "21",
+            "24",
+            "25",
+            "30"
+        ]
+    },
+    "20": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            128,
+            2048,
+            1,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "21": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "22",
+        "rhs": "23"
+    },
+    "22": {
+        "type": 5,
+        "name": "i34"
+    },
+    "23": {
+        "type": 0,
+        "val": 7
+    },
+    "24": {
+        "type": 5,
+        "name": "c"
+    },
+    "25": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "26",
+        "rhs": "29"
+    },
+    "26": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "27",
+        "rhs": "28"
+    },
+    "27": {
+        "type": 5,
+        "name": "i34"
+    },
+    "28": {
+        "type": 0,
+        "val": 7
+    },
+    "29": {
+        "type": 0,
+        "val": 1
+    },
+    "30": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "31",
+        "rhs": "32"
+    },
+    "31": {
+        "type": 5,
+        "name": "i34"
+    },
+    "32": {
+        "type": 0,
+        "val": 7
+    },
+    "33": {
+        "type": 5,
+        "name": "i34"
+    },
+    "34": {
+        "type": 5,
+        "name": "c"
+    },
+    "35": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "36",
+        "rhs": "43"
+    },
+    "36": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "37",
+        "rhs": "40"
+    },
+    "37": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "38",
+        "rhs": "39"
+    },
+    "38": {
+        "type": 0,
+        "val": 7
+    },
+    "39": {
+        "type": 5,
+        "name": "f"
+    },
+    "40": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "41",
+        "rhs": "42"
+    },
+    "41": {
+        "type": 0,
+        "val": 7
+    },
+    "42": {
+        "type": 5,
+        "name": "i20"
+    },
+    "43": {
+        "type": 5,
+        "name": "i31"
+    },
+    "44": {
+        "type": 5,
+        "name": "i30"
+    },
+    "45": {
+        "type": 5,
+        "name": "i19"
+    },
+    "46": {
+        "type": 5,
+        "name": "n"
+    },
+    "47": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "48",
+        "rhs": "49"
+    },
+    "48": {
+        "type": 5,
+        "name": "w"
+    },
+    "49": {
+        "type": 5,
+        "name": "s"
+    },
+    "50": {
+        "type": 5,
+        "name": "s"
+    },
+    "51": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "52",
+        "rhs": "53"
+    },
+    "52": {
+        "type": 5,
+        "name": "h"
+    },
+    "53": {
+        "type": 5,
+        "name": "r"
+    },
+    "54": {
+        "type": 5,
+        "name": "r"
+    },
+    "55": {
+        "type": 5,
+        "name": "n"
+    },
+    "56": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_8.expr b/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_8.expr
new file mode 100644
index 00000000..35f6ff31
--- /dev/null
+++ b/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_8.expr
@@ -0,0 +1,639 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 8 Rule8GuidedDLT: Toward Matmul. guidedDLTMoreVar2 ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                128
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                1
+            ],
+            "s": [
+                0,
+                7
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "72",
+            "75",
+            "76",
+            "79",
+            "80",
+            "81"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            3,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i30": [
+                3,
+                10
+            ],
+            "i31": [
+                0,
+                7
+            ],
+            "i19": [
+                0,
+                7
+            ],
+            "i20": [
+                0,
+                1
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                128
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "4",
+        "indexes": [
+            "60",
+            "69",
+            "70",
+            "71"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i34": [
+                0,
+                896
+            ],
+            "i30": [
+                3,
+                10
+            ],
+            "i19": [
+                0,
+                7
+            ],
+            "n": [
+                0,
+                1
+            ]
+        }
+    },
+    "5": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "6",
+        "indexes": [
+            "48",
+            "59"
+        ]
+    },
+    "6": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "7",
+        "loopVarRanges": {
+            "i35": [
+                0,
+                49
+            ],
+            "i34": [
+                0,
+                896
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "7": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "8",
+        "rhs": "27"
+    },
+    "8": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "9",
+        "indexes": [
+            "25",
+            "26"
+        ]
+    },
+    "9": {
+        "type": 4,
+        "name": "T4",
+        "shape": [
+            49,
+            2048
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "10"
+    },
+    "10": {
+        "type": 105,
+        "inputs": [
+            "11"
+        ],
+        "expr": "12",
+        "outputShape": [
+            49,
+            2048
+        ]
+    },
+    "11": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            2048,
+            7,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            3
+        ],
+        "source": "-1"
+    },
+    "12": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "13",
+        "loopVarRanges": {
+            "i35": [
+                0,
+                49
+            ],
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "13": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "14",
+        "indexes": [
+            "15",
+            "18",
+            "19",
+            "22"
+        ]
+    },
+    "14": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            2048,
+            7,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            3
+        ],
+        "source": "-1"
+    },
+    "15": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "16",
+        "rhs": "17"
+    },
+    "16": {
+        "type": 5,
+        "name": "i35"
+    },
+    "17": {
+        "type": 0,
+        "val": 49
+    },
+    "18": {
+        "type": 5,
+        "name": "c"
+    },
+    "19": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "20",
+        "rhs": "21"
+    },
+    "20": {
+        "type": 5,
+        "name": "i35"
+    },
+    "21": {
+        "type": 0,
+        "val": 7
+    },
+    "22": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "23",
+        "rhs": "24"
+    },
+    "23": {
+        "type": 5,
+        "name": "i35"
+    },
+    "24": {
+        "type": 0,
+        "val": 7
+    },
+    "25": {
+        "type": 5,
+        "name": "i35"
+    },
+    "26": {
+        "type": 5,
+        "name": "c"
+    },
+    "27": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "28",
+        "indexes": [
+            "46",
+            "47"
+        ]
+    },
+    "28": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            896,
+            2048
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "29"
+    },
+    "29": {
+        "type": 105,
+        "inputs": [
+            "30"
+        ],
+        "expr": "31",
+        "outputShape": [
+            896,
+            2048
+        ]
+    },
+    "30": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            128,
+            2048,
+            1,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "31": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "32",
+        "loopVarRanges": {
+            "i34": [
+                0,
+                896
+            ],
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "32": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "33",
+        "indexes": [
+            "34",
+            "37",
+            "38",
+            "43"
+        ]
+    },
+    "33": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            128,
+            2048,
+            1,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "34": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 5,
+        "name": "i34"
+    },
+    "36": {
+        "type": 0,
+        "val": 7
+    },
+    "37": {
+        "type": 5,
+        "name": "c"
+    },
+    "38": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "39",
+        "rhs": "42"
+    },
+    "39": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "40",
+        "rhs": "41"
+    },
+    "40": {
+        "type": 5,
+        "name": "i34"
+    },
+    "41": {
+        "type": 0,
+        "val": 7
+    },
+    "42": {
+        "type": 0,
+        "val": 1
+    },
+    "43": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "44",
+        "rhs": "45"
+    },
+    "44": {
+        "type": 5,
+        "name": "i34"
+    },
+    "45": {
+        "type": 0,
+        "val": 7
+    },
+    "46": {
+        "type": 5,
+        "name": "i34"
+    },
+    "47": {
+        "type": 5,
+        "name": "c"
+    },
+    "48": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "49",
+        "rhs": "56"
+    },
+    "49": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "50",
+        "rhs": "53"
+    },
+    "50": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "51",
+        "rhs": "52"
+    },
+    "51": {
+        "type": 0,
+        "val": 49
+    },
+    "52": {
+        "type": 5,
+        "name": "n"
+    },
+    "53": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "54",
+        "rhs": "55"
+    },
+    "54": {
+        "type": 0,
+        "val": 7
+    },
+    "55": {
+        "type": 5,
+        "name": "i19"
+    },
+    "56": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "57",
+        "rhs": "58"
+    },
+    "57": {
+        "type": 5,
+        "name": "i30"
+    },
+    "58": {
+        "type": 0,
+        "val": -3
+    },
+    "59": {
+        "type": 5,
+        "name": "i34"
+    },
+    "60": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "61",
+        "rhs": "68"
+    },
+    "61": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "62",
+        "rhs": "65"
+    },
+    "62": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "63",
+        "rhs": "64"
+    },
+    "63": {
+        "type": 0,
+        "val": 7
+    },
+    "64": {
+        "type": 5,
+        "name": "f"
+    },
+    "65": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "66",
+        "rhs": "67"
+    },
+    "66": {
+        "type": 0,
+        "val": 7
+    },
+    "67": {
+        "type": 5,
+        "name": "i20"
+    },
+    "68": {
+        "type": 5,
+        "name": "i31"
+    },
+    "69": {
+        "type": 5,
+        "name": "i30"
+    },
+    "70": {
+        "type": 5,
+        "name": "i19"
+    },
+    "71": {
+        "type": 5,
+        "name": "n"
+    },
+    "72": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "73",
+        "rhs": "74"
+    },
+    "73": {
+        "type": 5,
+        "name": "w"
+    },
+    "74": {
+        "type": 5,
+        "name": "s"
+    },
+    "75": {
+        "type": 5,
+        "name": "s"
+    },
+    "76": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "77",
+        "rhs": "78"
+    },
+    "77": {
+        "type": 5,
+        "name": "h"
+    },
+    "78": {
+        "type": 5,
+        "name": "r"
+    },
+    "79": {
+        "type": 5,
+        "name": "r"
+    },
+    "80": {
+        "type": 5,
+        "name": "n"
+    },
+    "81": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_9.expr b/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_9.expr
new file mode 100644
index 00000000..7ba5bb2e
--- /dev/null
+++ b/test/nnet/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_9.expr
@@ -0,0 +1,584 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 9 Rule4StageMerging: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                128
+            ],
+            "h": [
+                0,
+                7
+            ],
+            "w": [
+                0,
+                7
+            ]
+        },
+        "sumVarRanges": {
+            "r": [
+                0,
+                1
+            ],
+            "s": [
+                0,
+                7
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 6,
+        "object": "2",
+        "indexes": [
+            "66",
+            "69",
+            "70",
+            "73",
+            "74",
+            "75"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            3,
+            0,
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i30": [
+                3,
+                10
+            ],
+            "i31": [
+                0,
+                7
+            ],
+            "i19": [
+                0,
+                7
+            ],
+            "i20": [
+                0,
+                1
+            ],
+            "n": [
+                0,
+                1
+            ],
+            "f": [
+                0,
+                128
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "4",
+        "indexes": [
+            "46",
+            "57"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i35": [
+                0,
+                49
+            ],
+            "i34": [
+                0,
+                896
+            ]
+        },
+        "sumVarRanges": {
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "25"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "7",
+        "indexes": [
+            "23",
+            "24"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "T4",
+        "shape": [
+            49,
+            2048
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "8"
+    },
+    "8": {
+        "type": 105,
+        "inputs": [
+            "9"
+        ],
+        "expr": "10",
+        "outputShape": [
+            49,
+            2048
+        ]
+    },
+    "9": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            2048,
+            7,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            3
+        ],
+        "source": "-1"
+    },
+    "10": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "11",
+        "loopVarRanges": {
+            "i35": [
+                0,
+                49
+            ],
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "11": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "12",
+        "indexes": [
+            "13",
+            "16",
+            "17",
+            "20"
+        ]
+    },
+    "12": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            1,
+            2048,
+            7,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            3
+        ],
+        "source": "-1"
+    },
+    "13": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "14",
+        "rhs": "15"
+    },
+    "14": {
+        "type": 5,
+        "name": "i35"
+    },
+    "15": {
+        "type": 0,
+        "val": 49
+    },
+    "16": {
+        "type": 5,
+        "name": "c"
+    },
+    "17": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "18",
+        "rhs": "19"
+    },
+    "18": {
+        "type": 5,
+        "name": "i35"
+    },
+    "19": {
+        "type": 0,
+        "val": 7
+    },
+    "20": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "21",
+        "rhs": "22"
+    },
+    "21": {
+        "type": 5,
+        "name": "i35"
+    },
+    "22": {
+        "type": 0,
+        "val": 7
+    },
+    "23": {
+        "type": 5,
+        "name": "i35"
+    },
+    "24": {
+        "type": 5,
+        "name": "c"
+    },
+    "25": {
+        "type": 3,
+        "subExprsNum": 2,
+        "object": "26",
+        "indexes": [
+            "44",
+            "45"
+        ]
+    },
+    "26": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            896,
+            2048
+        ],
+        "paddings": [
+            0,
+            0
+        ],
+        "source": "27"
+    },
+    "27": {
+        "type": 105,
+        "inputs": [
+            "28"
+        ],
+        "expr": "29",
+        "outputShape": [
+            896,
+            2048
+        ]
+    },
+    "28": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            128,
+            2048,
+            1,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "29": {
+        "type": 2,
+        "paddings": [
+            0,
+            0
+        ],
+        "summand": "30",
+        "loopVarRanges": {
+            "i34": [
+                0,
+                896
+            ],
+            "c": [
+                0,
+                2048
+            ]
+        }
+    },
+    "30": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "31",
+        "indexes": [
+            "32",
+            "35",
+            "36",
+            "41"
+        ]
+    },
+    "31": {
+        "type": 4,
+        "name": "K",
+        "shape": [
+            128,
+            2048,
+            1,
+            7
+        ],
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "32": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "33",
+        "rhs": "34"
+    },
+    "33": {
+        "type": 5,
+        "name": "i34"
+    },
+    "34": {
+        "type": 0,
+        "val": 7
+    },
+    "35": {
+        "type": 5,
+        "name": "c"
+    },
+    "36": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "37",
+        "rhs": "40"
+    },
+    "37": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "38",
+        "rhs": "39"
+    },
+    "38": {
+        "type": 5,
+        "name": "i34"
+    },
+    "39": {
+        "type": 0,
+        "val": 7
+    },
+    "40": {
+        "type": 0,
+        "val": 1
+    },
+    "41": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "42",
+        "rhs": "43"
+    },
+    "42": {
+        "type": 5,
+        "name": "i34"
+    },
+    "43": {
+        "type": 0,
+        "val": 7
+    },
+    "44": {
+        "type": 5,
+        "name": "i34"
+    },
+    "45": {
+        "type": 5,
+        "name": "c"
+    },
+    "46": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "47",
+        "rhs": "54"
+    },
+    "47": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "48",
+        "rhs": "51"
+    },
+    "48": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "49",
+        "rhs": "50"
+    },
+    "49": {
+        "type": 0,
+        "val": 49
+    },
+    "50": {
+        "type": 5,
+        "name": "n"
+    },
+    "51": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "52",
+        "rhs": "53"
+    },
+    "52": {
+        "type": 0,
+        "val": 7
+    },
+    "53": {
+        "type": 5,
+        "name": "i19"
+    },
+    "54": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "55",
+        "rhs": "56"
+    },
+    "55": {
+        "type": 5,
+        "name": "i30"
+    },
+    "56": {
+        "type": 0,
+        "val": -3
+    },
+    "57": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "58",
+        "rhs": "65"
+    },
+    "58": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "59",
+        "rhs": "62"
+    },
+    "59": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "60",
+        "rhs": "61"
+    },
+    "60": {
+        "type": 0,
+        "val": 7
+    },
+    "61": {
+        "type": 5,
+        "name": "f"
+    },
+    "62": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "63",
+        "rhs": "64"
+    },
+    "63": {
+        "type": 0,
+        "val": 7
+    },
+    "64": {
+        "type": 5,
+        "name": "i20"
+    },
+    "65": {
+        "type": 5,
+        "name": "i31"
+    },
+    "66": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "67",
+        "rhs": "68"
+    },
+    "67": {
+        "type": 5,
+        "name": "w"
+    },
+    "68": {
+        "type": 5,
+        "name": "s"
+    },
+    "69": {
+        "type": 5,
+        "name": "s"
+    },
+    "70": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "71",
+        "rhs": "72"
+    },
+    "71": {
+        "type": 5,
+        "name": "h"
+    },
+    "72": {
+        "type": 5,
+        "name": "r"
+    },
+    "73": {
+        "type": 5,
+        "name": "r"
+    },
+    "74": {
+        "type": 5,
+        "name": "n"
+    },
+    "75": {
+        "type": 5,
+        "name": "f"
+    }
+}
diff --git a/test/nnet/log/g2bmm/G2BMM_0.expr b/test/nnet/log/g2bmm/G2BMM_0.expr
new file mode 100644
index 00000000..8ea86d64
--- /dev/null
+++ b/test/nnet/log/g2bmm/G2BMM_0.expr
@@ -0,0 +1,143 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 0 Init: ruleBasedDFS: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "m": [
+                0,
+                10000
+            ],
+            "w": [
+                0,
+                65
+            ]
+        },
+        "sumVarRanges": {
+            "k": [
+                0,
+                512
+            ]
+        }
+    },
+    "1": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "2",
+        "rhs": "7"
+    },
+    "2": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "3",
+        "indexes": [
+            "4",
+            "5",
+            "6"
+        ]
+    },
+    "3": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "4": {
+        "type": 5,
+        "name": "b"
+    },
+    "5": {
+        "type": 5,
+        "name": "m"
+    },
+    "6": {
+        "type": 5,
+        "name": "k"
+    },
+    "7": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "8",
+        "indexes": [
+            "9",
+            "10",
+            "17"
+        ]
+    },
+    "8": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "9": {
+        "type": 5,
+        "name": "b"
+    },
+    "10": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "11",
+        "rhs": "12"
+    },
+    "11": {
+        "type": 5,
+        "name": "m"
+    },
+    "12": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "13",
+        "rhs": "14"
+    },
+    "13": {
+        "type": 0,
+        "val": 4
+    },
+    "14": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "15",
+        "rhs": "16"
+    },
+    "15": {
+        "type": 5,
+        "name": "w"
+    },
+    "16": {
+        "type": 0,
+        "val": -32
+    },
+    "17": {
+        "type": 5,
+        "name": "k"
+    }
+}
diff --git a/test/nnet/log/g2bmm/G2BMM_1.expr b/test/nnet/log/g2bmm/G2BMM_1.expr
new file mode 100644
index 00000000..6c4efe18
--- /dev/null
+++ b/test/nnet/log/g2bmm/G2BMM_1.expr
@@ -0,0 +1,258 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 1 Rule1VariableSplit: Old iters: [m], new iters: [i3,i4] phis: [(m / 4),(m % 4)] psis: [((4 * i3) + i4)]",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "m": [
+                0,
+                10000
+            ],
+            "w": [
+                0,
+                65
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "2",
+        "indexes": [
+            "28",
+            "31",
+            "34",
+            "35"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i3": [
+                0,
+                2500
+            ],
+            "i4": [
+                0,
+                4
+            ],
+            "b": [
+                0,
+                8
+            ],
+            "w": [
+                0,
+                65
+            ]
+        },
+        "sumVarRanges": {
+            "k": [
+                0,
+                512
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "13"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "5",
+        "indexes": [
+            "6",
+            "7",
+            "12"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "6": {
+        "type": 5,
+        "name": "b"
+    },
+    "7": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "8",
+        "rhs": "11"
+    },
+    "8": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "9",
+        "rhs": "10"
+    },
+    "9": {
+        "type": 0,
+        "val": 4
+    },
+    "10": {
+        "type": 5,
+        "name": "i3"
+    },
+    "11": {
+        "type": 5,
+        "name": "i4"
+    },
+    "12": {
+        "type": 5,
+        "name": "k"
+    },
+    "13": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "14",
+        "indexes": [
+            "15",
+            "16",
+            "27"
+        ]
+    },
+    "14": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "15": {
+        "type": 5,
+        "name": "b"
+    },
+    "16": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "17",
+        "rhs": "22"
+    },
+    "17": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "18",
+        "rhs": "21"
+    },
+    "18": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "19",
+        "rhs": "20"
+    },
+    "19": {
+        "type": 0,
+        "val": 4
+    },
+    "20": {
+        "type": 5,
+        "name": "i3"
+    },
+    "21": {
+        "type": 5,
+        "name": "i4"
+    },
+    "22": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "23",
+        "rhs": "24"
+    },
+    "23": {
+        "type": 0,
+        "val": 4
+    },
+    "24": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "25",
+        "rhs": "26"
+    },
+    "25": {
+        "type": 5,
+        "name": "w"
+    },
+    "26": {
+        "type": 0,
+        "val": -32
+    },
+    "27": {
+        "type": 5,
+        "name": "k"
+    },
+    "28": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "29",
+        "rhs": "30"
+    },
+    "29": {
+        "type": 5,
+        "name": "m"
+    },
+    "30": {
+        "type": 0,
+        "val": 4
+    },
+    "31": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "32",
+        "rhs": "33"
+    },
+    "32": {
+        "type": 5,
+        "name": "m"
+    },
+    "33": {
+        "type": 0,
+        "val": 4
+    },
+    "34": {
+        "type": 5,
+        "name": "b"
+    },
+    "35": {
+        "type": 5,
+        "name": "w"
+    }
+}
diff --git a/test/nnet/log/g2bmm/G2BMM_2.expr b/test/nnet/log/g2bmm/G2BMM_2.expr
new file mode 100644
index 00000000..a879d75e
--- /dev/null
+++ b/test/nnet/log/g2bmm/G2BMM_2.expr
@@ -0,0 +1,292 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 2 Rule7DLT: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "m": [
+                0,
+                10000
+            ],
+            "w": [
+                0,
+                65
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "2",
+        "indexes": [
+            "31",
+            "34",
+            "37",
+            "38"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i3": [
+                0,
+                2500
+            ],
+            "i4": [
+                0,
+                4
+            ],
+            "b": [
+                0,
+                8
+            ],
+            "w": [
+                0,
+                65
+            ]
+        },
+        "sumVarRanges": {
+            "k": [
+                0,
+                512
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "16"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "5",
+        "indexes": [
+            "9",
+            "10",
+            "15"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "6"
+    },
+    "6": {
+        "type": 105,
+        "inputs": [
+            "7"
+        ],
+        "expr": "8",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "9": {
+        "type": 5,
+        "name": "b"
+    },
+    "10": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "11",
+        "rhs": "12"
+    },
+    "11": {
+        "type": 5,
+        "name": "i3"
+    },
+    "12": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "13",
+        "rhs": "14"
+    },
+    "13": {
+        "type": 0,
+        "val": 2500
+    },
+    "14": {
+        "type": 5,
+        "name": "i4"
+    },
+    "15": {
+        "type": 5,
+        "name": "k"
+    },
+    "16": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "17",
+        "indexes": [
+            "18",
+            "19",
+            "30"
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "18": {
+        "type": 5,
+        "name": "b"
+    },
+    "19": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "20",
+        "rhs": "25"
+    },
+    "20": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "21",
+        "rhs": "24"
+    },
+    "21": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "22",
+        "rhs": "23"
+    },
+    "22": {
+        "type": 0,
+        "val": 4
+    },
+    "23": {
+        "type": 5,
+        "name": "i3"
+    },
+    "24": {
+        "type": 5,
+        "name": "i4"
+    },
+    "25": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "26",
+        "rhs": "27"
+    },
+    "26": {
+        "type": 0,
+        "val": 4
+    },
+    "27": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "28",
+        "rhs": "29"
+    },
+    "28": {
+        "type": 5,
+        "name": "w"
+    },
+    "29": {
+        "type": 0,
+        "val": -32
+    },
+    "30": {
+        "type": 5,
+        "name": "k"
+    },
+    "31": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "32",
+        "rhs": "33"
+    },
+    "32": {
+        "type": 5,
+        "name": "m"
+    },
+    "33": {
+        "type": 0,
+        "val": 4
+    },
+    "34": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 5,
+        "name": "m"
+    },
+    "36": {
+        "type": 0,
+        "val": 4
+    },
+    "37": {
+        "type": 5,
+        "name": "b"
+    },
+    "38": {
+        "type": 5,
+        "name": "w"
+    }
+}
diff --git a/test/nnet/log/g2bmm/G2BMM_3.expr b/test/nnet/log/g2bmm/G2BMM_3.expr
new file mode 100644
index 00000000..b83d57a0
--- /dev/null
+++ b/test/nnet/log/g2bmm/G2BMM_3.expr
@@ -0,0 +1,306 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 3 Rule7DLT: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "m": [
+                0,
+                10000
+            ],
+            "w": [
+                0,
+                65
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "2",
+        "indexes": [
+            "30",
+            "33",
+            "36",
+            "37"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i3": [
+                0,
+                2500
+            ],
+            "i4": [
+                0,
+                4
+            ],
+            "b": [
+                0,
+                8
+            ],
+            "w": [
+                0,
+                65
+            ]
+        },
+        "sumVarRanges": {
+            "k": [
+                0,
+                512
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "16"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "5",
+        "indexes": [
+            "9",
+            "10",
+            "15"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "6"
+    },
+    "6": {
+        "type": 105,
+        "inputs": [
+            "7"
+        ],
+        "expr": "8",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "9": {
+        "type": 5,
+        "name": "b"
+    },
+    "10": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "11",
+        "rhs": "12"
+    },
+    "11": {
+        "type": 5,
+        "name": "i3"
+    },
+    "12": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "13",
+        "rhs": "14"
+    },
+    "13": {
+        "type": 0,
+        "val": 2500
+    },
+    "14": {
+        "type": 5,
+        "name": "i4"
+    },
+    "15": {
+        "type": 5,
+        "name": "k"
+    },
+    "16": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "17",
+        "indexes": [
+            "21",
+            "22",
+            "29"
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "18"
+    },
+    "18": {
+        "type": 105,
+        "inputs": [
+            "19"
+        ],
+        "expr": "20",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "19": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "20": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "21": {
+        "type": 5,
+        "name": "b"
+    },
+    "22": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "23",
+        "rhs": "28"
+    },
+    "23": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "24",
+        "rhs": "25"
+    },
+    "24": {
+        "type": 5,
+        "name": "i3"
+    },
+    "25": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "26",
+        "rhs": "27"
+    },
+    "26": {
+        "type": 0,
+        "val": 2500
+    },
+    "27": {
+        "type": 5,
+        "name": "i4"
+    },
+    "28": {
+        "type": 5,
+        "name": "w"
+    },
+    "29": {
+        "type": 5,
+        "name": "k"
+    },
+    "30": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "31",
+        "rhs": "32"
+    },
+    "31": {
+        "type": 5,
+        "name": "m"
+    },
+    "32": {
+        "type": 0,
+        "val": 4
+    },
+    "33": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "34",
+        "rhs": "35"
+    },
+    "34": {
+        "type": 5,
+        "name": "m"
+    },
+    "35": {
+        "type": 0,
+        "val": 4
+    },
+    "36": {
+        "type": 5,
+        "name": "b"
+    },
+    "37": {
+        "type": 5,
+        "name": "w"
+    }
+}
diff --git a/test/nnet/log/g2bmm/G2BMM_4.expr b/test/nnet/log/g2bmm/G2BMM_4.expr
new file mode 100644
index 00000000..b62acc93
--- /dev/null
+++ b/test/nnet/log/g2bmm/G2BMM_4.expr
@@ -0,0 +1,331 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 4 Rule2VariableMerging: Old iters: [i3,i4], new iters: [i7] phis: [(i3 + (2500 * i4))] psis: [(i7 % 2500),(i7 / 2500)]",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "m": [
+                0,
+                10000
+            ],
+            "w": [
+                0,
+                65
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "2",
+        "indexes": [
+            "31",
+            "34",
+            "37",
+            "38"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i3": [
+                0,
+                2500
+            ],
+            "i4": [
+                0,
+                4
+            ],
+            "b": [
+                0,
+                8
+            ],
+            "w": [
+                0,
+                65
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "4",
+        "indexes": [
+            "24",
+            "29",
+            "30"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i7": [
+                0,
+                10000
+            ],
+            "b": [
+                0,
+                8
+            ],
+            "w": [
+                0,
+                65
+            ]
+        },
+        "sumVarRanges": {
+            "k": [
+                0,
+                512
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "14"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "7",
+        "indexes": [
+            "11",
+            "12",
+            "13"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "8"
+    },
+    "8": {
+        "type": 105,
+        "inputs": [
+            "9"
+        ],
+        "expr": "10",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "9": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "10": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "11": {
+        "type": 5,
+        "name": "b"
+    },
+    "12": {
+        "type": 5,
+        "name": "i7"
+    },
+    "13": {
+        "type": 5,
+        "name": "k"
+    },
+    "14": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "15",
+        "indexes": [
+            "19",
+            "20",
+            "23"
+        ]
+    },
+    "15": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "16"
+    },
+    "16": {
+        "type": 105,
+        "inputs": [
+            "17"
+        ],
+        "expr": "18",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "18": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "19": {
+        "type": 5,
+        "name": "b"
+    },
+    "20": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "21",
+        "rhs": "22"
+    },
+    "21": {
+        "type": 5,
+        "name": "i7"
+    },
+    "22": {
+        "type": 5,
+        "name": "w"
+    },
+    "23": {
+        "type": 5,
+        "name": "k"
+    },
+    "24": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "25",
+        "rhs": "26"
+    },
+    "25": {
+        "type": 5,
+        "name": "i3"
+    },
+    "26": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "27",
+        "rhs": "28"
+    },
+    "27": {
+        "type": 0,
+        "val": 2500
+    },
+    "28": {
+        "type": 5,
+        "name": "i4"
+    },
+    "29": {
+        "type": 5,
+        "name": "b"
+    },
+    "30": {
+        "type": 5,
+        "name": "w"
+    },
+    "31": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "32",
+        "rhs": "33"
+    },
+    "32": {
+        "type": 5,
+        "name": "m"
+    },
+    "33": {
+        "type": 0,
+        "val": 4
+    },
+    "34": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 5,
+        "name": "m"
+    },
+    "36": {
+        "type": 0,
+        "val": 4
+    },
+    "37": {
+        "type": 5,
+        "name": "b"
+    },
+    "38": {
+        "type": 5,
+        "name": "w"
+    }
+}
diff --git a/test/nnet/log/g2bmm/G2BMM_5.expr b/test/nnet/log/g2bmm/G2BMM_5.expr
new file mode 100644
index 00000000..d984cfa6
--- /dev/null
+++ b/test/nnet/log/g2bmm/G2BMM_5.expr
@@ -0,0 +1,276 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 5 Rule4StageMerging: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "m": [
+                0,
+                10000
+            ],
+            "w": [
+                0,
+                65
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "2",
+        "indexes": [
+            "22",
+            "31",
+            "32"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i7": [
+                0,
+                10000
+            ],
+            "b": [
+                0,
+                8
+            ],
+            "w": [
+                0,
+                65
+            ]
+        },
+        "sumVarRanges": {
+            "k": [
+                0,
+                512
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "12"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "5",
+        "indexes": [
+            "9",
+            "10",
+            "11"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "6"
+    },
+    "6": {
+        "type": 105,
+        "inputs": [
+            "7"
+        ],
+        "expr": "8",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "9": {
+        "type": 5,
+        "name": "b"
+    },
+    "10": {
+        "type": 5,
+        "name": "i7"
+    },
+    "11": {
+        "type": 5,
+        "name": "k"
+    },
+    "12": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "13",
+        "indexes": [
+            "17",
+            "18",
+            "21"
+        ]
+    },
+    "13": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "14"
+    },
+    "14": {
+        "type": 105,
+        "inputs": [
+            "15"
+        ],
+        "expr": "16",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "15": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "16": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "17": {
+        "type": 5,
+        "name": "b"
+    },
+    "18": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "19",
+        "rhs": "20"
+    },
+    "19": {
+        "type": 5,
+        "name": "i7"
+    },
+    "20": {
+        "type": 5,
+        "name": "w"
+    },
+    "21": {
+        "type": 5,
+        "name": "k"
+    },
+    "22": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "23",
+        "rhs": "26"
+    },
+    "23": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "24",
+        "rhs": "25"
+    },
+    "24": {
+        "type": 5,
+        "name": "m"
+    },
+    "25": {
+        "type": 0,
+        "val": 4
+    },
+    "26": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "27",
+        "rhs": "28"
+    },
+    "27": {
+        "type": 0,
+        "val": 2500
+    },
+    "28": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "29",
+        "rhs": "30"
+    },
+    "29": {
+        "type": 5,
+        "name": "m"
+    },
+    "30": {
+        "type": 0,
+        "val": 4
+    },
+    "31": {
+        "type": 5,
+        "name": "b"
+    },
+    "32": {
+        "type": 5,
+        "name": "w"
+    }
+}
diff --git a/test/nnet/log/g2bmm/G2BMM_6.expr b/test/nnet/log/g2bmm/G2BMM_6.expr
new file mode 100644
index 00000000..ece30ab4
--- /dev/null
+++ b/test/nnet/log/g2bmm/G2BMM_6.expr
@@ -0,0 +1,321 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 6 Rule8GuidedDLT: Toward Sg2bmm. guidedDLTMoreVar2 guidedDLTDLMismatch ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "m": [
+                0,
+                10000
+            ],
+            "w": [
+                0,
+                65
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "2",
+        "indexes": [
+            "27",
+            "36",
+            "37"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i7": [
+                0,
+                10000
+            ],
+            "b": [
+                0,
+                8
+            ],
+            "w": [
+                0,
+                65
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "4",
+        "indexes": [
+            "24",
+            "25",
+            "26"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "i7": [
+                0,
+                10000
+            ],
+            "w": [
+                0,
+                65
+            ]
+        },
+        "sumVarRanges": {
+            "k": [
+                0,
+                512
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "14"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "7",
+        "indexes": [
+            "11",
+            "12",
+            "13"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "8"
+    },
+    "8": {
+        "type": 105,
+        "inputs": [
+            "9"
+        ],
+        "expr": "10",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "9": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "10": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "11": {
+        "type": 5,
+        "name": "b"
+    },
+    "12": {
+        "type": 5,
+        "name": "i7"
+    },
+    "13": {
+        "type": 5,
+        "name": "k"
+    },
+    "14": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "15",
+        "indexes": [
+            "19",
+            "20",
+            "23"
+        ]
+    },
+    "15": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "16"
+    },
+    "16": {
+        "type": 105,
+        "inputs": [
+            "17"
+        ],
+        "expr": "18",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "18": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "19": {
+        "type": 5,
+        "name": "b"
+    },
+    "20": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "21",
+        "rhs": "22"
+    },
+    "21": {
+        "type": 5,
+        "name": "i7"
+    },
+    "22": {
+        "type": 5,
+        "name": "w"
+    },
+    "23": {
+        "type": 5,
+        "name": "k"
+    },
+    "24": {
+        "type": 5,
+        "name": "b"
+    },
+    "25": {
+        "type": 5,
+        "name": "i7"
+    },
+    "26": {
+        "type": 5,
+        "name": "w"
+    },
+    "27": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "28",
+        "rhs": "31"
+    },
+    "28": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "29",
+        "rhs": "30"
+    },
+    "29": {
+        "type": 5,
+        "name": "m"
+    },
+    "30": {
+        "type": 0,
+        "val": 4
+    },
+    "31": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "32",
+        "rhs": "33"
+    },
+    "32": {
+        "type": 0,
+        "val": 2500
+    },
+    "33": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "34",
+        "rhs": "35"
+    },
+    "34": {
+        "type": 5,
+        "name": "m"
+    },
+    "35": {
+        "type": 0,
+        "val": 4
+    },
+    "36": {
+        "type": 5,
+        "name": "b"
+    },
+    "37": {
+        "type": 5,
+        "name": "w"
+    }
+}
diff --git a/test/nnet/log/g2bmm/G2BMM_7.expr b/test/nnet/log/g2bmm/G2BMM_7.expr
new file mode 100644
index 00000000..2290f192
--- /dev/null
+++ b/test/nnet/log/g2bmm/G2BMM_7.expr
@@ -0,0 +1,276 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 7 Rule4StageMerging: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "m": [
+                0,
+                10000
+            ],
+            "w": [
+                0,
+                65
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "2",
+        "indexes": [
+            "22",
+            "23",
+            "32"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "i7": [
+                0,
+                10000
+            ],
+            "w": [
+                0,
+                65
+            ]
+        },
+        "sumVarRanges": {
+            "k": [
+                0,
+                512
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "12"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "5",
+        "indexes": [
+            "9",
+            "10",
+            "11"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "6"
+    },
+    "6": {
+        "type": 105,
+        "inputs": [
+            "7"
+        ],
+        "expr": "8",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "9": {
+        "type": 5,
+        "name": "b"
+    },
+    "10": {
+        "type": 5,
+        "name": "i7"
+    },
+    "11": {
+        "type": 5,
+        "name": "k"
+    },
+    "12": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "13",
+        "indexes": [
+            "17",
+            "18",
+            "21"
+        ]
+    },
+    "13": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "14"
+    },
+    "14": {
+        "type": 105,
+        "inputs": [
+            "15"
+        ],
+        "expr": "16",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "15": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "16": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "17": {
+        "type": 5,
+        "name": "b"
+    },
+    "18": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "19",
+        "rhs": "20"
+    },
+    "19": {
+        "type": 5,
+        "name": "i7"
+    },
+    "20": {
+        "type": 5,
+        "name": "w"
+    },
+    "21": {
+        "type": 5,
+        "name": "k"
+    },
+    "22": {
+        "type": 5,
+        "name": "b"
+    },
+    "23": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "24",
+        "rhs": "27"
+    },
+    "24": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "25",
+        "rhs": "26"
+    },
+    "25": {
+        "type": 5,
+        "name": "m"
+    },
+    "26": {
+        "type": 0,
+        "val": 4
+    },
+    "27": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "28",
+        "rhs": "29"
+    },
+    "28": {
+        "type": 0,
+        "val": 2500
+    },
+    "29": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "30",
+        "rhs": "31"
+    },
+    "30": {
+        "type": 5,
+        "name": "m"
+    },
+    "31": {
+        "type": 0,
+        "val": 4
+    },
+    "32": {
+        "type": 5,
+        "name": "w"
+    }
+}
diff --git a/test/nnet/log/g2bmm/G2BMM_8.expr b/test/nnet/log/g2bmm/G2BMM_8.expr
new file mode 100644
index 00000000..7aca069b
--- /dev/null
+++ b/test/nnet/log/g2bmm/G2BMM_8.expr
@@ -0,0 +1,404 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 8 Rule6KenerlMatching: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "m": [
+                0,
+                10000
+            ],
+            "w": [
+                0,
+                65
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "2",
+        "indexes": [
+            "32",
+            "33",
+            "42"
+        ]
+    },
+    "2": {
+        "type": 4,
+        "name": "T4",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "3"
+    },
+    "3": {
+        "type": 103,
+        "inputs": [
+            "4",
+            "8"
+        ],
+        "expr": "12",
+        "args": [
+            8,
+            10000,
+            32,
+            512,
+            1
+        ]
+    },
+    "4": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "5"
+    },
+    "5": {
+        "type": 105,
+        "inputs": [
+            "6"
+        ],
+        "expr": "7",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "6": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "7": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "8": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "9"
+    },
+    "9": {
+        "type": 105,
+        "inputs": [
+            "10"
+        ],
+        "expr": "11",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "10": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "11": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "12": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "13",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "i7": [
+                0,
+                10000
+            ],
+            "w": [
+                0,
+                65
+            ]
+        },
+        "sumVarRanges": {
+            "k": [
+                0,
+                512
+            ]
+        }
+    },
+    "13": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "14",
+        "rhs": "22"
+    },
+    "14": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "15",
+        "indexes": [
+            "19",
+            "20",
+            "21"
+        ]
+    },
+    "15": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "16"
+    },
+    "16": {
+        "type": 105,
+        "inputs": [
+            "17"
+        ],
+        "expr": "18",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "18": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "19": {
+        "type": 5,
+        "name": "b"
+    },
+    "20": {
+        "type": 5,
+        "name": "i7"
+    },
+    "21": {
+        "type": 5,
+        "name": "k"
+    },
+    "22": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "23",
+        "indexes": [
+            "27",
+            "28",
+            "31"
+        ]
+    },
+    "23": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "24"
+    },
+    "24": {
+        "type": 105,
+        "inputs": [
+            "25"
+        ],
+        "expr": "26",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "25": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "26": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "27": {
+        "type": 5,
+        "name": "b"
+    },
+    "28": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "29",
+        "rhs": "30"
+    },
+    "29": {
+        "type": 5,
+        "name": "i7"
+    },
+    "30": {
+        "type": 5,
+        "name": "w"
+    },
+    "31": {
+        "type": 5,
+        "name": "k"
+    },
+    "32": {
+        "type": 5,
+        "name": "b"
+    },
+    "33": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "34",
+        "rhs": "37"
+    },
+    "34": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 5,
+        "name": "m"
+    },
+    "36": {
+        "type": 0,
+        "val": 4
+    },
+    "37": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "38",
+        "rhs": "39"
+    },
+    "38": {
+        "type": 0,
+        "val": 2500
+    },
+    "39": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "40",
+        "rhs": "41"
+    },
+    "40": {
+        "type": 5,
+        "name": "m"
+    },
+    "41": {
+        "type": 0,
+        "val": 4
+    },
+    "42": {
+        "type": 5,
+        "name": "w"
+    }
+}
diff --git a/test/nnet/log/g2bmm/G2BMM_9.expr b/test/nnet/log/g2bmm/G2BMM_9.expr
new file mode 100644
index 00000000..660ad45b
--- /dev/null
+++ b/test/nnet/log/g2bmm/G2BMM_9.expr
@@ -0,0 +1,746 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 9 Rule6KenerlMatching: ",
+    "0": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "1"
+    },
+    "1": {
+        "type": 105,
+        "inputs": [
+            "2"
+        ],
+        "expr": "32",
+        "outputShape": [
+            8,
+            10000,
+            65
+        ]
+    },
+    "2": {
+        "type": 4,
+        "name": "T4",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "3"
+    },
+    "3": {
+        "type": 103,
+        "inputs": [
+            "4",
+            "8"
+        ],
+        "expr": "12",
+        "args": [
+            8,
+            10000,
+            32,
+            512,
+            1
+        ]
+    },
+    "4": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "5"
+    },
+    "5": {
+        "type": 105,
+        "inputs": [
+            "6"
+        ],
+        "expr": "7",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "6": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "7": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "8": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "9"
+    },
+    "9": {
+        "type": 105,
+        "inputs": [
+            "10"
+        ],
+        "expr": "11",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "10": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "11": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "12": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "13",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "i7": [
+                0,
+                10000
+            ],
+            "w": [
+                0,
+                65
+            ]
+        },
+        "sumVarRanges": {
+            "k": [
+                0,
+                512
+            ]
+        }
+    },
+    "13": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "14",
+        "rhs": "22"
+    },
+    "14": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "15",
+        "indexes": [
+            "19",
+            "20",
+            "21"
+        ]
+    },
+    "15": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "16"
+    },
+    "16": {
+        "type": 105,
+        "inputs": [
+            "17"
+        ],
+        "expr": "18",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "18": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "19": {
+        "type": 5,
+        "name": "b"
+    },
+    "20": {
+        "type": 5,
+        "name": "i7"
+    },
+    "21": {
+        "type": 5,
+        "name": "k"
+    },
+    "22": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "23",
+        "indexes": [
+            "27",
+            "28",
+            "31"
+        ]
+    },
+    "23": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "24"
+    },
+    "24": {
+        "type": 105,
+        "inputs": [
+            "25"
+        ],
+        "expr": "26",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "25": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "26": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "27": {
+        "type": 5,
+        "name": "b"
+    },
+    "28": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "29",
+        "rhs": "30"
+    },
+    "29": {
+        "type": 5,
+        "name": "i7"
+    },
+    "30": {
+        "type": 5,
+        "name": "w"
+    },
+    "31": {
+        "type": 5,
+        "name": "k"
+    },
+    "32": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "33",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "m": [
+                0,
+                10000
+            ],
+            "w": [
+                0,
+                65
+            ]
+        }
+    },
+    "33": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "34",
+        "indexes": [
+            "64",
+            "65",
+            "74"
+        ]
+    },
+    "34": {
+        "type": 4,
+        "name": "T4",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "35"
+    },
+    "35": {
+        "type": 103,
+        "inputs": [
+            "36",
+            "40"
+        ],
+        "expr": "44",
+        "args": [
+            8,
+            10000,
+            32,
+            512,
+            1
+        ]
+    },
+    "36": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "37"
+    },
+    "37": {
+        "type": 105,
+        "inputs": [
+            "38"
+        ],
+        "expr": "39",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "38": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "39": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "40": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "41"
+    },
+    "41": {
+        "type": 105,
+        "inputs": [
+            "42"
+        ],
+        "expr": "43",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "42": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "43": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "44": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "45",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "i7": [
+                0,
+                10000
+            ],
+            "w": [
+                0,
+                65
+            ]
+        },
+        "sumVarRanges": {
+            "k": [
+                0,
+                512
+            ]
+        }
+    },
+    "45": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "46",
+        "rhs": "54"
+    },
+    "46": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "47",
+        "indexes": [
+            "51",
+            "52",
+            "53"
+        ]
+    },
+    "47": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "48"
+    },
+    "48": {
+        "type": 105,
+        "inputs": [
+            "49"
+        ],
+        "expr": "50",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "49": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "50": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "51": {
+        "type": 5,
+        "name": "b"
+    },
+    "52": {
+        "type": 5,
+        "name": "i7"
+    },
+    "53": {
+        "type": 5,
+        "name": "k"
+    },
+    "54": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "55",
+        "indexes": [
+            "59",
+            "60",
+            "63"
+        ]
+    },
+    "55": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "56"
+    },
+    "56": {
+        "type": 105,
+        "inputs": [
+            "57"
+        ],
+        "expr": "58",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "57": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "58": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "59": {
+        "type": 5,
+        "name": "b"
+    },
+    "60": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "61",
+        "rhs": "62"
+    },
+    "61": {
+        "type": 5,
+        "name": "i7"
+    },
+    "62": {
+        "type": 5,
+        "name": "w"
+    },
+    "63": {
+        "type": 5,
+        "name": "k"
+    },
+    "64": {
+        "type": 5,
+        "name": "b"
+    },
+    "65": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "66",
+        "rhs": "69"
+    },
+    "66": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "67",
+        "rhs": "68"
+    },
+    "67": {
+        "type": 5,
+        "name": "m"
+    },
+    "68": {
+        "type": 0,
+        "val": 4
+    },
+    "69": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "70",
+        "rhs": "71"
+    },
+    "70": {
+        "type": 0,
+        "val": 2500
+    },
+    "71": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "72",
+        "rhs": "73"
+    },
+    "72": {
+        "type": 5,
+        "name": "m"
+    },
+    "73": {
+        "type": 0,
+        "val": 4
+    },
+    "74": {
+        "type": 5,
+        "name": "w"
+    }
+}
diff --git a/test/nnet/log/gbmm/GBMM_0.expr b/test/nnet/log/gbmm/GBMM_0.expr
new file mode 100644
index 00000000..f9647dc1
--- /dev/null
+++ b/test/nnet/log/gbmm/GBMM_0.expr
@@ -0,0 +1,143 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 0 Init: ruleBasedDFS: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "m": [
+                0,
+                10000
+            ],
+            "n": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "w": [
+                0,
+                65
+            ]
+        }
+    },
+    "1": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "2",
+        "rhs": "7"
+    },
+    "2": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "3",
+        "indexes": [
+            "4",
+            "5",
+            "6"
+        ]
+    },
+    "3": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "4": {
+        "type": 5,
+        "name": "b"
+    },
+    "5": {
+        "type": 5,
+        "name": "m"
+    },
+    "6": {
+        "type": 5,
+        "name": "w"
+    },
+    "7": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "8",
+        "indexes": [
+            "9",
+            "10",
+            "17"
+        ]
+    },
+    "8": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "9": {
+        "type": 5,
+        "name": "b"
+    },
+    "10": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "11",
+        "rhs": "16"
+    },
+    "11": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "12",
+        "rhs": "13"
+    },
+    "12": {
+        "type": 5,
+        "name": "m"
+    },
+    "13": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "14",
+        "rhs": "15"
+    },
+    "14": {
+        "type": 0,
+        "val": 4
+    },
+    "15": {
+        "type": 5,
+        "name": "w"
+    },
+    "16": {
+        "type": 0,
+        "val": -128
+    },
+    "17": {
+        "type": 5,
+        "name": "n"
+    }
+}
diff --git a/test/nnet/log/gbmm/GBMM_1.expr b/test/nnet/log/gbmm/GBMM_1.expr
new file mode 100644
index 00000000..c0a2c2ee
--- /dev/null
+++ b/test/nnet/log/gbmm/GBMM_1.expr
@@ -0,0 +1,258 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 1 Rule1VariableSplit: Old iters: [m], new iters: [i3,i4] phis: [(m / 4),(m % 4)] psis: [((4 * i3) + i4)]",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "m": [
+                0,
+                10000
+            ],
+            "n": [
+                0,
+                512
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "2",
+        "indexes": [
+            "28",
+            "31",
+            "34",
+            "35"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i3": [
+                0,
+                2500
+            ],
+            "i4": [
+                0,
+                4
+            ],
+            "b": [
+                0,
+                8
+            ],
+            "n": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "w": [
+                0,
+                65
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "13"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "5",
+        "indexes": [
+            "6",
+            "7",
+            "12"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "6": {
+        "type": 5,
+        "name": "b"
+    },
+    "7": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "8",
+        "rhs": "11"
+    },
+    "8": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "9",
+        "rhs": "10"
+    },
+    "9": {
+        "type": 0,
+        "val": 4
+    },
+    "10": {
+        "type": 5,
+        "name": "i3"
+    },
+    "11": {
+        "type": 5,
+        "name": "i4"
+    },
+    "12": {
+        "type": 5,
+        "name": "w"
+    },
+    "13": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "14",
+        "indexes": [
+            "15",
+            "16",
+            "27"
+        ]
+    },
+    "14": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "15": {
+        "type": 5,
+        "name": "b"
+    },
+    "16": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "17",
+        "rhs": "26"
+    },
+    "17": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "18",
+        "rhs": "23"
+    },
+    "18": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "19",
+        "rhs": "22"
+    },
+    "19": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "20",
+        "rhs": "21"
+    },
+    "20": {
+        "type": 0,
+        "val": 4
+    },
+    "21": {
+        "type": 5,
+        "name": "i3"
+    },
+    "22": {
+        "type": 5,
+        "name": "i4"
+    },
+    "23": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "24",
+        "rhs": "25"
+    },
+    "24": {
+        "type": 0,
+        "val": 4
+    },
+    "25": {
+        "type": 5,
+        "name": "w"
+    },
+    "26": {
+        "type": 0,
+        "val": -128
+    },
+    "27": {
+        "type": 5,
+        "name": "n"
+    },
+    "28": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "29",
+        "rhs": "30"
+    },
+    "29": {
+        "type": 5,
+        "name": "m"
+    },
+    "30": {
+        "type": 0,
+        "val": 4
+    },
+    "31": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "32",
+        "rhs": "33"
+    },
+    "32": {
+        "type": 5,
+        "name": "m"
+    },
+    "33": {
+        "type": 0,
+        "val": 4
+    },
+    "34": {
+        "type": 5,
+        "name": "b"
+    },
+    "35": {
+        "type": 5,
+        "name": "n"
+    }
+}
diff --git a/test/nnet/log/gbmm/GBMM_2.expr b/test/nnet/log/gbmm/GBMM_2.expr
new file mode 100644
index 00000000..cd4670cd
--- /dev/null
+++ b/test/nnet/log/gbmm/GBMM_2.expr
@@ -0,0 +1,292 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 2 Rule7DLT: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "m": [
+                0,
+                10000
+            ],
+            "n": [
+                0,
+                512
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "2",
+        "indexes": [
+            "31",
+            "34",
+            "37",
+            "38"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i3": [
+                0,
+                2500
+            ],
+            "i4": [
+                0,
+                4
+            ],
+            "b": [
+                0,
+                8
+            ],
+            "n": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "w": [
+                0,
+                65
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "16"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "5",
+        "indexes": [
+            "9",
+            "10",
+            "15"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "6"
+    },
+    "6": {
+        "type": 105,
+        "inputs": [
+            "7"
+        ],
+        "expr": "8",
+        "outputShape": [
+            8,
+            10000,
+            65
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "9": {
+        "type": 5,
+        "name": "b"
+    },
+    "10": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "11",
+        "rhs": "12"
+    },
+    "11": {
+        "type": 5,
+        "name": "i3"
+    },
+    "12": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "13",
+        "rhs": "14"
+    },
+    "13": {
+        "type": 0,
+        "val": 2500
+    },
+    "14": {
+        "type": 5,
+        "name": "i4"
+    },
+    "15": {
+        "type": 5,
+        "name": "w"
+    },
+    "16": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "17",
+        "indexes": [
+            "18",
+            "19",
+            "30"
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "18": {
+        "type": 5,
+        "name": "b"
+    },
+    "19": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "20",
+        "rhs": "29"
+    },
+    "20": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "21",
+        "rhs": "26"
+    },
+    "21": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "22",
+        "rhs": "25"
+    },
+    "22": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "23",
+        "rhs": "24"
+    },
+    "23": {
+        "type": 0,
+        "val": 4
+    },
+    "24": {
+        "type": 5,
+        "name": "i3"
+    },
+    "25": {
+        "type": 5,
+        "name": "i4"
+    },
+    "26": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "27",
+        "rhs": "28"
+    },
+    "27": {
+        "type": 0,
+        "val": 4
+    },
+    "28": {
+        "type": 5,
+        "name": "w"
+    },
+    "29": {
+        "type": 0,
+        "val": -128
+    },
+    "30": {
+        "type": 5,
+        "name": "n"
+    },
+    "31": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "32",
+        "rhs": "33"
+    },
+    "32": {
+        "type": 5,
+        "name": "m"
+    },
+    "33": {
+        "type": 0,
+        "val": 4
+    },
+    "34": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 5,
+        "name": "m"
+    },
+    "36": {
+        "type": 0,
+        "val": 4
+    },
+    "37": {
+        "type": 5,
+        "name": "b"
+    },
+    "38": {
+        "type": 5,
+        "name": "n"
+    }
+}
diff --git a/test/nnet/log/gbmm/GBMM_3.expr b/test/nnet/log/gbmm/GBMM_3.expr
new file mode 100644
index 00000000..6ba1418a
--- /dev/null
+++ b/test/nnet/log/gbmm/GBMM_3.expr
@@ -0,0 +1,306 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 3 Rule7DLT: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "m": [
+                0,
+                10000
+            ],
+            "n": [
+                0,
+                512
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "2",
+        "indexes": [
+            "30",
+            "33",
+            "36",
+            "37"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i3": [
+                0,
+                2500
+            ],
+            "i4": [
+                0,
+                4
+            ],
+            "b": [
+                0,
+                8
+            ],
+            "n": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "w": [
+                0,
+                65
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "16"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "5",
+        "indexes": [
+            "9",
+            "10",
+            "15"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "6"
+    },
+    "6": {
+        "type": 105,
+        "inputs": [
+            "7"
+        ],
+        "expr": "8",
+        "outputShape": [
+            8,
+            10000,
+            65
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "9": {
+        "type": 5,
+        "name": "b"
+    },
+    "10": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "11",
+        "rhs": "12"
+    },
+    "11": {
+        "type": 5,
+        "name": "i3"
+    },
+    "12": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "13",
+        "rhs": "14"
+    },
+    "13": {
+        "type": 0,
+        "val": 2500
+    },
+    "14": {
+        "type": 5,
+        "name": "i4"
+    },
+    "15": {
+        "type": 5,
+        "name": "w"
+    },
+    "16": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "17",
+        "indexes": [
+            "21",
+            "22",
+            "29"
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "18"
+    },
+    "18": {
+        "type": 105,
+        "inputs": [
+            "19"
+        ],
+        "expr": "20",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "19": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "20": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "21": {
+        "type": 5,
+        "name": "b"
+    },
+    "22": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "23",
+        "rhs": "28"
+    },
+    "23": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "24",
+        "rhs": "25"
+    },
+    "24": {
+        "type": 5,
+        "name": "i3"
+    },
+    "25": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "26",
+        "rhs": "27"
+    },
+    "26": {
+        "type": 0,
+        "val": 2500
+    },
+    "27": {
+        "type": 5,
+        "name": "i4"
+    },
+    "28": {
+        "type": 5,
+        "name": "w"
+    },
+    "29": {
+        "type": 5,
+        "name": "n"
+    },
+    "30": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "31",
+        "rhs": "32"
+    },
+    "31": {
+        "type": 5,
+        "name": "m"
+    },
+    "32": {
+        "type": 0,
+        "val": 4
+    },
+    "33": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "34",
+        "rhs": "35"
+    },
+    "34": {
+        "type": 5,
+        "name": "m"
+    },
+    "35": {
+        "type": 0,
+        "val": 4
+    },
+    "36": {
+        "type": 5,
+        "name": "b"
+    },
+    "37": {
+        "type": 5,
+        "name": "n"
+    }
+}
diff --git a/test/nnet/log/gbmm/GBMM_4.expr b/test/nnet/log/gbmm/GBMM_4.expr
new file mode 100644
index 00000000..3d06dafd
--- /dev/null
+++ b/test/nnet/log/gbmm/GBMM_4.expr
@@ -0,0 +1,331 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 4 Rule2VariableMerging: Old iters: [i3,i4], new iters: [i7] phis: [(i3 + (2500 * i4))] psis: [(i7 % 2500),(i7 / 2500)]",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "m": [
+                0,
+                10000
+            ],
+            "n": [
+                0,
+                512
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 4,
+        "object": "2",
+        "indexes": [
+            "31",
+            "34",
+            "37",
+            "38"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i3": [
+                0,
+                2500
+            ],
+            "i4": [
+                0,
+                4
+            ],
+            "b": [
+                0,
+                8
+            ],
+            "n": [
+                0,
+                512
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "4",
+        "indexes": [
+            "24",
+            "29",
+            "30"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "i7": [
+                0,
+                10000
+            ],
+            "b": [
+                0,
+                8
+            ],
+            "n": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "w": [
+                0,
+                65
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "14"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "7",
+        "indexes": [
+            "11",
+            "12",
+            "13"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "8"
+    },
+    "8": {
+        "type": 105,
+        "inputs": [
+            "9"
+        ],
+        "expr": "10",
+        "outputShape": [
+            8,
+            10000,
+            65
+        ]
+    },
+    "9": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "10": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "11": {
+        "type": 5,
+        "name": "b"
+    },
+    "12": {
+        "type": 5,
+        "name": "i7"
+    },
+    "13": {
+        "type": 5,
+        "name": "w"
+    },
+    "14": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "15",
+        "indexes": [
+            "19",
+            "20",
+            "23"
+        ]
+    },
+    "15": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "16"
+    },
+    "16": {
+        "type": 105,
+        "inputs": [
+            "17"
+        ],
+        "expr": "18",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "18": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "19": {
+        "type": 5,
+        "name": "b"
+    },
+    "20": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "21",
+        "rhs": "22"
+    },
+    "21": {
+        "type": 5,
+        "name": "i7"
+    },
+    "22": {
+        "type": 5,
+        "name": "w"
+    },
+    "23": {
+        "type": 5,
+        "name": "n"
+    },
+    "24": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "25",
+        "rhs": "26"
+    },
+    "25": {
+        "type": 5,
+        "name": "i3"
+    },
+    "26": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "27",
+        "rhs": "28"
+    },
+    "27": {
+        "type": 0,
+        "val": 2500
+    },
+    "28": {
+        "type": 5,
+        "name": "i4"
+    },
+    "29": {
+        "type": 5,
+        "name": "b"
+    },
+    "30": {
+        "type": 5,
+        "name": "n"
+    },
+    "31": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "32",
+        "rhs": "33"
+    },
+    "32": {
+        "type": 5,
+        "name": "m"
+    },
+    "33": {
+        "type": 0,
+        "val": 4
+    },
+    "34": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 5,
+        "name": "m"
+    },
+    "36": {
+        "type": 0,
+        "val": 4
+    },
+    "37": {
+        "type": 5,
+        "name": "b"
+    },
+    "38": {
+        "type": 5,
+        "name": "n"
+    }
+}
diff --git a/test/nnet/log/gbmm/GBMM_5.expr b/test/nnet/log/gbmm/GBMM_5.expr
new file mode 100644
index 00000000..736c697b
--- /dev/null
+++ b/test/nnet/log/gbmm/GBMM_5.expr
@@ -0,0 +1,276 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 5 Rule4StageMerging: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "m": [
+                0,
+                10000
+            ],
+            "n": [
+                0,
+                512
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "2",
+        "indexes": [
+            "22",
+            "31",
+            "32"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i7": [
+                0,
+                10000
+            ],
+            "b": [
+                0,
+                8
+            ],
+            "n": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "w": [
+                0,
+                65
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "12"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "5",
+        "indexes": [
+            "9",
+            "10",
+            "11"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "6"
+    },
+    "6": {
+        "type": 105,
+        "inputs": [
+            "7"
+        ],
+        "expr": "8",
+        "outputShape": [
+            8,
+            10000,
+            65
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "9": {
+        "type": 5,
+        "name": "b"
+    },
+    "10": {
+        "type": 5,
+        "name": "i7"
+    },
+    "11": {
+        "type": 5,
+        "name": "w"
+    },
+    "12": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "13",
+        "indexes": [
+            "17",
+            "18",
+            "21"
+        ]
+    },
+    "13": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "14"
+    },
+    "14": {
+        "type": 105,
+        "inputs": [
+            "15"
+        ],
+        "expr": "16",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "15": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "16": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "17": {
+        "type": 5,
+        "name": "b"
+    },
+    "18": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "19",
+        "rhs": "20"
+    },
+    "19": {
+        "type": 5,
+        "name": "i7"
+    },
+    "20": {
+        "type": 5,
+        "name": "w"
+    },
+    "21": {
+        "type": 5,
+        "name": "n"
+    },
+    "22": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "23",
+        "rhs": "26"
+    },
+    "23": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "24",
+        "rhs": "25"
+    },
+    "24": {
+        "type": 5,
+        "name": "m"
+    },
+    "25": {
+        "type": 0,
+        "val": 4
+    },
+    "26": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "27",
+        "rhs": "28"
+    },
+    "27": {
+        "type": 0,
+        "val": 2500
+    },
+    "28": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "29",
+        "rhs": "30"
+    },
+    "29": {
+        "type": 5,
+        "name": "m"
+    },
+    "30": {
+        "type": 0,
+        "val": 4
+    },
+    "31": {
+        "type": 5,
+        "name": "b"
+    },
+    "32": {
+        "type": 5,
+        "name": "n"
+    }
+}
diff --git a/test/nnet/log/gbmm/GBMM_6.expr b/test/nnet/log/gbmm/GBMM_6.expr
new file mode 100644
index 00000000..65720a53
--- /dev/null
+++ b/test/nnet/log/gbmm/GBMM_6.expr
@@ -0,0 +1,321 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 6 Rule8GuidedDLT: Toward LongformerGBMM. guidedDLTMoreVar2 guidedDLTDLMismatch guidedDLTDLMismatch ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "m": [
+                0,
+                10000
+            ],
+            "n": [
+                0,
+                512
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "2",
+        "indexes": [
+            "27",
+            "36",
+            "37"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "i7": [
+                0,
+                10000
+            ],
+            "b": [
+                0,
+                8
+            ],
+            "n": [
+                0,
+                512
+            ]
+        }
+    },
+    "3": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "4",
+        "indexes": [
+            "24",
+            "25",
+            "26"
+        ]
+    },
+    "4": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "5",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "i7": [
+                0,
+                10000
+            ],
+            "n": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "w": [
+                0,
+                65
+            ]
+        }
+    },
+    "5": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "6",
+        "rhs": "14"
+    },
+    "6": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "7",
+        "indexes": [
+            "11",
+            "12",
+            "13"
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "8"
+    },
+    "8": {
+        "type": 105,
+        "inputs": [
+            "9"
+        ],
+        "expr": "10",
+        "outputShape": [
+            8,
+            10000,
+            65
+        ]
+    },
+    "9": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "10": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "11": {
+        "type": 5,
+        "name": "b"
+    },
+    "12": {
+        "type": 5,
+        "name": "i7"
+    },
+    "13": {
+        "type": 5,
+        "name": "w"
+    },
+    "14": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "15",
+        "indexes": [
+            "19",
+            "20",
+            "23"
+        ]
+    },
+    "15": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "16"
+    },
+    "16": {
+        "type": 105,
+        "inputs": [
+            "17"
+        ],
+        "expr": "18",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "18": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "19": {
+        "type": 5,
+        "name": "b"
+    },
+    "20": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "21",
+        "rhs": "22"
+    },
+    "21": {
+        "type": 5,
+        "name": "i7"
+    },
+    "22": {
+        "type": 5,
+        "name": "w"
+    },
+    "23": {
+        "type": 5,
+        "name": "n"
+    },
+    "24": {
+        "type": 5,
+        "name": "b"
+    },
+    "25": {
+        "type": 5,
+        "name": "i7"
+    },
+    "26": {
+        "type": 5,
+        "name": "n"
+    },
+    "27": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "28",
+        "rhs": "31"
+    },
+    "28": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "29",
+        "rhs": "30"
+    },
+    "29": {
+        "type": 5,
+        "name": "m"
+    },
+    "30": {
+        "type": 0,
+        "val": 4
+    },
+    "31": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "32",
+        "rhs": "33"
+    },
+    "32": {
+        "type": 0,
+        "val": 2500
+    },
+    "33": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "34",
+        "rhs": "35"
+    },
+    "34": {
+        "type": 5,
+        "name": "m"
+    },
+    "35": {
+        "type": 0,
+        "val": 4
+    },
+    "36": {
+        "type": 5,
+        "name": "b"
+    },
+    "37": {
+        "type": 5,
+        "name": "n"
+    }
+}
diff --git a/test/nnet/log/gbmm/GBMM_7.expr b/test/nnet/log/gbmm/GBMM_7.expr
new file mode 100644
index 00000000..4a24d63a
--- /dev/null
+++ b/test/nnet/log/gbmm/GBMM_7.expr
@@ -0,0 +1,276 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 7 Rule4StageMerging: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "m": [
+                0,
+                10000
+            ],
+            "n": [
+                0,
+                512
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "2",
+        "indexes": [
+            "22",
+            "23",
+            "32"
+        ]
+    },
+    "2": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "3",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "i7": [
+                0,
+                10000
+            ],
+            "n": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "w": [
+                0,
+                65
+            ]
+        }
+    },
+    "3": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "4",
+        "rhs": "12"
+    },
+    "4": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "5",
+        "indexes": [
+            "9",
+            "10",
+            "11"
+        ]
+    },
+    "5": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "6"
+    },
+    "6": {
+        "type": 105,
+        "inputs": [
+            "7"
+        ],
+        "expr": "8",
+        "outputShape": [
+            8,
+            10000,
+            65
+        ]
+    },
+    "7": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "8": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "9": {
+        "type": 5,
+        "name": "b"
+    },
+    "10": {
+        "type": 5,
+        "name": "i7"
+    },
+    "11": {
+        "type": 5,
+        "name": "w"
+    },
+    "12": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "13",
+        "indexes": [
+            "17",
+            "18",
+            "21"
+        ]
+    },
+    "13": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "14"
+    },
+    "14": {
+        "type": 105,
+        "inputs": [
+            "15"
+        ],
+        "expr": "16",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "15": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "16": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "17": {
+        "type": 5,
+        "name": "b"
+    },
+    "18": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "19",
+        "rhs": "20"
+    },
+    "19": {
+        "type": 5,
+        "name": "i7"
+    },
+    "20": {
+        "type": 5,
+        "name": "w"
+    },
+    "21": {
+        "type": 5,
+        "name": "n"
+    },
+    "22": {
+        "type": 5,
+        "name": "b"
+    },
+    "23": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "24",
+        "rhs": "27"
+    },
+    "24": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "25",
+        "rhs": "26"
+    },
+    "25": {
+        "type": 5,
+        "name": "m"
+    },
+    "26": {
+        "type": 0,
+        "val": 4
+    },
+    "27": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "28",
+        "rhs": "29"
+    },
+    "28": {
+        "type": 0,
+        "val": 2500
+    },
+    "29": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "30",
+        "rhs": "31"
+    },
+    "30": {
+        "type": 5,
+        "name": "m"
+    },
+    "31": {
+        "type": 0,
+        "val": 4
+    },
+    "32": {
+        "type": 5,
+        "name": "n"
+    }
+}
diff --git a/test/nnet/log/gbmm/GBMM_8.expr b/test/nnet/log/gbmm/GBMM_8.expr
new file mode 100644
index 00000000..c241aaef
--- /dev/null
+++ b/test/nnet/log/gbmm/GBMM_8.expr
@@ -0,0 +1,404 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 8 Rule6KenerlMatching: ",
+    "0": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "1",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "m": [
+                0,
+                10000
+            ],
+            "n": [
+                0,
+                512
+            ]
+        }
+    },
+    "1": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "2",
+        "indexes": [
+            "32",
+            "33",
+            "42"
+        ]
+    },
+    "2": {
+        "type": 4,
+        "name": "T4",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "3"
+    },
+    "3": {
+        "type": 104,
+        "inputs": [
+            "4",
+            "8"
+        ],
+        "expr": "12",
+        "args": [
+            8,
+            10000,
+            32,
+            512,
+            1
+        ]
+    },
+    "4": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "5"
+    },
+    "5": {
+        "type": 105,
+        "inputs": [
+            "6"
+        ],
+        "expr": "7",
+        "outputShape": [
+            8,
+            10000,
+            65
+        ]
+    },
+    "6": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "7": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "8": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "9"
+    },
+    "9": {
+        "type": 105,
+        "inputs": [
+            "10"
+        ],
+        "expr": "11",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "10": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "11": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "12": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "13",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "i7": [
+                0,
+                10000
+            ],
+            "n": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "w": [
+                0,
+                65
+            ]
+        }
+    },
+    "13": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "14",
+        "rhs": "22"
+    },
+    "14": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "15",
+        "indexes": [
+            "19",
+            "20",
+            "21"
+        ]
+    },
+    "15": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "16"
+    },
+    "16": {
+        "type": 105,
+        "inputs": [
+            "17"
+        ],
+        "expr": "18",
+        "outputShape": [
+            8,
+            10000,
+            65
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "18": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "19": {
+        "type": 5,
+        "name": "b"
+    },
+    "20": {
+        "type": 5,
+        "name": "i7"
+    },
+    "21": {
+        "type": 5,
+        "name": "w"
+    },
+    "22": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "23",
+        "indexes": [
+            "27",
+            "28",
+            "31"
+        ]
+    },
+    "23": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "24"
+    },
+    "24": {
+        "type": 105,
+        "inputs": [
+            "25"
+        ],
+        "expr": "26",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "25": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "26": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "27": {
+        "type": 5,
+        "name": "b"
+    },
+    "28": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "29",
+        "rhs": "30"
+    },
+    "29": {
+        "type": 5,
+        "name": "i7"
+    },
+    "30": {
+        "type": 5,
+        "name": "w"
+    },
+    "31": {
+        "type": 5,
+        "name": "n"
+    },
+    "32": {
+        "type": 5,
+        "name": "b"
+    },
+    "33": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "34",
+        "rhs": "37"
+    },
+    "34": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "35",
+        "rhs": "36"
+    },
+    "35": {
+        "type": 5,
+        "name": "m"
+    },
+    "36": {
+        "type": 0,
+        "val": 4
+    },
+    "37": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "38",
+        "rhs": "39"
+    },
+    "38": {
+        "type": 0,
+        "val": 2500
+    },
+    "39": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "40",
+        "rhs": "41"
+    },
+    "40": {
+        "type": 5,
+        "name": "m"
+    },
+    "41": {
+        "type": 0,
+        "val": 4
+    },
+    "42": {
+        "type": 5,
+        "name": "n"
+    }
+}
diff --git a/test/nnet/log/gbmm/GBMM_9.expr b/test/nnet/log/gbmm/GBMM_9.expr
new file mode 100644
index 00000000..9b25444c
--- /dev/null
+++ b/test/nnet/log/gbmm/GBMM_9.expr
@@ -0,0 +1,746 @@
+{
+    "Version": 1,
+    "Msg": "=== Depth 9 Rule6KenerlMatching: ",
+    "0": {
+        "type": 4,
+        "name": "T5",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "1"
+    },
+    "1": {
+        "type": 105,
+        "inputs": [
+            "2"
+        ],
+        "expr": "32",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "2": {
+        "type": 4,
+        "name": "T4",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "3"
+    },
+    "3": {
+        "type": 104,
+        "inputs": [
+            "4",
+            "8"
+        ],
+        "expr": "12",
+        "args": [
+            8,
+            10000,
+            32,
+            512,
+            1
+        ]
+    },
+    "4": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "5"
+    },
+    "5": {
+        "type": 105,
+        "inputs": [
+            "6"
+        ],
+        "expr": "7",
+        "outputShape": [
+            8,
+            10000,
+            65
+        ]
+    },
+    "6": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "7": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "8": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "9"
+    },
+    "9": {
+        "type": 105,
+        "inputs": [
+            "10"
+        ],
+        "expr": "11",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "10": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "11": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "12": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "13",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "i7": [
+                0,
+                10000
+            ],
+            "n": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "w": [
+                0,
+                65
+            ]
+        }
+    },
+    "13": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "14",
+        "rhs": "22"
+    },
+    "14": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "15",
+        "indexes": [
+            "19",
+            "20",
+            "21"
+        ]
+    },
+    "15": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "16"
+    },
+    "16": {
+        "type": 105,
+        "inputs": [
+            "17"
+        ],
+        "expr": "18",
+        "outputShape": [
+            8,
+            10000,
+            65
+        ]
+    },
+    "17": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "18": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "19": {
+        "type": 5,
+        "name": "b"
+    },
+    "20": {
+        "type": 5,
+        "name": "i7"
+    },
+    "21": {
+        "type": 5,
+        "name": "w"
+    },
+    "22": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "23",
+        "indexes": [
+            "27",
+            "28",
+            "31"
+        ]
+    },
+    "23": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "24"
+    },
+    "24": {
+        "type": 105,
+        "inputs": [
+            "25"
+        ],
+        "expr": "26",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "25": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "26": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "27": {
+        "type": 5,
+        "name": "b"
+    },
+    "28": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "29",
+        "rhs": "30"
+    },
+    "29": {
+        "type": 5,
+        "name": "i7"
+    },
+    "30": {
+        "type": 5,
+        "name": "w"
+    },
+    "31": {
+        "type": 5,
+        "name": "n"
+    },
+    "32": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "33",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "m": [
+                0,
+                10000
+            ],
+            "n": [
+                0,
+                512
+            ]
+        }
+    },
+    "33": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "34",
+        "indexes": [
+            "64",
+            "65",
+            "74"
+        ]
+    },
+    "34": {
+        "type": 4,
+        "name": "T4",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "35"
+    },
+    "35": {
+        "type": 104,
+        "inputs": [
+            "36",
+            "40"
+        ],
+        "expr": "44",
+        "args": [
+            8,
+            10000,
+            32,
+            512,
+            1
+        ]
+    },
+    "36": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "37"
+    },
+    "37": {
+        "type": 105,
+        "inputs": [
+            "38"
+        ],
+        "expr": "39",
+        "outputShape": [
+            8,
+            10000,
+            65
+        ]
+    },
+    "38": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "39": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "40": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "41"
+    },
+    "41": {
+        "type": 105,
+        "inputs": [
+            "42"
+        ],
+        "expr": "43",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "42": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "43": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "44": {
+        "type": 2,
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "summand": "45",
+        "loopVarRanges": {
+            "b": [
+                0,
+                8
+            ],
+            "i7": [
+                0,
+                10000
+            ],
+            "n": [
+                0,
+                512
+            ]
+        },
+        "sumVarRanges": {
+            "w": [
+                0,
+                65
+            ]
+        }
+    },
+    "45": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "46",
+        "rhs": "54"
+    },
+    "46": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "47",
+        "indexes": [
+            "51",
+            "52",
+            "53"
+        ]
+    },
+    "47": {
+        "type": 4,
+        "name": "T1",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "48"
+    },
+    "48": {
+        "type": 105,
+        "inputs": [
+            "49"
+        ],
+        "expr": "50",
+        "outputShape": [
+            8,
+            10000,
+            65
+        ]
+    },
+    "49": {
+        "type": 4,
+        "name": "A",
+        "shape": [
+            8,
+            10000,
+            65
+        ],
+        "paddings": [
+            0,
+            0,
+            0
+        ],
+        "source": "-1"
+    },
+    "50": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "51": {
+        "type": 5,
+        "name": "b"
+    },
+    "52": {
+        "type": 5,
+        "name": "i7"
+    },
+    "53": {
+        "type": 5,
+        "name": "w"
+    },
+    "54": {
+        "type": 3,
+        "subExprsNum": 3,
+        "object": "55",
+        "indexes": [
+            "59",
+            "60",
+            "63"
+        ]
+    },
+    "55": {
+        "type": 4,
+        "name": "T3",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "56"
+    },
+    "56": {
+        "type": 105,
+        "inputs": [
+            "57"
+        ],
+        "expr": "58",
+        "outputShape": [
+            8,
+            10000,
+            512
+        ]
+    },
+    "57": {
+        "type": 4,
+        "name": "B",
+        "shape": [
+            8,
+            10000,
+            512
+        ],
+        "paddings": [
+            0,
+            128,
+            0
+        ],
+        "source": "-1"
+    },
+    "58": {
+        "type": 4,
+        "name": "__DLT",
+        "shape": [],
+        "paddings": [],
+        "source": "-1"
+    },
+    "59": {
+        "type": 5,
+        "name": "b"
+    },
+    "60": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "61",
+        "rhs": "62"
+    },
+    "61": {
+        "type": 5,
+        "name": "i7"
+    },
+    "62": {
+        "type": 5,
+        "name": "w"
+    },
+    "63": {
+        "type": 5,
+        "name": "n"
+    },
+    "64": {
+        "type": 5,
+        "name": "b"
+    },
+    "65": {
+        "type": 1,
+        "opType": 1,
+        "lhs": "66",
+        "rhs": "69"
+    },
+    "66": {
+        "type": 1,
+        "opType": 3,
+        "lhs": "67",
+        "rhs": "68"
+    },
+    "67": {
+        "type": 5,
+        "name": "m"
+    },
+    "68": {
+        "type": 0,
+        "val": 4
+    },
+    "69": {
+        "type": 1,
+        "opType": 2,
+        "lhs": "70",
+        "rhs": "71"
+    },
+    "70": {
+        "type": 0,
+        "val": 2500
+    },
+    "71": {
+        "type": 1,
+        "opType": 4,
+        "lhs": "72",
+        "rhs": "73"
+    },
+    "72": {
+        "type": 5,
+        "name": "m"
+    },
+    "73": {
+        "type": 0,
+        "val": 4
+    },
+    "74": {
+        "type": 5,
+        "name": "n"
+    }
+}
diff --git a/test/nnet/readlog.cc b/test/nnet/readlog.cc
new file mode 100644
index 00000000..c019f653
--- /dev/null
+++ b/test/nnet/readlog.cc
@@ -0,0 +1,17 @@
+#include "nnet/Visitor/FullPrinterVisitor.h"
+#include "nnet/Visitor/HashVisitor.h"
+#include "nnet/Visitor/Serializer.h"
+#include "nnet/expr.h"
+using namespace nnet;
+using namespace std;
+
+int main(int argc, char *argv[]) {
+    if (argc != 2) {
+        printf("Usage: %s <log>\n", argv[0]);
+        return 1;
+    }
+    auto expr = Serializer().deserialize(argv[1]);
+    cout << FullPrinterVisitor().print(expr);
+    cout << endl << "Hash = " << HashVisitor().getHash(expr) << endl;
+    return 0;
+}
\ No newline at end of file
diff --git a/test/nnet/test_Interpreter.cc b/test/nnet/test_Interpreter.cc
new file mode 100644
index 00000000..7a0e1061
--- /dev/null
+++ b/test/nnet/test_Interpreter.cc
@@ -0,0 +1,171 @@
+#include "nnet/Visitor/Interpreter.h"
+#include "nnet/Visitor/Serializer.h"
+#include "gtest/gtest.h"
+using namespace nnet;
+using namespace std;
+#define DEFINE_VAR(name) auto name = make_ref<VarNode>(#name);
+
+//{L<i3:0:2500><i4:0:4><b:0:8><w:0:65>Sum<k:0:512>
+//{({A}[b, (i3 + (2500 * i4)), k] * {B<pad=0,128,0>}[b, ((i3 + (2500 * i4)) +
+// w), k])}}
+// ==> A : Input Tensor shape=[8,10000,512] pad=[0,0,0]
+// ==> B : Input Tensor shape=[8,10000,512] pad=[0,128,0]
+TEST(Interpreter, SingleStage) {
+    DEFINE_VAR(b);
+    DEFINE_VAR(w);
+    DEFINE_VAR(k);
+    DEFINE_VAR(i3);
+    DEFINE_VAR(i4);
+    auto A = makeTensor("A", {8, 10000, 512}, {0, 0, 0});
+    auto B = makeTensor("B", {8, 10000, 512}, {0, 128, 0});
+    auto subA = makeSubscript(A, {b, (i3 + (2500 * i4)), k});
+    auto subB = makeSubscript(B, {b, ((i3 + (2500 * i4)) + w), k});
+    auto range = makeRangeOperator(
+        {{i3, {0, 2500}}, {i4, {0, 4}}, {b, {0, 8}}, {w, {0, 65}}},
+        {{k, {0, 512}}}, subA * subB);
+    cout << range->toReadable() << endl;
+
+    auto dataA = make_ref<vector<int>>(8 * 10000 * 512);
+    auto dataB = make_ref<vector<int>>(8 * 10000 * 512);
+    for (int i = 0; i < 8 * 10000 * 512; i++) {
+        dataA->operator[](i) = i;
+        dataB->operator[](i) = i;
+    }
+    unordered_map<string, Ref<vector<int>>> inputs{{"A", dataA}, {"B", dataB}};
+    vector<vector<int>> positions{{0, 0, 0, 0}, {1, 2, 3, 4}};
+    auto values1 = Interpreter(inputs).interpret(range, positions);
+    dbg(values1);
+}
+
+//{L<i3:0:2500><i4:0:4><b:0:8><w:0:65>Sum  ...  [(i3 + (2500 * i4)),b,w]
+//{L<i45:0:10000><b:0:8><w:0:65>Sum<k:0:512>
+//{({A}[b, i45, k] * {B<pad=0,128,0>}[b, (i45 + w), k])}}}
+// ==> A : Input Tensor shape=[8,10000,512] pad=[0,0,0]
+// ==> B : Input Tensor shape=[8,10000,512] pad=[0,128,0]
+TEST(Interpreter, DoubleNestedStages) {
+    DEFINE_VAR(b);
+    DEFINE_VAR(w);
+    DEFINE_VAR(k);
+    DEFINE_VAR(i3);
+    DEFINE_VAR(i4);
+    DEFINE_VAR(i45);
+    auto A = makeTensor("A", {8, 10000, 512}, {0, 0, 0});
+    auto B = makeTensor("B", {8, 10000, 512}, {0, 128, 0});
+    auto subA = makeSubscript(A, {b, i45, k});
+    auto subB = makeSubscript(B, {b, (i45 + w), k});
+    auto innerRange =
+        makeRangeOperator({{i45, {0, 10000}}, {b, {0, 8}}, {w, {0, 65}}},
+                          {{k, {0, 512}}}, subA * subB);
+    auto subOuter = makeSubscript(innerRange, {(i3 + (2500 * i4)), b, w});
+    auto outerRange = makeRangeOperator(
+        {{i3, {0, 2500}}, {i4, {0, 4}}, {b, {0, 8}}, {w, {0, 65}}}, {},
+        subOuter);
+    cout << outerRange->toReadable() << endl;
+
+    auto dataA = make_ref<vector<int>>(8 * 10000 * 512);
+    auto dataB = make_ref<vector<int>>(8 * 10000 * 512);
+    for (int i = 0; i < 8 * 10000 * 512; i++) {
+        dataA->operator[](i) = i;
+        dataB->operator[](i) = i;
+    }
+    unordered_map<string, Ref<vector<int>>> inputs{{"A", dataA}, {"B", dataB}};
+    vector<vector<int>> positions{{0, 0, 0, 0}, {1, 2, 3, 4}};
+    auto values2 = Interpreter(inputs).interpret(outerRange, positions);
+    dbg(values2);
+}
+
+// The above two expressions
+TEST(Interpreter, CompareTwoExprs) {
+    DEFINE_VAR(b);
+    DEFINE_VAR(w);
+    DEFINE_VAR(k);
+    DEFINE_VAR(i3);
+    DEFINE_VAR(i4);
+    DEFINE_VAR(i45);
+    auto A = makeTensor("A", {8, 10000, 512}, {0, 0, 0});
+    auto B = makeTensor("B", {8, 10000, 512}, {0, 128, 0});
+    // singleStage
+    auto subA1 = makeSubscript(A, {b, (i3 + (2500 * i4)), k});
+    auto subB1 = makeSubscript(B, {b, ((i3 + (2500 * i4)) + w), k});
+    auto range = makeRangeOperator(
+        {{i3, {0, 2500}}, {i4, {0, 4}}, {b, {0, 8}}, {w, {0, 65}}},
+        {{k, {0, 512}}}, subA1 * subB1);
+    cout << range->toReadable() << endl;
+    // doubleStages
+    auto subA2 = makeSubscript(A, {b, i45, k});
+    auto subB2 = makeSubscript(B, {b, (i45 + w), k});
+    auto innerRange =
+        makeRangeOperator({{i45, {0, 10000}}, {b, {0, 8}}, {w, {0, 65}}},
+                          {{k, {0, 512}}}, subA2 * subB2);
+    auto subOuter = makeSubscript(innerRange, {(i3 + (2500 * i4)), b, w});
+    auto outerRange = makeRangeOperator(
+        {{i3, {0, 2500}}, {i4, {0, 4}}, {b, {0, 8}}, {w, {0, 65}}}, {},
+        subOuter);
+    cout << outerRange->toReadable() << endl;
+
+    auto dataA = make_ref<vector<int>>(8 * 10000 * 512);
+    auto dataB = make_ref<vector<int>>(8 * 10000 * 512);
+    for (int i = 0; i < 8 * 10000 * 512; i++) {
+        dataA->operator[](i) = i;
+        dataB->operator[](i) = i;
+    }
+    unordered_map<string, Ref<vector<int>>> inputs{{"A", dataA}, {"B", dataB}};
+    vector<vector<int>> positions{{0, 0, 0, 0}, {1, 2, 3, 4}};
+    auto values1 = Interpreter(inputs).interpret(range, positions);
+    auto values2 = Interpreter(inputs).interpret(outerRange, positions);
+
+    EXPECT_EQ(values1, values2);
+}
+
+// L<n:0:1><h:0:4><w:0:4><c:0:256>Sum  ...  [n,c,((h + 1) / 2),((h + 1) % 2),((w
+// + 1) / 2),((w + 1) % 2)]
+// {L<n:0:1><c:0:256><x1:0:3><x2:0:2><y1:0:3><y2:0:2>Sum<f:0:448><r:0:2><s:0:2>
+// {({A<pad=0,2,2,0>}[n, ((x1 + r) + -1), ((y1 + s) + -1), f] * {K}[((2 - (2 *
+// r)) + x2), ((2 - (2 * s)) + y2), f, c])}}
+// ==> A : Input Tensor shape=[1, 4, 4, 448] pad=[0, 2, 2, 0]
+// ==> K : Input Tensor shape=[2, 2, 448, 256] pad=[0, 0, 0, 0]
+TEST(Interpreter, TransConv) {
+    DEFINE_VAR(n);
+    DEFINE_VAR(h);
+    DEFINE_VAR(w);
+    DEFINE_VAR(c);
+    DEFINE_VAR(x1);
+    DEFINE_VAR(x2);
+    DEFINE_VAR(y1);
+    DEFINE_VAR(y2);
+    DEFINE_VAR(f);
+    DEFINE_VAR(r);
+    DEFINE_VAR(s);
+    auto A = makeTensor("A", {1, 4, 4, 448}, {0, 2, 2, 0});
+    auto K = makeTensor("K", {4, 4, 448, 256}, {0, 0, 0, 0});
+    auto subA = makeSubscript(A, {n, ((x1 + r) + (-1)), ((y1 + s) + (-1)), f});
+    auto subK =
+        makeSubscript(K, {((2 - (2 * r)) + x2), ((2 - (2 * s)) + y2), f, c});
+    auto innerRange = makeRangeOperator(
+        {{n, {0, 1}},
+         {c, {0, 256}},
+         {x1, {0, 3}},
+         {x2, {0, 2}},
+         {y1, {0, 3}},
+         {y2, {0, 2}}},
+        {{f, {0, 448}}, {r, {0, 2}}, {s, {0, 2}}}, subA * subK);
+    auto subOuter =
+        makeSubscript(innerRange, {n, c, ((h + 1) / 2), ((h + 1) % 2),
+                                   ((w + 1) / 2), ((w + 1) % 2)});
+    auto outerRange = makeRangeOperator(
+        {{n, {0, 1}}, {h, {0, 4}}, {w, {0, 4}}, {c, {0, 256}}}, {}, subOuter);
+    cout << outerRange->toReadable() << endl;
+
+    auto dataA = make_ref<vector<int>>(1 * 4 * 4 * 448);
+    auto dataK = make_ref<vector<int>>(4 * 4 * 448 * 256);
+    for (int i = 0; i < 1 * 4 * 4 * 448; i++) {
+        dataA->operator[](i) = i;
+    }
+    for (int i = 0; i < 4 * 4 * 448 * 256; i++) {
+        dataK->operator[](i) = i;
+    }
+    unordered_map<string, Ref<vector<int>>> inputs{{"A", dataA}, {"K", dataK}};
+    vector<vector<int>> positions{{0, 2, 2, 85}};
+    auto vals = Interpreter(inputs).interpret(outerRange, positions);
+    dbg(vals[0]);
+}
diff --git a/test/nnet/test_OpSearch.cc b/test/nnet/test_OpSearch.cc
new file mode 100644
index 00000000..575ef352
--- /dev/null
+++ b/test/nnet/test_OpSearch.cc
@@ -0,0 +1,240 @@
+#include "nnet/Visitor/CountRoutineVisitor.h"
+#include "nnet/derivator.h"
+#include "nnet/expr.h"
+#include "nnet/test.h"
+#include "gtest/gtest.h"
+using namespace nnet;
+using namespace std;
+
+/* Evaluation bash script
+for i in $(seq 1 12); do
+    echo $i
+    NNET_UseHash=1 NNET_MaxDepth=$i ./test_OpSearch # &>
+out.searchDepthTest.$i.txt done
+
+NNET_UseHash=0 NNET_MaxDepth=8 ./test_OpSearch # &> out.searchDepthTest.$i.txt
+NNET_UseHash=1 NNET_MaxDepth=8 ./test_OpSearch # &> out.searchDepthTest.$i.txt
+*/
+
+class OpSearch : public ::testing::Test {
+  protected:
+    const int maxDepth = getMaxDepth();
+    const int useHash = getUseHash();
+    // const int maxDepth = 8;
+    // const int useHash = true;
+    const Derivator::LogMode mode = Derivator::LogMode::NoLog;
+    const Derivator::PassMode passMode = Derivator::PassMode::Full;
+    const bool isRuleBased = false;
+
+    void SetUp() override {
+        if (maxDepth < 0 || useHash < 0) {
+            GTEST_SKIP() << "Skipping OpSearch since NNET_MaxDepth or "
+                            "NNET_UseHash are not specifed.\n";
+        }
+    }
+
+  private:
+    static int getMaxDepth() {
+        if (auto s = getenv("NNET_MaxDepth"))
+            return atoi(s);
+        return -1;
+    }
+
+    static bool getUseHash() {
+        if (auto s = getenv("NNET_UseHash"))
+            return atoi(s);
+        return -1;
+    }
+};
+
+TEST_F(OpSearch, Conv2gemm_NCHW_FCRS_search) {
+    // A[n,h+r,w+s,c]*K[r,s,f,c]
+    int N = 1, H = 7, W = 7, C = 512, F = 512;
+    int R = 3, S = 3;
+    auto n = make_ref<VarNode>("n");
+    auto c = make_ref<VarNode>("c");
+    auto h = make_ref<VarNode>("h");
+    auto w = make_ref<VarNode>("w");
+    auto f = make_ref<VarNode>("f");
+    auto r = make_ref<VarNode>("r");
+    auto s = make_ref<VarNode>("s");
+    auto A = make_ref<TensorNode>("A", vector<int>({N, H, W, C}),
+                                  vector<int>{0, R / 2, S / 2, 0});
+    // auto K = make_ref<TensorNode>("K", vector<int>({R, S, F, C}));
+    auto K = make_ref<TensorNode>("K", vector<int>({R, S, F, C}));
+
+    auto subA = makeSubscript(A, {n, h + r - R / 2, w + s - S / 2, c});
+    auto subK = makeSubscript(K, {r, s, f, c});
+
+    auto range =
+        makeRangeOperator({{n, {0, N}}, {h, {0, H}}, {w, {0, W}}, {f, {0, F}}},
+                          {{c, {0, C}}, {r, {0, R}}, {s, {0, S}}}, subA * subK);
+
+    // Derivation
+    Formula conv_3x3_nhwc_rsfc(range, 0);
+    Derivator derivator(maxDepth, useHash, mode, passMode);
+
+    if (isRuleBased) {
+        // Rule-based derivation
+        const vector<int> rules = {3, 2, 2, 5, 8, 8, 6, 90};
+        derivator.setDumpFirstSuccess("Conv2gemm_NCHW_FCRS_");
+        derivator.ruleBasedDFS(conv_3x3_nhwc_rsfc, 0, rules);
+    } else {
+        derivator.search(conv_3x3_nhwc_rsfc, 0);
+    }
+
+    EXPECT_GE(derivator.getNumCandidates(), 1);
+    int nMatches = matchExprResult(
+        derivator, "../test/log/conv2gemm/Conv2gemm_NCHW_FCRS_11.expr");
+    EXPECT_GE(nMatches, 1);
+    // derivator.print();
+    derivator.printStatistics();
+}
+
+// Warn: F is the number of input channels, which is inversed compared with
+// normal Conv.
+// Our data layout: NHWF -> NHWC, RSFC
+// Pytorch data layout: NFHW -> NCHW, FCRS
+RangeOp buildTConv4x4_NHWF_RSFC(const int N, const int C, const int H,
+                                const int W, const int F, const int R,
+                                const int S) {
+    assert(R == 4 && S == 4);
+    const int OH = 2 * H, OW = 2 * W;
+    DEFINE_VAR(n);
+    DEFINE_VAR(c);
+    DEFINE_VAR(f);
+    DEFINE_VAR(r);
+    DEFINE_VAR(s);
+    DEFINE_VAR(x1);
+    DEFINE_VAR(x2);
+    DEFINE_VAR(y1);
+    DEFINE_VAR(y2);
+    DEFINE_VAR(i2);
+    DEFINE_VAR(i4);
+    DEFINE_VAR(h);
+    DEFINE_VAR(w);
+    // dilation * (kernel_size - 1) - padding
+    int padding = 1 * (R - 1) - 1;
+    auto A = make_ref<TensorNode>("A", vector<int>({N, H, W, F}),
+                                  vector<int>{0, padding, padding, 0});
+    auto K = make_ref<TensorNode>("K", vector<int>({R, S, F, C}));
+
+    auto subA = makeSubscript(A, {n, x1 + r - 1, y1 + s - 1, f});
+    auto subK =
+        makeSubscript(K, {(R - 2) - 2 * r + x2, (S - 2) - 2 * s + y2, f, c});
+    // x1=(h+1)//2, x2=(h+1)%2, y1=(w+1)//2
+
+    auto range1 = makeRangeOperator(
+        {
+            {n, {0, N}},
+            {c, {0, C}},
+            {x1, {0, OH / 2 + 1}},
+            {x2, {0, 2}},
+            {y1, {0, OW / 2 + 1}},
+            {y2, {0, 2}},
+        },
+        {{f, {0, F}}, {r, {0, R / 2}}, {s, {0, S / 2}}}, subA * subK);
+    dbg(range1);
+    auto sub0 = makeSubscript(
+        range1, {n, c, (h + 1) / 2, (h + 1) % 2, (w + 1) / 2, (w + 1) % 2});
+    auto range0 = makeRangeOperator(
+        {{n, {0, N}}, {h, {0, OH}}, {w, {0, OW}}, {c, {0, C}}}, {}, sub0);
+    return range0;
+}
+
+TEST_F(OpSearch, TConv2gemm_TConv4x4_NHWF_RSFC_search) {
+    const int N = 16, H = 2, W = 2, C = 256, F = 448;
+    const int R = 4, S = 4;
+    RangeOp range = buildTConv4x4_NHWF_RSFC(N, C, H, W, F, R, S);
+
+    Formula conv_9x9(range, 0);
+    Derivator derivator(maxDepth, useHash, mode, passMode);
+
+    if (isRuleBased) {
+        const vector<int> rules{3, 2, 2, 2, 2, 5};
+        derivator.setDumpFirstSuccess("TConv4x4_NHWF_RSFC_");
+        derivator.ruleBasedDFS(conv_9x9, 0, rules, {}, true);
+    } else
+        derivator.search(conv_9x9, 0);
+
+    EXPECT_GE(derivator.getNumCandidates(), 1);
+    int nMatches = matchExprResult(
+        derivator, "../test/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_18.expr");
+    EXPECT_GE(nMatches, 1);
+    derivator.printStatistics();
+}
+
+TEST_F(OpSearch, Conv2conv_5x5_RuleBased_NCHW_FCRS) {
+    int N = 16, C = 32, H = 224, W = 224, F = 1;
+    int R = 5, S = 5;
+    auto n = make_ref<VarNode>("n");
+    auto c = make_ref<VarNode>("c");
+    auto h = make_ref<VarNode>("h");
+    auto w = make_ref<VarNode>("w");
+    auto f = make_ref<VarNode>("f");
+    auto r = make_ref<VarNode>("r");
+    auto s = make_ref<VarNode>("s");
+    auto A = make_ref<TensorNode>("A", vector<int>({N, C, H, W}),
+                                  vector<int>{0, 0, R / 2, S / 2});
+    auto K = make_ref<TensorNode>("K", vector<int>({F, C, R, S}));
+
+    auto subA = makeSubscript(A, {n, c, h + r - R / 2, w + s - S / 2});
+    auto subK = makeSubscript(K, {f, c, r, s});
+
+    auto range =
+        makeRangeOperator({{n, {0, N}}, {f, {0, F}}, {h, {0, H}}, {w, {0, W}}},
+                          {{c, {0, C}}, {r, {0, R}}, {s, {0, S}}}, subA * subK);
+
+    Formula conv_9x9(range, 0);
+    Derivator derivator(maxDepth, useHash, mode, passMode);
+
+    if (isRuleBased) {
+        const vector<int> rules{9, 1, 1, 3, 2, 2, 5, 8, 8, 6, 6};
+        derivator.setDumpFirstSuccess("Conv2conv_5x5_NCHW_FCRS_");
+        derivator.ruleBasedDFS(conv_9x9, 0, rules, {}, true);
+    } else
+        derivator.search(conv_9x9, 0);
+
+    int nMatches = matchExprResult(
+        derivator, "../test/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_15.expr");
+    // derivator.print();
+    derivator.printStatistics();
+    EXPECT_GE(nMatches, 1);
+}
+
+TEST_F(OpSearch, G2BMM_RuleBased) {
+    const int n_heads = 8, seq_len = 10000, feat_len = 512;
+    const int Batch = n_heads, M = seq_len, K = feat_len, W = 32, dilation = 4;
+    DEFINE_VAR(b);
+    DEFINE_VAR(m);
+    DEFINE_VAR(w);
+    DEFINE_VAR(k);
+    auto A = make_ref<TensorNode>("A", vector<int>({Batch, M, K}),
+                                  vector<int>{0, 0, 0});
+    auto B = make_ref<TensorNode>("B", vector<int>({Batch, M, K}),
+                                  vector<int>{0, dilation * W, 0});
+
+    auto subA = makeSubscript(A, {b, m, k});
+    auto subB = makeSubscript(B, {b, m + dilation * (w - W), k});
+    auto range =
+        makeRangeOperator({{b, {0, Batch}}, {m, {0, M}}, {w, {0, 2 * W + 1}}},
+                          {{k, {0, K}}}, subA * subB);
+
+    // Derivation: this work without padding check in stage merging
+    Formula dialted_g2bmm(range, 0);
+    Derivator derivator(maxDepth, useHash, mode, passMode);
+
+    if (isRuleBased) {
+        const vector<int> rules{1, 7, 7, 2, 8, 6, 6};
+        derivator.setDumpFirstSuccess("G2BMM_");
+        derivator.ruleBasedDFS(dialted_g2bmm, 0, rules);
+    } else {
+        derivator.search(dialted_g2bmm, 0);
+    }
+
+    EXPECT_GE(derivator.getNumCandidates(), 1);
+    int nMatches = matchExprResult(derivator, "../test/log/g2bmm/G2BMM_9.expr");
+    EXPECT_GE(nMatches, 1);
+    // derivator.print();
+    derivator.printStatistics();
+}
\ No newline at end of file
diff --git a/test/nnet/test_Rule2VariableMerging.cc b/test/nnet/test_Rule2VariableMerging.cc
new file mode 100644
index 00000000..88d5d807
--- /dev/null
+++ b/test/nnet/test_Rule2VariableMerging.cc
@@ -0,0 +1,131 @@
+#include "nnet/Pass/Rule2VariableMerging.h"
+#include "nnet/Visitor/HashVisitor.h"
+#include "nnet/derivator.h"
+#include "nnet/expr.h"
+#include "gtest/gtest.h"
+using namespace nnet;
+using namespace std;
+#define DEFINE_VAR(name) auto name = make_ref<VarNode>(#name);
+
+// L<x:0:2><y:0:4>Sum  ...  [(x + (2 * y))]
+//     {L<i3:0:8>Sum<t1:0:5>
+//     {({A}[i3] * {B}[t1])}}
+Expr buildAnsPosPos() {
+    DEFINE_VAR(x);
+    DEFINE_VAR(y);
+    DEFINE_VAR(t1);
+    DEFINE_VAR(i3);
+    auto A = makeTensor("A", {8});
+    auto B = makeTensor("B", {8});
+    auto subA = makeSubscript(A, {i3});
+    auto subB = makeSubscript(B, {t1});
+    auto innerRange =
+        makeRangeOperator({{i3, {0, 8}}}, {{t1, {0, 5}}}, subA * subB);
+    auto subInner = makeSubscript(innerRange, {x + 2 * y});
+    auto outerRange =
+        makeRangeOperator({{x, {0, 2}}, {y, {0, 4}}}, {}, subInner);
+    return outerRange;
+}
+
+void realTest(const Expr &range, const Expr &ans) {
+    Derivator derivator(0);
+    Rule2VariableMerging pass(derivator);
+    Formula origin(range, 0);
+    pass.setEnableLogging(false);
+    pass.setEnableDebug(true);
+
+    pass.run(origin, 0, origin.root);
+    dbg(origin);
+    // auto ans = buildAnsPosPos();
+    auto hashAns = HashVisitor().dispatch(ans);
+    int cntEqual = 0;
+    for (const auto &expr : pass.getTransformations()) {
+        auto hashExpr = HashVisitor().dispatch(expr);
+        if (hashExpr == hashAns)
+            ++cntEqual;
+        dbg(expr);
+    }
+    EXPECT_EQ(cntEqual, 1);
+}
+
+TEST(Rule2, PosPos) {
+    const int a = 1, b = 2;
+    DEFINE_VAR(x);
+    DEFINE_VAR(y);
+    DEFINE_VAR(t1);
+    auto A = makeTensor("A", {8});
+    auto B = makeTensor("B", {8});
+    auto subA = makeSubscript(A, {a * x + b * y});
+    auto subB = makeSubscript(B, {t1});
+    auto range = makeRangeOperator({{x, {0, 2}}, {y, {0, 4}}}, {{t1, {0, 5}}},
+                                   subA * subB);
+    realTest(range, buildAnsPosPos());
+}
+
+// L<x:0:2><y:0:4>Sum  ...  [((x + (-2 * y)) + 6)]
+//     {L<i1:0:8>Sum<t1:0:5>
+//     {({A}[i1] * {B}[t1])}}}
+Expr buildAnsPosNeg() {
+    DEFINE_VAR(x);
+    DEFINE_VAR(y);
+    DEFINE_VAR(t1);
+    DEFINE_VAR(i3);
+    auto A = makeTensor("A", {8});
+    auto B = makeTensor("B", {8});
+    auto subA = makeSubscript(A, {i3});
+    auto subB = makeSubscript(B, {t1});
+    auto innerRange =
+        makeRangeOperator({{i3, {0, 8}}}, {{t1, {0, 5}}}, subA * subB);
+    auto subInner = makeSubscript(innerRange, {x - 2 * y + 6});
+    auto outerRange =
+        makeRangeOperator({{x, {0, 2}}, {y, {0, 4}}}, {}, subInner);
+    return outerRange;
+}
+
+TEST(Rule2, PosNeg) {
+    const int a = 1, b = -2;
+    DEFINE_VAR(x);
+    DEFINE_VAR(y);
+    DEFINE_VAR(t1);
+    auto A = makeTensor("A", {8});
+    auto B = makeTensor("B", {8});
+    auto subA = makeSubscript(A, {a * x + b * y + 6});
+    auto subB = makeSubscript(B, {t1});
+    auto range = makeRangeOperator({{x, {0, 2}}, {y, {0, 4}}}, {{t1, {0, 5}}},
+                                   subA * subB);
+    realTest(range, buildAnsPosNeg());
+}
+
+// L<x:0:2><y:0:4>Sum  ...  [(((-1 * x) + (-2 * y)) + 7)]
+//     {L<i1:0:8>Sum<t1:0:5>
+//     {({A}[i1] * {B}[t1])}}
+Expr buildAnsNegNeg() {
+    DEFINE_VAR(x);
+    DEFINE_VAR(y);
+    DEFINE_VAR(t1);
+    DEFINE_VAR(i3);
+    auto A = makeTensor("A", {8});
+    auto B = makeTensor("B", {8});
+    auto subA = makeSubscript(A, {i3});
+    auto subB = makeSubscript(B, {t1});
+    auto innerRange =
+        makeRangeOperator({{i3, {0, 8}}}, {{t1, {0, 5}}}, subA * subB);
+    auto subInner = makeSubscript(innerRange, {(-1) * x - 2 * y + 7});
+    auto outerRange =
+        makeRangeOperator({{x, {0, 2}}, {y, {0, 4}}}, {}, subInner);
+    return outerRange;
+}
+
+TEST(Rule2, NegNeg) {
+    const int a = -1, b = -2;
+    DEFINE_VAR(x);
+    DEFINE_VAR(y);
+    DEFINE_VAR(t1);
+    auto A = makeTensor("A", {8});
+    auto B = makeTensor("B", {8});
+    auto subA = makeSubscript(A, {a * x + b * y + 7});
+    auto subB = makeSubscript(B, {t1});
+    auto range = makeRangeOperator({{x, {0, 2}}, {y, {0, 4}}}, {{t1, {0, 5}}},
+                                   subA * subB);
+    realTest(range, buildAnsNegNeg());
+}
\ No newline at end of file
diff --git a/test/nnet/test_TConv2gemm.cc b/test/nnet/test_TConv2gemm.cc
new file mode 100644
index 00000000..853c30fd
--- /dev/null
+++ b/test/nnet/test_TConv2gemm.cc
@@ -0,0 +1,390 @@
+#include "nnet/Visitor/CountRoutineVisitor.h"
+#include "nnet/Visitor/GetTensorsVisitor.h"
+#include "nnet/Visitor/Interpreter.h"
+#include "nnet/derivator.h"
+#include "nnet/expr.h"
+#include "nnet/test.h"
+#include "gtest/gtest.h"
+using namespace nnet;
+using namespace std;
+
+TEST(Conv2conv, TConv4x4_NHWC_innerStage_RuleBased) {
+    const int N = 1, H = 2, W = 2, C = 256, F = 448;
+    const int R = 4, S = 4;
+    const int OH = 2 * H, OW = 2 * W;
+    DEFINE_VAR(n, c, f, r, s, x1, x2, y1, y2);
+    DEFINE_VAR(i2, i4);
+    // dilation * (kernel_size - 1) - padding
+    int padding = 1 * (R - 1) - 1;
+    auto A = make_ref<TensorNode>("A", vector<int>({N, H, W, F}),
+                                  vector<int>{0, padding, padding, 0});
+    auto K = make_ref<TensorNode>("K", vector<int>({F, R, S, C}));
+
+    auto subA = makeSubscript(A, {n, x1 + r - 1, y1 + s - 1, f});
+    auto subK = makeSubscript(
+        K, {f, -2 * r + (-1) * x2 + (R - 1), -2 * s + (-1) * y2 + (S - 1), c});
+
+    auto range = makeRangeOperator(
+        {
+            {n, {0, N}},
+            {c, {0, C}},
+            {x1, {0, OH / 2 + 1}},
+            {x2, {0, 2}},
+            {y1, {0, OW / 2 + 1}},
+            {y2, {0, 2}},
+        },
+        {{f, {0, F}}, {r, {0, R / 2}}, {s, {0, S / 2}}}, subA * subK);
+    dbg(range);
+
+    const vector<int> rules{3, 2, 2, 2, 2, 5, 8, 8, 6, 90};
+    Formula conv_9x9(range, 0);
+    Derivator derivator;
+    derivator.ruleBasedDFS(
+        conv_9x9, 0, rules,
+        {{1, {x1, r}}, {2, {y1, s}}, {3, {x2, i2}}, {4, {y2, i4}}});
+    EXPECT_EQ(derivator.getSearchedMaxDepth(), ((int)rules.size()));
+    ASSERT_GE(derivator.getNumCandidates(), 1);
+    const auto &formula = derivator.getCandidates().front();
+    EXPECT_TRUE(CountRoutineVisitor().match(formula.root, 1, 0, 3));
+    derivator.print();
+}
+
+TEST(Conv2conv, TConv4x4_NHWC_RuleBased) {
+    const int N = 1, H = 2, W = 2, C = 256, F = 448;
+    const int R = 4, S = 4;
+    const int OH = 2 * H, OW = 2 * W;
+    DEFINE_VAR(n, c, h, w, f, r, s);
+    DEFINE_VAR(x1, x2, y1, y2);
+    // dilation * (kernel_size - 1) - padding
+    int padding = 1 * (R - 1) - 1;
+    auto A = make_ref<TensorNode>("A", vector<int>({N, H, W, F}),
+                                  vector<int>{0, padding, padding, 0});
+    auto K = make_ref<TensorNode>("K", vector<int>({F, R, S, C}));
+
+    auto subA = makeSubscript(A, {n, x1 + r - 1, y1 + s - 1, f});
+    auto subK = makeSubscript(
+        K, {f, -2 * r + (-1) * x2 + (R - 1), -2 * s + (-1) * y2 + (S - 1), c});
+
+    // auto range =
+    //     makeRangeOperator({{n, {0, N}}, {c, {0, H}}, {w, {0, W}}, {f, {0,
+    //     F}}},
+    //                       {{f, {0, C}}, {r, {0, R}}, {s, {0, S}}}, subA *
+    //                       subK);
+    auto range = makeRangeOperator(
+        {
+            {n, {0, N}},
+            {x1, {0, OH / 2 + 1}},
+            {x2, {0, 2}},
+            {y1, {0, OW / 2 + 1}},
+            {y2, {0, 2}},
+            {c, {0, C}},
+        },
+        {{f, {0, F}}, {r, {0, R / 2}}, {s, {0, S / 2}}}, subA * subK);
+    auto subOuter = makeSubscript(
+        range, {n, (h + 1) / 2, (h + 1) % 2, (w + 1) / 2, (w + 1) % 2, c});
+    auto outerRange = makeRangeOperator(
+        {
+            {n, {0, N}},
+            {h, {0, OH}},
+            {w, {0, OW}},
+            {c, {0, C}},
+        },
+        {}, subOuter);
+    dbg(outerRange);
+
+    // Derivation: this work without padding check in stage merging
+    // const vector<int> rules{1, 1, 3, 2, 2, 5, 2, 2, 6, 4, 4, 4, 4, 6};
+    // Before Guided DLT seperated from rule2VarMerging
+    // const vector<int> rules{1, 1, 3, 2, 2, 5, 2, 2, 6, 6};
+    const vector<int> rules{3, 2, 2, 2, 2, 5, 8, 8, 6, 91, 90};
+    Formula conv_9x9(outerRange, 0);
+    Derivator derivator;
+    // derivator.ruleBasedDFS(conv_9x9, 0, rules,
+    //                        {{1, {"x1", "r"}},
+    //                         {2, {"y1", "s"}},
+    //                         {3, {"x2", "i2"}},
+    //                         {4, {"y2", "i4"}}});
+    derivator.ruleBasedDFS(conv_9x9, 0, rules);
+    EXPECT_EQ(derivator.getSearchedMaxDepth(), ((int)rules.size()));
+    ASSERT_GE(derivator.getNumCandidates(), 1);
+    const auto &formula = derivator.getCandidates().front();
+    EXPECT_TRUE(CountRoutineVisitor().match(formula.root, 1, 0, 3));
+    derivator.print();
+}
+
+TEST(Conv2conv, TConv4x4_BS16_NHWC_RuleBased) {
+    const int N = 16, H = 2, W = 2, C = 256, F = 448;
+    const int R = 4, S = 4;
+    const int OH = 2 * H, OW = 2 * W;
+    DEFINE_VAR(n, c, h, w, f, r, s);
+    DEFINE_VAR(x1, x2, y1, y2);
+    // dilation * (kernel_size - 1) - padding
+    int padding = 1 * (R - 1) - 1;
+    auto A = make_ref<TensorNode>("A", vector<int>({N, H, W, F}),
+                                  vector<int>{0, padding, padding, 0});
+    auto K = make_ref<TensorNode>("K", vector<int>({F, R, S, C}));
+
+    auto subA = makeSubscript(A, {n, x1 + r - 1, y1 + s - 1, f});
+    auto subK = makeSubscript(
+        K, {f, -2 * r + (-1) * x2 + (R - 1), -2 * s + (-1) * y2 + (S - 1), c});
+
+    // auto range =
+    //     makeRangeOperator({{n, {0, N}}, {c, {0, H}}, {w, {0, W}}, {f, {0,
+    //     F}}},
+    //                       {{f, {0, C}}, {r, {0, R}}, {s, {0, S}}}, subA *
+    //                       subK);
+    auto range = makeRangeOperator(
+        {
+            {n, {0, N}},
+            {x1, {0, OH / 2 + 1}},
+            {x2, {0, 2}},
+            {y1, {0, OW / 2 + 1}},
+            {y2, {0, 2}},
+            {c, {0, C}},
+        },
+        {{f, {0, F}}, {r, {0, R / 2}}, {s, {0, S / 2}}}, subA * subK);
+    auto subOuter = makeSubscript(
+        range, {n, (h + 1) / 2, (h + 1) % 2, (w + 1) / 2, (w + 1) % 2, c});
+    auto outerRange = makeRangeOperator(
+        {
+            {n, {0, N}},
+            {h, {0, OH}},
+            {w, {0, OW}},
+            {c, {0, C}},
+        },
+        {}, subOuter);
+    dbg(outerRange);
+
+    // Derivation: this work without padding check in stage merging
+    // const vector<int> rules{1, 1, 3, 2, 2, 5, 2, 2, 6, 4, 4, 4, 4, 6};
+    // Before Guided DLT seperated from rule2VarMerging
+    // const vector<int> rules{1, 1, 3, 2, 2, 5, 2, 2, 6, 6};
+    const vector<int> rules{3, 2, 2, 2, 2, 5, 8, 8, 6, 91, 90};
+    Formula conv_9x9(outerRange, 0);
+    Derivator derivator;
+    // derivator.ruleBasedDFS(conv_9x9, 0, rules,
+    //                        {{1, {"x1", "r"}},
+    //                         {2, {"y1", "s"}},
+    //                         {3, {"x2", "i2"}},
+    //                         {4, {"y2", "i4"}}});
+    derivator.ruleBasedDFS(conv_9x9, 0, rules);
+    EXPECT_EQ(derivator.getSearchedMaxDepth(), ((int)rules.size()));
+    ASSERT_GE(derivator.getNumCandidates(), 1);
+    const auto &formula = derivator.getCandidates().front();
+    EXPECT_TRUE(CountRoutineVisitor().match(formula.root, 1, 0, 3));
+    derivator.print();
+}
+
+// Warn: F is the number of input channels, which is inversed compared with
+// normal Conv.
+// Our data layout: NHWF -> NHWC, FRSC
+// Pytorch data layout: NFHW -> NCHW, FCRS
+RangeOp buildTConv4x4_NHWF_FRSC(const int N, const int C, const int H,
+                                const int W, const int F, const int R,
+                                const int S) {
+    assert(R == 4 && S == 4);
+    const int OH = 2 * H, OW = 2 * W;
+    DEFINE_VAR(n, c, h, w, f, r, s);
+    DEFINE_VAR(x1, x2, y1, y2, i2, i4);
+    // dilation * (kernel_size - 1) - padding
+    int padding = 1 * (R - 1) - 1;
+    auto A = make_ref<TensorNode>("A", vector<int>({N, H, W, F}),
+                                  vector<int>{0, padding, padding, 0});
+    auto K = make_ref<TensorNode>("K", vector<int>({F, R, S, C}));
+
+    auto subA = makeSubscript(A, {n, x1 + r - 1, y1 + s - 1, f});
+    auto subK =
+        makeSubscript(K, {f, (R - 2) - 2 * r + x2, (S - 2) - 2 * s + y2, c});
+    // x1=(h+1)//2, x2=(h+1)%2, y1=(w+1)//2
+
+    auto range1 = makeRangeOperator(
+        {
+            {n, {0, N}},
+            {c, {0, C}},
+            {x1, {0, OH / 2 + 1}},
+            {x2, {0, 2}},
+            {y1, {0, OW / 2 + 1}},
+            {y2, {0, 2}},
+        },
+        {{f, {0, F}}, {r, {0, R / 2}}, {s, {0, S / 2}}}, subA * subK);
+    dbg(range1);
+    auto sub0 = makeSubscript(
+        range1, {n, c, (h + 1) / 2, (h + 1) % 2, (w + 1) / 2, (w + 1) % 2});
+    auto range0 = makeRangeOperator(
+        {{n, {0, N}}, {h, {0, OH}}, {w, {0, OW}}, {c, {0, C}}}, {}, sub0);
+    return range0;
+}
+
+// Correct input expression
+
+// Warn: F is the number of input channels, which is inversed compared with
+// normal Conv.
+// Our data layout: NHWF -> NHWC, RSFC
+// Pytorch data layout: NFHW -> NCHW, FCRS
+RangeOp buildTConv4x4_NHWF_RSFC(const int N, const int C, const int H,
+                                const int W, const int F, const int R,
+                                const int S) {
+    assert(R == 4 && S == 4);
+    const int OH = 2 * H, OW = 2 * W;
+    DEFINE_VAR(n, c, h, w, f, r, s);
+    DEFINE_VAR(x1, x2, y1, y2, i2, i4);
+    // dilation * (kernel_size - 1) - padding
+    int padding = 1 * (R - 1) - 1;
+    auto A = make_ref<TensorNode>("A", vector<int>({N, H, W, F}),
+                                  vector<int>{0, padding, padding, 0});
+    auto K = make_ref<TensorNode>("K", vector<int>({R, S, F, C}));
+
+    auto subA = makeSubscript(A, {n, x1 + r - 1, y1 + s - 1, f});
+    auto subK =
+        makeSubscript(K, {(R - 2) - 2 * r + x2, (S - 2) - 2 * s + y2, f, c});
+    // x1=(h+1)//2, x2=(h+1)%2, y1=(w+1)//2
+
+    auto range1 = makeRangeOperator(
+        {
+            {n, {0, N}},
+            {c, {0, C}},
+            {x1, {0, OH / 2 + 1}},
+            {x2, {0, 2}},
+            {y1, {0, OW / 2 + 1}},
+            {y2, {0, 2}},
+        },
+        {{f, {0, F}}, {r, {0, R / 2}}, {s, {0, S / 2}}}, subA * subK);
+    dbg(range1);
+    auto sub0 = makeSubscript(
+        range1, {n, c, (h + 1) / 2, (h + 1) % 2, (w + 1) / 2, (w + 1) % 2});
+    auto range0 = makeRangeOperator(
+        {{n, {0, N}}, {h, {0, OH}}, {w, {0, OW}}, {c, {0, C}}}, {}, sub0);
+    return range0;
+}
+
+TEST(TConv2gemm, TConv4x4_NHWF_FRSC_correctness_of_input_expr) {
+    const int N = 1, H = 2, W = 2, C = 2, F = 3;
+    const int R = 4, S = 4;
+    RangeOp range0 = buildTConv4x4_NHWF_FRSC(N, C, H, W, F, R, S);
+
+    auto ans0 = Interpreter(range0).interpretAllOutput(range0);
+    // Pytorch results
+    // torch.conv_transpose2d(X, K, stride=2, padding=1, dilation=1)
+    // X, K is NFHW and FCRS
+    vector<int> ans1 = {190,  193,  740,  755,  770,  785,  592,  604,
+                        992,  1016, 2704, 2770, 2836, 2902, 1832, 1874,
+                        1184, 1208, 3232, 3298, 3364, 3430, 2168, 2210,
+                        1114, 1135, 2660, 2711, 2762, 2813, 1624, 1654};
+    ASSERT_EQ(ans0.size(), ans1.size());
+    for (size_t i = 0; i < ans0.size(); ++i)
+        EXPECT_EQ(ans0[i], ans1[i]);
+}
+
+ssize_t getOffset(vector<ssize_t> index, vector<int> shape) {
+    ssize_t ret = index[0];
+    for (size_t i = 1; i < index.size(); ++i)
+        ret = ret * shape[i] + index[i];
+    return ret;
+}
+
+TEST(TConv2gemm, TConv4x4_NHWF_RSFC_correctness_of_input_expr) {
+    const int N = 1, H = 2, W = 2, C = 2, F = 3;
+    const int R = 4, S = 4;
+    RangeOp range0 = buildTConv4x4_NHWF_RSFC(N, C, H, W, F, R, S);
+    Interpreter::Inputs inputs;
+
+    for (const auto &[name, tensor] : GetTensorsVisitor().get(range0)) {
+        auto data = make_ref<vector<int>>(tensor->getSize());
+        if (name == "A") {
+            for (ssize_t i = 0; i < tensor->getSize(); i++)
+                data->operator[](i) = i;
+        } else if (name == "K") {
+            for (ssize_t r = 0; r < R; r++)
+                for (ssize_t s = 0; s < S; s++)
+                    for (ssize_t f = 0; f < F; f++)
+                        for (ssize_t c = 0; c < C; c++) {
+                            ssize_t index =
+                                getOffset({r, s, f, c}, {R, S, F, C});
+                            ssize_t num = getOffset({f, r, s, c}, {F, R, S, C});
+                            data->operator[](index) = num;
+                        }
+
+        } else
+            assert(0);
+        inputs.emplace(name, data);
+    }
+
+    auto ans0 = Interpreter(inputs).interpretAllOutput(range0);
+    // Pytorch results
+    // torch.conv_transpose2d(X, K, stride=2, padding=1, dilation=1)
+    // X, K is NFHW and FCRS
+    vector<int> ans1 = {190,  193,  740,  755,  770,  785,  592,  604,
+                        992,  1016, 2704, 2770, 2836, 2902, 1832, 1874,
+                        1184, 1208, 3232, 3298, 3364, 3430, 2168, 2210,
+                        1114, 1135, 2660, 2711, 2762, 2813, 1624, 1654};
+    ASSERT_EQ(ans0.size(), ans1.size());
+    for (size_t i = 0; i < ans0.size(); ++i)
+        EXPECT_EQ(ans0[i], ans1[i]);
+}
+
+// TODO: Test after passing RSFC
+// TEST(TConv2gemm, TConv4x4_NHWF_FRSC_search) {
+//     const int N = 1, H = 2, W = 2, C = 256, F = 448;
+//     const int R = 4, S = 4;
+//     RangeOp range = buildTConv4x4_NHWF_FRSC(N, C, H, W, F, R, S);
+
+//     const vector<int> rules{3, 2, 2, 2, 2, 5, 8, 8, 6, 90};
+//     Formula conv_9x9(range, 0);
+//     Derivator derivator;
+//     // derivator.ruleBasedDFS(
+//     //     conv_9x9, 0, rules,
+//     //     {{1, {x1, r}}, {2, {y1, s}}, {3, {x2, i2}}, {4, {y2, i4}}});
+//     derivator.dfs(conv_9x9, 0);
+//     // EXPECT_EQ(derivator.getSearchedMaxDepth(), ((int)rules.size()));
+//     ASSERT_GE(derivator.getNumCandidates(), 1);
+//     const auto &formula = derivator.getCandidates().front();
+//     EXPECT_TRUE(CountRoutineVisitor().match(formula.root, 1, 0, 3));
+//     derivator.print();
+// }
+
+TEST(TConv2gemm, TConv4x4_NHWF_RSFC_search) {
+    const int N = 16, H = 2, W = 2, C = 256, F = 448;
+    const int R = 4, S = 4;
+    RangeOp range = buildTConv4x4_NHWF_RSFC(N, C, H, W, F, R, S);
+
+    Formula conv_9x9(range, 0);
+    Derivator derivator;
+
+    bool isRuleBased = false;
+    if (isRuleBased) {
+        const vector<int> rules{3, 2, 2, 2, 2, 5};
+        derivator.setDumpFirstSuccess("TConv4x4_NHWF_RSFC_");
+        derivator.ruleBasedDFS(conv_9x9, 0, rules, {}, true);
+    } else
+        derivator.search(conv_9x9, 0);
+
+    ASSERT_GE(derivator.getNumCandidates(), 1);
+    derivator.print();
+    // for (const auto &f : derivator.getCandidates()) {
+    //     dbg(CountRoutineVisitor().count(f.root));
+    // }
+    int nMatches = matchExprResult(
+        derivator, "../test/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_18.expr");
+    EXPECT_GE(nMatches, 1);
+    derivator.printStatistics();
+}
+
+TEST(TConv2gemm, TConv4x4_NHWF_FRSC_CheckDerivationCorrectness_log) {
+    const string fnPrefix =
+        "../test/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_";
+    EXPECT_TRUE(checkExprLogSame(fnPrefix, 0, 11));
+}
+
+// TODO: correct ConvTransPattern
+TEST(Conv2conv, InfoGAN_ConvTranspose_3_OOB_Test) {
+    // ConvTranspose_3 in InfoGAN
+    const int n = 1, c = 256, h = 2, w = 2, f = 448, r = 4, s = 4;
+    int padding = 1 * (r - 1) - 1;
+    const auto A = nnet::makeTensor("A", {n, h, w, f},
+                                    std::vector<int>{0, padding, padding, 0});
+    const auto K = nnet::makeTensor("K", {f, c, r, s});
+    auto expr = ConvTransPattern::getExpr(A, K, n, c, h, w, f, r, s);
+    dbg(expr);
+    Derivator derivator;
+    derivator.checkOOB(as<RangeOpNode>(expr));
+}
\ No newline at end of file
diff --git a/test/nnet/test_as_tvm.cc b/test/nnet/test_as_tvm.cc
new file mode 100644
index 00000000..d10dab31
--- /dev/null
+++ b/test/nnet/test_as_tvm.cc
@@ -0,0 +1,40 @@
+#include "nnet/Visitor/AsTVMVisitor.h"
+#include "nnet/derivator.h"
+#include "nnet/expr.h"
+#include "gtest/gtest.h"
+using namespace nnet;
+using namespace std;
+
+#define DEFINE_VAR(name) auto name = make_ref<VarNode>(#name);
+
+TEST(Conv2conv, 9x9_RuleBased) {
+    DEFINE_VAR(i19);
+    DEFINE_VAR(i20);
+    DEFINE_VAR(i15);
+    DEFINE_VAR(i16);
+    DEFINE_VAR(n);
+    DEFINE_VAR(f);
+    auto T2 = make_ref<TensorNode>("T2", vector<int>({8, 288, 226, 226}));
+    auto S1 = makeRangeOperator(
+        {{i19, {-1, 225}},
+         {i20, {-1, 2}},
+         {i15, {-1, 225}},
+         {i16, {-1, 2}},
+         {n, {0, 8}},
+         {f, {0, 32}}},
+        {}, makeSubscript(T2, {n, 9 * f + 3 * i16 + i20, i15 + 1, i19 + 1}));
+    S1->setPaddings({2, 0, 2, 0, 0, 0});
+    DEFINE_VAR(h);
+    DEFINE_VAR(w);
+    DEFINE_VAR(i13);
+    DEFINE_VAR(i3);
+    auto S2 = makeRangeOperator(
+        {{n, {0, 8}}, {h, {0, 224}}, {w, {0, 224}}, {f, {0, 32}}},
+        {{i13, {-1, 2}}, {i3, {-1, 2}}},
+        makeSubscript(S1, {w + 3 * i13, i13, h + 3 * i3, i3, n, f}));
+    std::cout << S2->toReadable() << std::endl;
+
+    AsTVMVisitor visitor;
+    visitor.dispatch(S2);
+    std::cout << visitor.getStmts() << std::endl;
+}
diff --git a/test/nnet/test_compareFormulas.cc b/test/nnet/test_compareFormulas.cc
new file mode 100644
index 00000000..2957d8cb
--- /dev/null
+++ b/test/nnet/test_compareFormulas.cc
@@ -0,0 +1,45 @@
+#include "nnet/Visitor/CompareMultiFormulasVisitor.h"
+#include "nnet/derivator.h"
+#include "nnet/expr.h"
+#include "gtest/gtest.h"
+using namespace nnet;
+using namespace std;
+#define DEFINE_VAR(name) auto name = make_ref<VarNode>(#name);
+
+Expr buildConv() {
+    int N = 1, H = 224, W = 224, C = 16, F = 64;
+    int R = 5, S = 5;
+    auto n = make_ref<VarNode>("n");
+    auto c = make_ref<VarNode>("c");
+    auto h = make_ref<VarNode>("h");
+    auto w = make_ref<VarNode>("w");
+    auto f = make_ref<VarNode>("f");
+    auto r = make_ref<VarNode>("r");
+    auto s = make_ref<VarNode>("s");
+    auto A = make_ref<TensorNode>("A", vector<int>({N, C, H, W}),
+                                  vector<int>{0, 0, R / 2, S / 2});
+    auto K = make_ref<TensorNode>("K", vector<int>({F, C, R, S}));
+
+    auto subA = makeSubscript(A, {n, c, h + r - R / 2, w + s - S / 2});
+    auto subK = makeSubscript(K, {f, c, r, s});
+
+    auto range =
+        makeRangeOperator({{n, {0, N}}, {f, {0, F}}, {h, {0, H}}, {w, {0, W}}},
+                          {{c, {0, C}}, {r, {0, R}}, {s, {0, S}}}, subA * subK);
+    return range;
+}
+
+TEST(RangeMagnify, Conv5x5) {
+    vector<Expr> roots;
+    const int cnt = 3;
+    for (int i = 0; i < cnt; ++i)
+        roots.emplace_back(buildConv());
+    EXPECT_TRUE(CompareMultiFormulasVisitor().compare(roots));
+
+    const auto rangeOp = as<RangeOpNode>(roots[0]);
+    ASSERT_TRUE(rangeOp);
+    auto sumVarRanges = rangeOp->getSumVarRanges();
+    sumVarRanges[0].second.first++;
+    rangeOp->setSumIterator(sumVarRanges);
+    EXPECT_FALSE(CompareMultiFormulasVisitor().compare(roots));
+}
\ No newline at end of file
diff --git a/test/nnet/test_conv2conv.cc b/test/nnet/test_conv2conv.cc
new file mode 100644
index 00000000..9cdcc39a
--- /dev/null
+++ b/test/nnet/test_conv2conv.cc
@@ -0,0 +1,126 @@
+#include "nnet/Visitor/CountRoutineVisitor.h"
+#include "nnet/derivator.h"
+#include "nnet/expr.h"
+#include "nnet/test.h"
+#include "gtest/gtest.h"
+using namespace nnet;
+using namespace std;
+
+TEST(Conv2conv, 9x9_NCHW_FCRS) {
+    int N = 8, H = 224, W = 224, C = 16, F = 32;
+    int R = 9, S = 9;
+    auto n = make_ref<VarNode>("n");
+    auto c = make_ref<VarNode>("c");
+    auto h = make_ref<VarNode>("h");
+    auto w = make_ref<VarNode>("w");
+    auto f = make_ref<VarNode>("f");
+    auto r = make_ref<VarNode>("r");
+    auto s = make_ref<VarNode>("s");
+    auto A = make_ref<TensorNode>("A", vector<int>({N, C, H, W}),
+                                  vector<int>{0, 0, R / 2, S / 2});
+    auto K = make_ref<TensorNode>("K", vector<int>({F, C, R, S}));
+
+    auto subA = makeSubscript(A, {n, c, h + r - R / 2, w + s - S / 2});
+    auto subK = makeSubscript(K, {f, c, r, s});
+
+    auto range =
+        makeRangeOperator({{n, {0, N}}, {f, {0, F}}, {h, {0, H}}, {w, {0, W}}},
+                          {{c, {0, C}}, {r, {0, R}}, {s, {0, S}}}, subA * subK);
+
+    Formula conv_9x9(range, 0);
+    Derivator derivator(8);
+
+    bool isRuleBased = false;
+    if (isRuleBased) {
+        const vector<int> rules{1, 1, 3, 2, 2, 5, 8, 8, 6, 90};
+        // derivator.setDumpFirstSuccess("Conv2conv_9x9_NCHW_FCRS_");
+        derivator.ruleBasedDFS(conv_9x9, 0, rules, {}, true);
+    } else
+        derivator.search(conv_9x9, 0);
+
+    int nMatches = matchExprResult(
+        derivator, "../test/log/conv2conv/Conv2conv_9x9_NCHW_FCRS_14.expr");
+    derivator.print();
+    derivator.printStatistics();
+    EXPECT_GE(nMatches, 1);
+}
+
+TEST(Conv2conv, 6x6_RuleBased_NCHW_FCRS) {
+    int N = 1, H = 224, W = 224, C = 16, F = 64;
+    int R = 6, S = 6;
+    auto n = make_ref<VarNode>("n");
+    auto c = make_ref<VarNode>("c");
+    auto h = make_ref<VarNode>("h");
+    auto w = make_ref<VarNode>("w");
+    auto f = make_ref<VarNode>("f");
+    auto r = make_ref<VarNode>("r");
+    auto s = make_ref<VarNode>("s");
+    auto A = make_ref<TensorNode>("A", vector<int>({N, C, H, W}),
+                                  vector<int>{0, 0, R / 2, S / 2});
+    auto K = make_ref<TensorNode>("K", vector<int>({F, C, R, S}));
+
+    auto subA =
+        makeSubscript(A, {n, c, h + r - (R - 1) / 2, w + s - (S - 1) / 2});
+    auto subK = makeSubscript(K, {f, c, r, s});
+
+    auto range =
+        makeRangeOperator({{n, {0, N}}, {f, {0, F}}, {h, {0, H}}, {w, {0, W}}},
+                          {{c, {0, C}}, {r, {0, R}}, {s, {0, S}}}, subA * subK);
+
+    Formula conv_6x6(range, 0);
+    Derivator derivator;
+
+    bool isRuleBased = false;
+    if (isRuleBased) {
+        const vector<int> rules{1, 1, 3, 2, 2, 5, 8, 8, 6, 6};
+        // derivator.setDumpFirstSuccess("Conv2conv_6x6_NCHW_FCRS_");
+        derivator.ruleBasedDFS(conv_6x6, 0, rules, {}, true);
+    } else
+        derivator.search(conv_6x6, 0);
+
+    ASSERT_GE(derivator.getNumCandidates(), 1);
+    int nMatches = matchExprResult(
+        derivator, "../test/log/conv2conv/Conv2conv_6x6_NCHW_FCRS_14.expr");
+    derivator.print();
+    derivator.printStatistics();
+    EXPECT_GE(nMatches, 1);
+}
+
+TEST(Conv2conv, 5x5_RuleBased_NCHW_FCRS) {
+    int N = 16, C = 32, H = 224, W = 224, F = 1;
+    int R = 5, S = 5;
+    auto n = make_ref<VarNode>("n");
+    auto c = make_ref<VarNode>("c");
+    auto h = make_ref<VarNode>("h");
+    auto w = make_ref<VarNode>("w");
+    auto f = make_ref<VarNode>("f");
+    auto r = make_ref<VarNode>("r");
+    auto s = make_ref<VarNode>("s");
+    auto A = make_ref<TensorNode>("A", vector<int>({N, C, H, W}),
+                                  vector<int>{0, 0, R / 2, S / 2});
+    auto K = make_ref<TensorNode>("K", vector<int>({F, C, R, S}));
+
+    auto subA = makeSubscript(A, {n, c, h + r - R / 2, w + s - S / 2});
+    auto subK = makeSubscript(K, {f, c, r, s});
+
+    auto range =
+        makeRangeOperator({{n, {0, N}}, {f, {0, F}}, {h, {0, H}}, {w, {0, W}}},
+                          {{c, {0, C}}, {r, {0, R}}, {s, {0, S}}}, subA * subK);
+
+    Formula conv_9x9(range, 0);
+    Derivator derivator(7);
+
+    bool isRuleBased = false;
+    if (isRuleBased) {
+        const vector<int> rules{9, 1, 1, 3, 2, 2, 5, 8, 8, 6, 6};
+        derivator.setDumpFirstSuccess("Conv2conv_5x5_NCHW_FCRS_");
+        derivator.ruleBasedDFS(conv_9x9, 0, rules, {}, true);
+    } else
+        derivator.search(conv_9x9, 0);
+
+    int nMatches = matchExprResult(
+        derivator, "../test/log/conv2conv/Conv2conv_5x5_NCHW_FCRS_15.expr");
+    derivator.print();
+    derivator.printStatistics();
+    EXPECT_GE(nMatches, 1);
+}
\ No newline at end of file
diff --git a/test/nnet/test_conv2gemm.cc b/test/nnet/test_conv2gemm.cc
new file mode 100644
index 00000000..25cadae8
--- /dev/null
+++ b/test/nnet/test_conv2gemm.cc
@@ -0,0 +1,282 @@
+#include "nnet/Visitor/CountRoutineVisitor.h"
+#include "nnet/derivator.h"
+#include "nnet/expr.h"
+#include "nnet/test.h"
+#include "gtest/gtest.h"
+#include <chrono>
+using namespace nnet;
+using namespace std;
+
+TEST(Conv2gemm, NCHW_FCRS_ruleBased) {
+    // A[n,h+r,w+s,c]*K[r,s,f,c]
+    const int N = 8, H = 224, W = 224, C = 16, F = 32, R = 3, S = 3;
+    DEFINE_VAR(n, c, h, w, f, r, s);
+    auto A = make_ref<TensorNode>("A", vector<int>({N, C, H, W}),
+                                  vector<int>{0, 0, R / 2, S / 2});
+    auto K = make_ref<TensorNode>("K", vector<int>({F, C, R, S}));
+
+    auto subA = makeSubscript(A, {n, c, h + r - R / 2, w + s - S / 2});
+    auto subK = makeSubscript(K, {f, c, r, s});
+
+    auto range =
+        makeRangeOperator({{n, {0, N}}, {h, {0, H}}, {w, {0, W}}, {f, {0, F}}},
+                          {{c, {0, C}}, {r, {0, R}}, {s, {0, S}}}, subA * subK);
+    // cout << range->toReadable() << endl;
+
+    // Derivation
+    Formula conv_1x1_nhwc_fc(range, 0);
+    Derivator derivator(12);
+    // const vector<int> rules = {3, 2, 2, 5, 2, 2, 6, 6};
+    const vector<int> rules = {3, 2, 2, 5, 8, 8, 6, 90};
+    // derivator.ruleBasedDFS(conv_1x1_nhwc_fc, 0, rules);
+    derivator.search(conv_1x1_nhwc_fc, 0);
+    // Stage merge with padding is not realized
+    EXPECT_EQ(derivator.getSearchedMaxDepth(), 5);
+    ASSERT_GT(derivator.getNumCandidates(), 0);
+    derivator.print();
+    bool hasMatch = false;
+    for (const auto &formula : derivator.getCandidates()) {
+        if (CountRoutineVisitor().match(formula.root, 1, 0, 3))
+            hasMatch = true;
+    }
+    EXPECT_TRUE(hasMatch);
+}
+
+TEST(Conv2gemm, NHWC_RSFC_ruleBased) {
+    // A[n,h+r,w+s,c]*K[r,s,f,c]
+    const int N = 8, H = 224, W = 224, C = 16, F = 32, R = 3, S = 3;
+    DEFINE_VAR(n, c, h, w, f, r, s);
+    auto A = make_ref<TensorNode>("A", vector<int>({N, H, W, C}),
+                                  vector<int>{0, R / 2, S / 2, 0});
+    // auto K = make_ref<TensorNode>("K", vector<int>({R, S, F, C}));
+    auto K = make_ref<TensorNode>("K", vector<int>({R, S, F, C}));
+
+    auto subA = makeSubscript(A, {n, h + r - R / 2, w + s - S / 2, c});
+    auto subK = makeSubscript(K, {r, s, f, c});
+
+    auto range =
+        makeRangeOperator({{n, {0, N}}, {h, {0, H}}, {w, {0, W}}, {f, {0, F}}},
+                          {{c, {0, C}}, {r, {0, R}}, {s, {0, S}}}, subA * subK);
+    // cout << range->toReadable() << endl;
+
+    // Derivation
+    Formula conv_1x1_nhwc_fc(range, 0);
+    Derivator derivator(5);
+    // const vector<int> rules = {3, 2, 2, 5, 2, 2, 6, 6};
+    const vector<int> rules = {3, 2, 2, 5, 8, 8, 6, 90};
+    // derivator.ruleBasedDFS(conv_1x1_nhwc_fc, 0, rules);
+    derivator.search(conv_1x1_nhwc_fc, 0);
+    // Stage merge with padding is not realized
+    EXPECT_EQ(derivator.getSearchedMaxDepth(), 5);
+    EXPECT_GE(derivator.getNumCandidates(), 1);
+    derivator.print();
+    derivator.printStatistics();
+}
+
+TEST(Conv2gemm, Derivation_dfs) {
+    // A[n,h+r,w+s,c]*K[r,s,f,c]
+    int N = 8, H = 224, W = 224, C = 16, F = 32;
+    int R = 3, S = 3;
+    DEFINE_VAR(n, c, h, w, f, r, s);
+    auto A = make_ref<TensorNode>("A", vector<int>({N, H, W, C}),
+                                  vector<int>{0, R / 2, S / 2, 0});
+    auto K = make_ref<TensorNode>("K", vector<int>({R, S, F, C}));
+
+    auto subA = makeSubscript(A, {n, h + r - R / 2, w + s - S / 2, c});
+    auto subK = makeSubscript(K, {r, s, f, c});
+
+    auto range =
+        makeRangeOperator({{n, {0, N}}, {h, {0, H}}, {w, {0, W}}, {f, {0, F}}},
+                          {{c, {0, C}}, {r, {0, R}}, {s, {0, S}}}, subA * subK);
+    // cout << range->toReadable() << endl;
+
+    // Derivation
+    Formula conv_1x1_nhwc_fc(range, 0);
+    Derivator derivator(12);
+    derivator.search(conv_1x1_nhwc_fc, 0);
+    EXPECT_GT(derivator.getNumCandidates(), 0);
+    derivator.print();
+}
+
+void Conv2gemm_NHWC_RSFC_search(int maxDepth, bool enalbeHashPruning) {
+    // A[n,h+r,w+s,c]*K[r,s,f,c]
+    int N = 1, H = 7, W = 7, C = 32, F = 32, R = 3, S = 3;
+    DEFINE_VAR(n, c, h, w, f, r, s);
+    auto A = make_ref<TensorNode>("A", vector<int>({N, H, W, C}),
+                                  vector<int>{0, R / 2, S / 2, 0});
+    // auto K = make_ref<TensorNode>("K", vector<int>({R, S, F, C}));
+    auto K = make_ref<TensorNode>("K", vector<int>({R, S, F, C}));
+
+    auto subA = makeSubscript(A, {n, h + r - R / 2, w + s - S / 2, c});
+    auto subK = makeSubscript(K, {r, s, f, c});
+
+    auto range =
+        makeRangeOperator({{n, {0, N}}, {h, {0, H}}, {w, {0, W}}, {f, {0, F}}},
+                          {{c, {0, C}}, {r, {0, R}}, {s, {0, S}}}, subA * subK);
+    // cout << ange->toReadable() << endl;
+
+    // Derivation
+    Formula conv_1x1_nhwc_fc(range, 0);
+    Derivator derivator(maxDepth, enalbeHashPruning);
+    // const vector<int> rules = {3, 2, 2, 5, 2, 2, 6, 6};
+    const vector<int> rules = {3, 2, 2, 5, 8, 8, 6, 90};
+    // derivator.ruleBasedDFS(conv_1x1_nhwc_fc, 0, rules);
+    derivator.search(conv_1x1_nhwc_fc, 0);
+    // Stage merge with padding is not realized
+    EXPECT_EQ(derivator.getSearchedMaxDepth(), maxDepth);
+    EXPECT_GE(derivator.getNumCandidates(), 1);
+    derivator.printStatistics();
+}
+
+TEST(Conv2gemm, timing_NHWC_RSFC_search) {
+    for (bool enalbeHashPruning : {true, false}) {
+        // Disabled to pass 10s time limit
+        for (int maxDepth = 5; maxDepth < 5; ++maxDepth) {
+            printf("Max depth = %d, Hash = %d\n", maxDepth, enalbeHashPruning);
+            auto t_start = std::chrono::high_resolution_clock::now();
+            Conv2gemm_NHWC_RSFC_search(maxDepth, enalbeHashPruning);
+            auto t_end = std::chrono::high_resolution_clock::now();
+            double elapsed_time_s =
+                std::chrono::duration<double, std::milli>(t_end - t_start)
+                    .count() /
+                1000;
+            printf("Elapsed time (s) = %lf\n", elapsed_time_s);
+        }
+    }
+}
+
+// Conv2gemm requires thorough update, this is disabled temporarily
+TEST(Conv2gemm, CheckCorrectness) {
+    const string fnPrefix = "../test/log/conv2gemm/Conv2gemm_NCHW_RSFC_";
+    // conv2gemm_7 has T3
+    EXPECT_TRUE(checkExprLogSame(fnPrefix, 0, 7));
+}
+
+TEST(Conv2gemm, NCHW_RSFC_search) {
+    // A[n,h+r,w+s,c]*K[r,s,f,c]
+    const int N = 1, H = 7, W = 7, C = 512, F = 512, R = 3, S = 3;
+    DEFINE_VAR(n, c, h, w, f, r, s);
+    auto A = make_ref<TensorNode>("A", vector<int>({N, H, W, C}),
+                                  vector<int>{0, R / 2, S / 2, 0});
+    auto K = make_ref<TensorNode>("K", vector<int>({R, S, F, C}));
+
+    auto subA = makeSubscript(A, {n, h + r - R / 2, w + s - S / 2, c});
+    auto subK = makeSubscript(K, {r, s, f, c});
+
+    auto range =
+        makeRangeOperator({{n, {0, N}}, {h, {0, H}}, {w, {0, W}}, {f, {0, F}}},
+                          {{c, {0, C}}, {r, {0, R}}, {s, {0, S}}}, subA * subK);
+
+    // Derivation
+    Formula conv_3x3_nhwc_rsfc(range, 0);
+    Derivator derivator(10);
+
+    bool isRuleBased = false;
+    if (isRuleBased) {
+        const vector<int> rules = {3, 2, 2, 5, 8, 8, 6, 90};
+        // derivator.setDumpFirstSuccess("Conv2gemm_NCHW_FCRS_");
+        derivator.ruleBasedDFS(conv_3x3_nhwc_rsfc, 0, rules);
+    } else {
+        derivator.search(conv_3x3_nhwc_rsfc, 0);
+    }
+
+    ASSERT_GE(derivator.getNumCandidates(), 1);
+    int nMatches = matchExprResult(
+        derivator, "../test/log/conv2gemm/Conv2gemm_NCHW_RSFC_11.expr");
+    EXPECT_GE(nMatches, 1);
+    // derivator.print();
+    derivator.printStatistics();
+}
+
+TEST(Conv2gemm1x1, NHWC_RSFC_ruleBased) {
+    // A[n,h+r,w+s,c]*K[r,s,f,c]
+    const int N = 1, H = 7, W = 7, C = 512, F = 512, R = 1, S = 1;
+    DEFINE_VAR(n, c, h, w, f, r, s);
+    auto A = make_ref<TensorNode>("A", vector<int>({N, H, W, C}));
+    auto K = make_ref<TensorNode>("K", vector<int>({R, S, F, C}));
+
+    auto subA = makeSubscript(A, {n, h + r, w + s, c});
+    auto subK = makeSubscript(K, {r, s, f, c});
+
+    auto range =
+        makeRangeOperator({{n, {0, N}}, {h, {0, H}}, {w, {0, W}}, {f, {0, F}}},
+                          {{c, {0, C}}, {r, {0, R}}, {s, {0, S}}}, subA * subK);
+
+    Formula conv_1x1_nhwc_fc(range, 0);
+    Derivator derivator(7);
+    const vector<int> rules = {3, 2, 2, 8, 8, 6, 6};
+    derivator.ruleBasedDFS(conv_1x1_nhwc_fc, 0, rules);
+    ASSERT_GT(derivator.getNumCandidates(), 0);
+    derivator.printStatistics();
+    bool hasMatch = false;
+    for (const auto &formula : derivator.getCandidates()) {
+        if (CountRoutineVisitor().match(formula.root, 1, 0, 3))
+            hasMatch = true;
+    }
+    EXPECT_TRUE(hasMatch);
+}
+
+TEST(Conv2gemm1x1, NCHW_FCRS_search) {
+    // A[n,h+r,w+s,c]*K[f,c,r,s]
+    const int N = 1, H = 7, W = 7, C = 512, F = 512, R = 1, S = 1;
+    DEFINE_VAR(n, c, h, w, f, r, s);
+    auto A = make_ref<TensorNode>("A", vector<int>({N, C, H, W}),
+                                  vector<int>{0, R / 2, S / 2, 0});
+    auto K = make_ref<TensorNode>("K", vector<int>({F, C, R, S}));
+
+    auto subA = makeSubscript(A, {n, c, h + r - R / 2, w + s - S / 2});
+    auto subK = makeSubscript(K, {f, c, r, s});
+
+    auto range =
+        makeRangeOperator({{n, {0, N}}, {f, {0, F}}, {h, {0, H}}, {w, {0, W}}},
+                          {{c, {0, C}}, {r, {0, R}}, {s, {0, S}}}, subA * subK);
+
+    // Derivation
+    Formula conv_3x3_nhwc_rsfc(range, 0);
+    Derivator derivator(10);
+
+    bool isRuleBased = false;
+    if (isRuleBased) {
+        const vector<int> rules = {3, 2, 2, 8, 8, 6, 6};
+        // derivator.setDumpFirstSuccess("Conv2gemm_NCHW_FCRS_");
+        derivator.ruleBasedDFS(conv_3x3_nhwc_rsfc, 0, rules);
+    } else {
+        derivator.search(conv_3x3_nhwc_rsfc, 0);
+    }
+
+    ASSERT_GE(derivator.getNumCandidates(), 1);
+}
+
+TEST(Conv2gemm1x7, NCHW_FCRS_search) {
+    const int N = 1, C = 2048, H = 7, W = 7, F = 128, R = 1,
+              S = 7; // gcn_Conv_137
+    DEFINE_VAR(n, c, h, w, f, r, s);
+    auto A = make_ref<TensorNode>("A", vector<int>({N, C, H, W}),
+                                  vector<int>{0, 0, R / 2, S / 2});
+    auto K = make_ref<TensorNode>("K", vector<int>({F, C, R, S}));
+
+    auto subA = makeSubscript(A, {n, c, h + r - R / 2, w + s - S / 2});
+    auto subK = makeSubscript(K, {f, c, r, s});
+
+    auto range =
+        makeRangeOperator({{n, {0, N}}, {f, {0, F}}, {h, {0, H}}, {w, {0, W}}},
+                          {{c, {0, C}}, {r, {0, R}}, {s, {0, S}}}, subA * subK);
+
+    // Derivation
+    Formula conv_1x7(range, 0);
+    Derivator derivator(10, true, nnet::Derivator::LogMode::NoLog);
+
+    bool isRuleBased = false;
+    if (isRuleBased) {
+        const vector<int> rules = {3, 2, 2, 5, 8, 8, 6, 90};
+        derivator.setDumpFirstSuccess("Conv2gemm_1x7_NCHW_FCRS_");
+        derivator.ruleBasedDFS(conv_1x7, 0, rules);
+    } else {
+        derivator.search(conv_1x7, 0);
+    }
+
+    ASSERT_GE(derivator.getNumCandidates(), 1);
+    int nMatches = matchExprResult(
+        derivator, "../test/log/conv2gemm_1x7/Conv2gemm_1x7_NCHW_FCRS_11.expr");
+    EXPECT_GE(nMatches, 1);
+}
\ No newline at end of file
diff --git a/test/nnet/test_dlt.cc b/test/nnet/test_dlt.cc
new file mode 100644
index 00000000..914b03d9
--- /dev/null
+++ b/test/nnet/test_dlt.cc
@@ -0,0 +1,78 @@
+#include "nnet/dlt.h"
+#include "nnet/expr.h"
+#include "gtest/gtest.h"
+using namespace nnet;
+using namespace std;
+#define DEFINE_VAR(name) auto name = make_ref<VarNode>(#name);
+
+TEST(DLT, Simple) {
+    DEFINE_VAR(c);
+    DEFINE_VAR(f);
+    DEFINE_VAR(p1);
+    DEFINE_VAR(p2);
+    DEFINE_VAR(q1);
+    DEFINE_VAR(q2);
+    int C = 12, F = 16, R = 9, S = 9;
+    auto A = make_ref<TensorNode>("A", vector<int>({F, C, R, S}));
+
+    auto subA = makeSubscript(A, {f, c, 3 * p1 + p2, 3 * q1 + q2});
+    auto rangeOp =
+        makeRangeOperator({{p1, {0, 3}}, {q1, {0, 3}}, {f, {0, F}}},
+                          {{c, {0, C}}, {p2, {0, 3}}, {q2, {0, 3}}}, subA);
+    DLT dlt;
+    dlt.split(2, 3);
+    auto opt = dlt.apply(rangeOp, subA, "dltedA");
+    ASSERT_TRUE(opt.has_value());
+    auto sub = as<SubscriptNode>(*opt);
+    dbg(rangeOp, sub);
+    ASSERT_TRUE(sub != nullptr);
+    EXPECT_EQ(sub->getDims(), 5u);
+    EXPECT_EQ(sub->getIndex(2)->hash(), p1->hash());
+    EXPECT_EQ(sub->getIndex(3)->hash(), p2->hash());
+}
+
+TEST(DLT, Conv2Conv) {
+    DEFINE_VAR(c);
+    DEFINE_VAR(f);
+    DEFINE_VAR(p1);
+    DEFINE_VAR(p2);
+    DEFINE_VAR(q1);
+    DEFINE_VAR(q2);
+    int C = 12, F = 16, R = 9, S = 9;
+    auto A = make_ref<TensorNode>("A", vector<int>({F, C, R, S}));
+
+    auto subA = makeSubscript(A, {f, c, 3 * p1 + p2, 3 * q1 + q2});
+    auto rangeOp =
+        makeRangeOperator({{p1, {0, 3}}, {q1, {0, 3}}, {f, {0, F}}},
+                          {{c, {0, C}}, {p2, {0, 3}}, {q2, {0, 3}}}, subA);
+    DLT dlt;
+    dlt.split(3, 3);
+    dlt.split(2, 3);
+    dlt.merge(0, 2);
+    dlt.merge(0, 3);
+    auto opt = dlt.apply(rangeOp, subA, "dltedA");
+    ASSERT_TRUE(opt.has_value());
+    auto sub = as<SubscriptNode>(*opt);
+    ASSERT_TRUE(sub != nullptr);
+    EXPECT_EQ(sub->getDims(), 4u);
+}
+
+TEST(DLT, Wrong0) {
+    DEFINE_VAR(c);
+    DEFINE_VAR(f);
+    DEFINE_VAR(p1);
+    DEFINE_VAR(p2);
+    DEFINE_VAR(q1);
+    DEFINE_VAR(q2);
+    int C = 12, F = 16, R = 9, S = 9;
+    auto A = make_ref<TensorNode>("A", vector<int>({F, C, R, S}));
+
+    auto subA = makeSubscript(A, {f, c, 3 * p1 + p2, 3 * q1 + q2});
+    auto rangeOp =
+        makeRangeOperator({{p1, {0, 3}}, {q1, {0, 3}}, {f, {0, F}}},
+                          {{c, {0, C}}, {p2, {0, 4}}, {q2, {0, 3}}}, subA);
+    DLT dlt;
+    dlt.split(2, 3);
+    auto opt = dlt.apply(rangeOp, subA, "dltedA");
+    ASSERT_FALSE(opt.has_value());
+}
diff --git a/test/nnet/test_g2bmm.cc b/test/nnet/test_g2bmm.cc
new file mode 100644
index 00000000..9411bad9
--- /dev/null
+++ b/test/nnet/test_g2bmm.cc
@@ -0,0 +1,83 @@
+#include "nnet/Visitor/CountRoutineVisitor.h"
+#include "nnet/derivator.h"
+#include "nnet/expr.h"
+#include "nnet/test.h"
+#include "gtest/gtest.h"
+using namespace nnet;
+using namespace std;
+
+TEST(GBMM, RuleBased) {
+    const int n_heads = 8, seq_len = 10000, feat_len = 512;
+    const int Batch = n_heads, M = seq_len, K = feat_len, W = 32, dilation = 4;
+    DEFINE_VAR(b);
+    DEFINE_VAR(m);
+    DEFINE_VAR(n);
+    DEFINE_VAR(w);
+    auto A = make_ref<TensorNode>("A", vector<int>({Batch, M, 2 * W + 1}),
+                                  vector<int>{0, 0, 0});
+    auto B = make_ref<TensorNode>("B", vector<int>({Batch, M, K}),
+                                  vector<int>{0, dilation * W, 0});
+    auto subA = makeSubscript(A, {b, m, w});
+    // auto subB = makeSubscript(B, {b, m + dilation * (w - W), n});
+    auto subB = makeSubscript(B, {b, m + dilation * w - dilation * W, n});
+    auto range = makeRangeOperator({{b, {0, Batch}}, {m, {0, M}}, {n, {0, K}}},
+                                   {{w, {0, 2 * W + 1}}}, subA * subB);
+    dbg(range);
+
+    // Derivation: this work without padding check in stage merging
+    Formula dialted_g2bmm(range, 0);
+    Derivator derivator;
+
+    bool isRuleBased = false;
+    if (isRuleBased) {
+        const vector<int> rules{1, 7, 7, 2, 8, 6, 6};
+        derivator.setDumpFirstSuccess("GBMM_");
+        derivator.ruleBasedDFS(dialted_g2bmm, 0, rules);
+    } else {
+        derivator.search(dialted_g2bmm, 0);
+    }
+
+    ASSERT_GE(derivator.getNumCandidates(), 1);
+    int nMatches = matchExprResult(derivator, "../test/log/gbmm/GBMM_9.expr");
+    EXPECT_GE(nMatches, 1);
+    derivator.print();
+    derivator.printStatistics();
+}
+
+TEST(G2BMM, RuleBased) {
+    const int n_heads = 8, seq_len = 10000, feat_len = 512;
+    const int Batch = n_heads, M = seq_len, K = feat_len, W = 32, dilation = 4;
+    DEFINE_VAR(b);
+    DEFINE_VAR(m);
+    DEFINE_VAR(w);
+    DEFINE_VAR(k);
+    auto A = make_ref<TensorNode>("A", vector<int>({Batch, M, K}),
+                                  vector<int>{0, 0, 0});
+    auto B = make_ref<TensorNode>("B", vector<int>({Batch, M, K}),
+                                  vector<int>{0, dilation * W, 0});
+
+    auto subA = makeSubscript(A, {b, m, k});
+    auto subB = makeSubscript(B, {b, m + dilation * (w - W), k});
+    auto range =
+        makeRangeOperator({{b, {0, Batch}}, {m, {0, M}}, {w, {0, 2 * W + 1}}},
+                          {{k, {0, K}}}, subA * subB);
+
+    // Derivation: this work without padding check in stage merging
+    Formula dialted_g2bmm(range, 0);
+    Derivator derivator;
+
+    bool isRuleBased = false;
+    if (isRuleBased) {
+        const vector<int> rules{1, 7, 7, 2, 8, 6, 6};
+        derivator.setDumpFirstSuccess("G2BMM_");
+        derivator.ruleBasedDFS(dialted_g2bmm, 0, rules);
+    } else {
+        derivator.search(dialted_g2bmm, 0);
+    }
+
+    ASSERT_GE(derivator.getNumCandidates(), 1);
+    int nMatches = matchExprResult(derivator, "../test/log/g2bmm/G2BMM_9.expr");
+    EXPECT_GE(nMatches, 1);
+    derivator.print();
+    derivator.printStatistics();
+}
diff --git a/test/nnet/test_guidedDLT.cc b/test/nnet/test_guidedDLT.cc
new file mode 100644
index 00000000..960c5e22
--- /dev/null
+++ b/test/nnet/test_guidedDLT.cc
@@ -0,0 +1,410 @@
+#include "nnet/Pass/Rule8GuidedDLT.h"
+#include "nnet/Visitor/CountRoutineVisitor.h"
+#include "nnet/derivator.h"
+#include "nnet/expr.h"
+#include "nnet/iterator_table.h"
+#include "nnet/permutation.h"
+#include "gtest/gtest.h"
+using namespace nnet;
+using namespace std;
+#define DEFINE_VAR(name) auto name = make_ref<VarNode>(#name);
+
+TEST(GuidedDLT, Permuation) {
+    // auto multiExprVar = {{"c", "i14", "i4"}, {"i17", "i22", "n"}};
+    DEFINE_VAR(_Conv_c);
+    DEFINE_VAR(_Conv_r);
+    DEFINE_VAR(_Conv_s);
+    DEFINE_VAR(_Conv_h);
+    DEFINE_VAR(_Conv_n);
+    DEFINE_VAR(_Conv_w);
+    DEFINE_VAR(c);
+    DEFINE_VAR(i14);
+    DEFINE_VAR(i4);
+    DEFINE_VAR(i17);
+    DEFINE_VAR(i22);
+    DEFINE_VAR(n);
+
+    // PermutationGenerator permutator{
+    //     {{"_Conv_c", "_Conv_r", "_Conv_s"}, {"_Conv_h", "_Conv_n",
+    //     "_Conv_w"}},
+    //     {{"c", "i14", "i4"}, {"i17", "i22", "n"}}};
+    PermutationGenerator permutator{
+        {{_Conv_c, _Conv_r, _Conv_s}, {_Conv_h, _Conv_n, _Conv_w}},
+        {{c, i14, i4}, {i17, i22, n}}};
+    int cnt = 0;
+    do {
+        cnt++;
+        dbg(permutator.get());
+    } while (permutator.next());
+    EXPECT_EQ(cnt, 6 * 6);
+}
+
+TEST(GuidedDLT, dimFusion_ConvToGemm_1Tensor) {
+    int N = 8, K = 16;
+
+    auto r = make_ref<VarNode>("r");
+    auto s = make_ref<VarNode>("s");
+    auto n = make_ref<VarNode>("n");
+    auto t1 = make_ref<VarNode>("t1");
+    auto t2 = make_ref<VarNode>("t2");
+    auto f = make_ref<VarNode>("f");
+    auto c = make_ref<VarNode>("c");
+    auto A = make_ref<TensorNode>("A", vector<int>({N, N, N, K}));
+    auto B = make_ref<TensorNode>("B", vector<int>({N, K}));
+
+    auto subA = makeSubscript(A, {n, t1, t2, c});
+    auto subB = makeSubscript(B, {r, c});
+    auto range = makeRangeOperator(
+        {{n, {0, N}}, {t1, {0, N}}, {t2, {0, N}}, {r, {0, N}}}, {{c, {0, K}}},
+        subA * subB);
+    // Derivation
+    Derivator derivator(2);
+    {
+        dbg(range);
+        Formula matmul(range, 0);
+        Derivator derivator(3);
+        Rule8GuidedDLT pass(derivator);
+        auto ret = pass.guidedDLT(matmul, 1, matmul.root, true);
+        ASSERT_GE(ret.size(), 1u);
+        dbg(ret);
+        EXPECT_EQ(ret.size(), 1u);
+        auto rangeOp = as<RangeOpNode>(ret[0]);
+        ASSERT_TRUE(rangeOp != nullptr);
+        EXPECT_EQ(rangeOp->getLoopVarRanges().size(), 4u);
+        EXPECT_EQ(rangeOp->getSumVarRanges().size(), 0u);
+        dbg(rangeOp, rangeOp->getSummand());
+        auto sub = as<SubscriptNode>(rangeOp->getSummand());
+        ASSERT_TRUE(sub != nullptr);
+        auto inner = as<RangeOpNode>(sub->getObject());
+        ASSERT_TRUE(inner != nullptr);
+        EXPECT_EQ(inner->getSumVarRanges().size(), 1u);
+        EXPECT_EQ(inner->getLoopVarRanges().size(), 2u);
+    }
+}
+
+TEST(GuidedDLT, dimFusion_ConvToGemm_1step) {
+    int N = 8, K = 16;
+
+    auto r = make_ref<VarNode>("r");
+    auto s = make_ref<VarNode>("s");
+    auto n = make_ref<VarNode>("n");
+    auto t1 = make_ref<VarNode>("t1");
+    auto t2 = make_ref<VarNode>("t2");
+    auto f = make_ref<VarNode>("f");
+    auto c = make_ref<VarNode>("c");
+    auto A = make_ref<TensorNode>("A", vector<int>({N, N, N, K}));
+    auto B = make_ref<TensorNode>("B", vector<int>({N, N, N, K}));
+
+    auto subA = makeSubscript(A, {n, t1, t2, c});
+    auto subB = makeSubscript(B, {r, s, f, c});
+    auto range = makeRangeOperator({{r, {0, N}},
+                                    {s, {0, N}},
+                                    {n, {0, N}},
+                                    {t1, {0, N}},
+                                    {t2, {0, N}},
+                                    {f, {0, N}}},
+                                   {{c, {0, K}}}, subA * subB);
+    // Derivation
+    {
+        Formula matmul(range, 0);
+        Derivator derivator(1);
+        Rule8GuidedDLT pass(derivator);
+        auto ret = pass.guidedDLT(matmul, 1, matmul.root, true);
+        ASSERT_GE(ret.size(), 1u);
+        dbg(ret);
+        for (const auto &cur : ret) {
+            auto rangeOp = as<RangeOpNode>(cur);
+            ASSERT_TRUE(rangeOp != nullptr);
+            EXPECT_EQ(rangeOp->getLoopVarRanges().size(), 6u);
+            EXPECT_EQ(rangeOp->getSumVarRanges().size(), 0u);
+            dbg(rangeOp, rangeOp->getSummand());
+            auto sub = as<SubscriptNode>(rangeOp->getSummand());
+            ASSERT_TRUE(sub != nullptr);
+            auto inner = as<RangeOpNode>(sub->getObject());
+            ASSERT_TRUE(inner != nullptr);
+            EXPECT_EQ(inner->getSumVarRanges().size(), 1u);
+            EXPECT_EQ(inner->getLoopVarRanges().size(), 4u);
+        }
+    }
+}
+
+TEST(GuidedDLT, dimFusion_ConvToGemm_real_2tensors) {
+    int N = 8, K = 16;
+
+    auto r = make_ref<VarNode>("r");
+    auto s = make_ref<VarNode>("s");
+    auto n = make_ref<VarNode>("n");
+    auto t1 = make_ref<VarNode>("t1");
+    auto t2 = make_ref<VarNode>("t2");
+    auto f = make_ref<VarNode>("f");
+    auto c = make_ref<VarNode>("c");
+    auto A = make_ref<TensorNode>("A", vector<int>({N, N, N, K}));
+    auto B = make_ref<TensorNode>("B", vector<int>({N, N, N, K}));
+
+    auto subA = makeSubscript(A, {n, t1, t2, c});
+    auto subB = makeSubscript(B, {r, s, f, c});
+    auto range = makeRangeOperator({{r, {0, N}},
+                                    {s, {0, N}},
+                                    {n, {0, N}},
+                                    {t1, {0, N}},
+                                    {t2, {0, N}},
+                                    {f, {0, N}}},
+                                   {{c, {0, K}}}, subA * subB);
+    // Derivation
+    {
+        Formula matmul(range, 0);
+        Derivator derivator(2);
+        const vector<int> rules = {8, 8};
+        derivator.ruleBasedDFS(matmul, 0, rules);
+        EXPECT_EQ(derivator.getSearchedMaxDepth(), 2);
+    }
+}
+
+TEST(GuidedDLT, Conv2Conv_KernelDLT) {
+    int N = 8, H = 224, W = 224, C = 16, F = 32;
+    int R = 9, S = 9;
+    DEFINE_VAR(i19);
+    DEFINE_VAR(i20);
+    DEFINE_VAR(j15);
+    DEFINE_VAR(j16);
+    DEFINE_VAR(j14);
+    DEFINE_VAR(j4);
+    DEFINE_VAR(n);
+    DEFINE_VAR(f);
+    DEFINE_VAR(c);
+    // auto A =
+    //     make_ref<TensorNode>("A", vector<int>({N, C, H, W}),
+    //     vector<int>{0, 0, 1, 1});
+    auto A = makeTensor("A", {N, C, H, W}, {0, 0, 3, 3});
+    auto B = make_ref<TensorNode>("W", vector<int>({F, C, R, S}));
+    // cur =
+    // L<i19:-1:226><i20:-1:2><i15:-1:226><i16:-1:2><n:0:8><f:0:32><pad=2,0,2,0,0,0,>Sum<i14:-1:2><i4:-1:2><c:0:16>
+    //     {({A<pad=0,0,4,4>}[n, c, (i15 + i4), (i14 + i19)] * {K}[f, c, ((3 *
+    //     i16) + i4), (i14 + (3 * i20))])} (std::shared_ptr<nnet::RangeOpNode>)
+
+    auto subA = makeSubscript(A, {n, c, (j15 + j4 - 1), (j14 - 1 + i19)});
+    auto subB = makeSubscript(B, {f, c, ((3 * j16) + j4), (j14 + (3 * i20))});
+    auto range = makeRangeOperator({{i19, {-1, 226}},
+                                    {i20, {0, 3}},
+                                    {j15, {-1, 226}},
+                                    {j16, {0, 3}},
+                                    {n, {0, 8}},
+                                    {f, {0, 32}}},
+                                   {{j14, {0, 3}}, {j4, {0, 3}}, {c, {0, 16}}},
+                                   subA * subB);
+    // Derivation
+    {
+        Formula conv(range, 0);
+        Derivator derivator(2);
+        derivator.setSearchState(1);
+        Rule8GuidedDLT pass(derivator);
+        auto ret = pass.guidedDLT(conv, 1, conv.root, true);
+        ASSERT_GE(ret.size(), 1u);
+        EXPECT_EQ(ret.size(), 1u);
+        auto rangeOp = as<RangeOpNode>(ret[0]);
+        ASSERT_TRUE(rangeOp != nullptr);
+        EXPECT_EQ(rangeOp->getLoopVarRanges().size(), 6u);
+        EXPECT_EQ(rangeOp->getSumVarRanges().size(), 0u);
+        dbg(rangeOp, rangeOp->getSummand());
+        auto sub = as<SubscriptNode>(rangeOp->getSummand());
+        ASSERT_TRUE(sub != nullptr);
+        auto inner = as<RangeOpNode>(sub->getObject());
+        ASSERT_TRUE(inner != nullptr);
+        EXPECT_EQ(inner->getSumVarRanges().size(), 3u);
+        EXPECT_EQ(inner->getLoopVarRanges().size(), 4u);
+    }
+}
+
+// TEST(GuidedDLT, Conv2Conv_outputDLT) {
+//     int N = 8, H = 224, W = 224, C = 16, F = 32;
+//     int R = 9, S = 9;
+//     DEFINE_VAR(j101);
+//     DEFINE_VAR(j55);
+//     DEFINE_VAR(j79);
+//     DEFINE_VAR(j14);
+//     DEFINE_VAR(j4);
+//     DEFINE_VAR(n);
+//     DEFINE_VAR(c);
+//     auto A = make_ref<TensorNode>("A", vector<int>({N, C, H, W}));
+//     auto B = make_ref<TensorNode>("W", vector<int>({F, C, R, S}));
+//     //
+//     {L<i101:0:288><i79:-3:227><i55:-3:227><n:0:8>Sum<i14:-1:2><i4:-1:2><c:0:16>
+//     // {({A<pad=0,0,4,4>}[n, c, (i4 + i55), (i14 + i79)] * {T1}[i101, c, i4,
+//     // i14])}}}}}
+//     auto subA = makeSubscript(A, {n, c, (j4 + j55), (j14 + j79)});
+//     auto subB = makeSubscript(B, {j101, c, j4, j14});
+//     auto range = makeRangeOperator(
+//         {{j101, {0, 288}}, {j79, {-3, 227}}, {j55, {-3, 227}}, {n, {0, 8}}},
+//         {{j14, {-1, 2}}, {j4, {-1, 2}}, {c, {0, 16}}}, subA * subB);
+//     // Derivation
+//     {
+//         Formula conv(range, 0);
+//         Derivator derivator(2);
+//         auto ret = derivator.guidedDLT(conv, 1, conv.root, true);
+//         dbg(ret);
+//         ASSERT_GE(ret.size(), 1);
+//         EXPECT_EQ(ret.size(), 1);
+//         auto rangeOp = as<RangeOpNode>(ret[0]);
+//         ASSERT_TRUE(rangeOp != nullptr);
+//         EXPECT_EQ(rangeOp->getLoopVarRanges().size(), 4);
+//         EXPECT_EQ(rangeOp->getSumVarRanges().size(), 0);
+//         dbg(rangeOp, rangeOp->getSummand());
+//         auto sub = as<SubscriptNode>(rangeOp->getSummand());
+//         ASSERT_TRUE(sub != nullptr);
+//         auto inner = as<RangeOpNode>(sub->getObject());
+//         ASSERT_TRUE(inner != nullptr);
+//         EXPECT_EQ(inner->getSumVarRanges().size(), 3);
+//         ASSERT_EQ(inner->getLoopVarRanges().size(), 4);
+//         const auto expectedOrder = vector{n, j101, j55, j79};
+//         for (int i = 0; i < 4; ++i) {
+//             EXPECT_EQ(inner->getLoopVar(i)->getName(),
+//                       expectedOrder[i]->getName());
+//         }
+//     }
+// }
+
+TEST(GuidedDLT, dimFusion_ConvToGemm_2Tensor_ruleBased) {
+    int N = 8, K = 16;
+
+    auto r = make_ref<VarNode>("r");
+    auto s = make_ref<VarNode>("s");
+    auto n = make_ref<VarNode>("n");
+    auto t1 = make_ref<VarNode>("t1");
+    auto t2 = make_ref<VarNode>("t2");
+    auto f = make_ref<VarNode>("f");
+    auto c = make_ref<VarNode>("c");
+    auto A = make_ref<TensorNode>("A", vector<int>({N, N, N, K}));
+    auto B = make_ref<TensorNode>("B", vector<int>({N, N, N, K}));
+
+    auto subA = makeSubscript(A, {n, t1, t2, c});
+    auto subB = makeSubscript(B, {r, s, f, c});
+    auto range = makeRangeOperator({{r, {0, N}},
+                                    {s, {0, N}},
+                                    {n, {0, N}},
+                                    {t1, {0, N}},
+                                    {t2, {0, N}},
+                                    {f, {0, N}}},
+                                   {{c, {0, K}}}, subA * subB);
+    // Derivation
+    Formula matmul(range, 0);
+    {
+        Derivator derivator(5);
+        // derivator.ruleBasedDFS(matmul, 0, {2, 2, 2, 6, 4, 4, 6});
+        // derivator.ruleBasedDFS(matmul, 0, {2, 2, 2, 6, 6});
+        derivator.ruleBasedDFS(matmul, 0, {8, 8, 6, 6});
+        dbg(derivator.getNumCandidates());
+        EXPECT_GT(derivator.getNumCandidates(), 0);
+        bool simplestMatched = false;
+        for (const auto &formula : derivator.getCandidates()) {
+            auto routineCnts = CountRoutineVisitor().count(formula.root);
+            if (routineCnts[routineTypeToId(
+                    RoutineType::ElementWiseNodeType)] == 3 &&
+                routineCnts[routineTypeToId(RoutineType::MatmulNodeType)] == 1)
+                simplestMatched = true;
+        }
+        EXPECT_TRUE(simplestMatched);
+    }
+}
+
+TEST(GuidedDLT, dimFusion_ConvToGemm_2Tensor_dfs) {
+    int N = 8, K = 16;
+
+    auto r = make_ref<VarNode>("r");
+    auto s = make_ref<VarNode>("s");
+    auto n = make_ref<VarNode>("n");
+    auto t1 = make_ref<VarNode>("t1");
+    auto t2 = make_ref<VarNode>("t2");
+    auto f = make_ref<VarNode>("f");
+    auto c = make_ref<VarNode>("c");
+    auto A = make_ref<TensorNode>("A", vector<int>({N, N, N, K}));
+    auto B = make_ref<TensorNode>("B", vector<int>({N, N, N, K}));
+
+    auto subA = makeSubscript(A, {n, t1, t2, c});
+    auto subB = makeSubscript(B, {r, s, f, c});
+    auto range = makeRangeOperator({{r, {0, N}},
+                                    {s, {0, N}},
+                                    {n, {0, N}},
+                                    {t1, {0, N}},
+                                    {t2, {0, N}},
+                                    {f, {0, N}}},
+                                   {{c, {0, K}}}, subA * subB);
+    // Derivation
+    Formula matmul(range, 0);
+    {
+        Derivator derivator(0);
+        derivator.search(matmul, 0);
+        dbg(derivator.getNumCandidates());
+        EXPECT_GT(derivator.getNumCandidates(), 0);
+        bool simplestMatched = false;
+        for (const auto &formula : derivator.getCandidates()) {
+            auto routineCnts = CountRoutineVisitor().count(formula.root);
+            // dbg("&&&&&&&&&&&&&&&&&&&&&", formula.bfsDepth, formula.root,
+            //     routineCnts);
+            // dbg(FullPrinterVisitor().print(formula.root));
+            if (routineCnts[routineTypeToId(
+                    RoutineType::ElementWiseNodeType)] == 3 &&
+                routineCnts[routineTypeToId(RoutineType::MatmulNodeType)] == 1)
+                simplestMatched = true;
+        }
+        EXPECT_TRUE(simplestMatched);
+    }
+}
+
+//     {L<i21:0:576><i19:2:228><i15:2:228><n:0:1>Sum<i14:0:3><i4:0:3><c:0:1>
+//     {({A<pad=0,0,4,4>}[n, c, ((i15 + i4) + -4), ((i14 + i19) + -4)] *
+//     {T1}[i21, c, i4, i14])}}}
+// ==> A : Input Tensor shape=[1,1,224,224] pad=[0,0,4,4]
+// ==> T1 : EleWise{K, }
+// L<i21:0:576><c:0:1><i4:0:3><i14:0:3>Sum  ...  [(i21 / 9),c,((3 * ((i21 / 3) %
+// 3)) + i4),(i14 + (3 * (i21 % 3)))]
+//     {K}
+// ==> K : Input Tensor shape=[64,1,9,9] pad=[0,0,0,0]
+
+TEST(GuidedDLT, match_ConvToConv_conv) {
+    auto r = make_ref<VarNode>("r");
+    auto s = make_ref<VarNode>("s");
+    auto n = make_ref<VarNode>("n");
+    auto i22 = make_ref<VarNode>("i22");
+    auto i4 = make_ref<VarNode>("i4");
+    auto i14 = make_ref<VarNode>("i14");
+    auto i17 = make_ref<VarNode>("i17");
+    auto i24 = make_ref<VarNode>("i24");
+    auto f = make_ref<VarNode>("f");
+    auto c = make_ref<VarNode>("c");
+    auto A = makeTensor("A", {1, 1, 224, 224}, {0, 0, 4, 4});
+    auto B = make_ref<TensorNode>("B", vector<int>({576, 1, 3, 3}));
+
+    auto subA = makeSubscript(A, {n, c, ((i22 + i4) + -4), ((i14 + i17) + -4)});
+    auto subB = makeSubscript(B, {i24, c, i4, i14});
+    auto range = makeRangeOperator(
+        {{i24, {0, 576}}, {i22, {2, 228}}, {i17, {2, 228}}, {n, {0, 1}}},
+        {{i14, {0, 3}}, {i4, {0, 3}}, {c, {0, 1}}}, subA * subB);
+    dbg(range);
+    // Derivation
+    {
+        Formula conv(range, 0);
+        Derivator derivator(2);
+        Rule8GuidedDLT pass(derivator);
+        auto ret = pass.guidedDLT(conv, 1, conv.root, true);
+        dbg(ret);
+        ASSERT_EQ(ret.size(), 1u);
+        // ASSERT_GE(ret.size(), 1);
+        // EXPECT_EQ(ret.size(), 1);
+        // auto rangeOp = as<RangeOpNode>(ret[0]);
+        // ASSERT_TRUE(rangeOp != nullptr);
+        // EXPECT_EQ(rangeOp->getLoopVarRanges().size(), 4);
+        // EXPECT_EQ(rangeOp->getSumVarRanges().size(), 0);
+        // dbg(rangeOp, rangeOp->getSummand());
+        // auto sub = as<SubscriptNode>(rangeOp->getSummand());
+        // ASSERT_TRUE(sub != nullptr);
+        // auto inner = as<RangeOpNode>(sub->getObject());
+        // ASSERT_TRUE(inner != nullptr);
+        // EXPECT_EQ(inner->getSumVarRanges().size(), 3);
+        // ASSERT_EQ(inner->getLoopVarRanges().size(), 4);
+        // const auto expectedOrder = vector{n, j101, j55, j79};
+        // for (int i = 0; i < 4; ++i) {
+        //     EXPECT_EQ(inner->getLoopVar(i)->getName(),
+        //               expectedOrder[i]->getName());
+        // }
+    }
+}
diff --git a/test/nnet/test_hash.cc b/test/nnet/test_hash.cc
new file mode 100644
index 00000000..e50ea81d
--- /dev/null
+++ b/test/nnet/test_hash.cc
@@ -0,0 +1,65 @@
+#include "nnet/Visitor/HashVisitor.h"
+#include "nnet/derivator.h"
+#include "nnet/expr.h"
+#include "gtest/gtest.h"
+using namespace nnet;
+using namespace std;
+
+TEST(Hash, Conv2gemm) {
+    int N = 8, H = 224, W = 224, C = 16, F = 32;
+    int R = 3, S = 3;
+    auto n = make_ref<VarNode>("n");
+    auto c = make_ref<VarNode>("c");
+    auto h = make_ref<VarNode>("h");
+    auto w = make_ref<VarNode>("w");
+    auto f = make_ref<VarNode>("f");
+    auto r = make_ref<VarNode>("r");
+    auto s = make_ref<VarNode>("s");
+    auto A = make_ref<TensorNode>("A", vector<int>({N, H, W, C}),
+                                  vector<int>{0, R / 2, S / 2, 0});
+    auto K = make_ref<TensorNode>("K", vector<int>({R, S, F, C}));
+
+    auto subA = makeSubscript(A, {n, h + r, w + s, c});
+    auto subK = makeSubscript(K, {r, s, f, c});
+    auto range = makeRangeOperator(
+        {{n, {0, N}}, {h, {0, H}}, {w, {0, W}}, {f, {0, F}}},
+        {{c, {0, C}}, {r, {-R / 2, R / 2 + 1}}, {s, {-S / 2, S / 2 + 1}}},
+        subA * subK);
+    cout << range->toReadable() << endl;
+    auto hash0 = HashVisitor().getHash(range);
+    cout << hash0 << endl;
+
+    subA = makeSubscript(A, {n, h + s, w + r, c});
+    subK = makeSubscript(K, {s, r, f, c});
+    range = makeRangeOperator(
+        {{n, {0, N}}, {h, {0, H}}, {w, {0, W}}, {f, {0, F}}},
+        {{c, {0, C}}, {r, {-R / 2, R / 2 + 1}}, {s, {-S / 2, S / 2 + 1}}},
+        subA * subK);
+    cout << range->toReadable() << endl;
+    auto hash1 = HashVisitor().getHash(range);
+    cout << hash1 << endl;
+
+    subA = makeSubscript(A, {n, s + h, w + r, c});
+    subK = makeSubscript(K, {s, r, f, c});
+    range = makeRangeOperator(
+        {{n, {0, N}}, {h, {0, H}}, {w, {0, W}}, {f, {0, F}}},
+        {{c, {0, C}}, {r, {-R / 2, R / 2 + 1}}, {s, {-S / 2, S / 2 + 1}}},
+        subA * subK);
+    cout << range->toReadable() << endl;
+    auto hash2 = HashVisitor().getHash(range);
+    cout << hash2 << endl;
+
+    subA = makeSubscript(A, {n, s + h, w, c});
+    subK = makeSubscript(K, {s, r, f, c});
+    range = makeRangeOperator(
+        {{n, {0, N}}, {h, {0, H}}, {w, {0, W}}, {f, {0, F}}},
+        {{c, {0, C}}, {r, {-R / 2, R / 2 + 1}}, {s, {-S / 2, S / 2 + 1}}},
+        subA * subK);
+    cout << range->toReadable() << endl;
+    auto hash3 = HashVisitor().getHash(range);
+    cout << hash3 << endl;
+
+    EXPECT_EQ(hash0, hash1);
+    EXPECT_EQ(hash0, hash2);
+    EXPECT_NE(hash0, hash3);
+}
diff --git a/test/nnet/test_matchConv.cc b/test/nnet/test_matchConv.cc
new file mode 100644
index 00000000..1fa446bb
--- /dev/null
+++ b/test/nnet/test_matchConv.cc
@@ -0,0 +1,144 @@
+#include "nnet/Visitor/PatternMatcher.h"
+#include "nnet/derivator.h"
+#include "nnet/expr.h"
+#include "nnet/permutation.h"
+#include "gtest/gtest.h"
+using namespace nnet;
+using namespace std;
+#define DEFINE_VAR(name) auto name = make_ref<VarNode>(#name);
+
+VecExpr matchConv(Derivator &derivator, const RangeOp &rangeOp) {
+    const auto &patternIT = ConvPattern::getPattern();
+    return PatternMatcher(derivator, rangeOp)
+        .matchWithPattern(rangeOp, patternIT);
+}
+
+TEST(MatchConv, Permutation_Generator) {
+    DEFINE_VAR(a);
+    DEFINE_VAR(b);
+    DEFINE_VAR(c);
+    DEFINE_VAR(d);
+    DEFINE_VAR(e);
+    DEFINE_VAR(f);
+    DEFINE_VAR(i0);
+    DEFINE_VAR(i1);
+    DEFINE_VAR(i2);
+    PermutationGenerator gen({{a, b, c}, {d, e, f}},
+                             {{i0, i1, i2}, {i0, i1, i2}});
+    int cnt = 0;
+    do {
+        if (cnt == 6) {
+            auto mapping = gen.get();
+            EXPECT_EQ(mapping[a]->getName(), "i0");
+            EXPECT_EQ(mapping[b]->getName(), "i2");
+            EXPECT_EQ(mapping[c]->getName(), "i1");
+            EXPECT_EQ(mapping[d]->getName(), "i0");
+            EXPECT_EQ(mapping[e]->getName(), "i1");
+            EXPECT_EQ(mapping[f]->getName(), "i2");
+        }
+        if (cnt == 7) {
+            auto mapping = gen.get();
+            EXPECT_EQ(mapping[a]->getName(), "i0");
+            EXPECT_EQ(mapping[b]->getName(), "i2");
+            EXPECT_EQ(mapping[c]->getName(), "i1");
+            EXPECT_EQ(mapping[d]->getName(), "i0");
+            EXPECT_EQ(mapping[e]->getName(), "i2");
+            EXPECT_EQ(mapping[f]->getName(), "i1");
+        }
+        ++cnt;
+    } while (gen.next());
+    EXPECT_EQ(cnt, 6 * 6);
+}
+
+TEST(MatchConv, NoBatch) {
+    DEFINE_VAR(n);
+    DEFINE_VAR(c);
+    DEFINE_VAR(h);
+    DEFINE_VAR(w);
+    DEFINE_VAR(f);
+    DEFINE_VAR(r);
+    DEFINE_VAR(s);
+    int N = 8, C = 12, H = 224, W = 224, F = 16, R = 3, S = 3;
+    auto A = make_ref<TensorNode>("A", vector<int>({N, C, H, W}),
+                                  vector<int>{0, 0, R / 2, S / 2});
+    auto B = make_ref<TensorNode>("B", vector<int>({F, C, R, S}));
+
+    auto subA = makeSubscript(A, {n, c, h + r, w + s});
+    auto subB = makeSubscript(B, {f, c, r, s});
+    auto rangeOp = makeRangeOperator(
+        {{n, {0, N}}, {f, {0, F}}, {h, {0, H}}, {w, {0, W}}},
+        {{c, {0, C}}, {r, {-R / 2, R / 2}}, {s, {-S / 2, S / 2}}}, subA * subB);
+
+    // Derivation
+    Formula matmul(rangeOp, 0);
+    Derivator derivator;
+    auto results = matchConv(derivator, rangeOp);
+    dbg(results);
+    EXPECT_EQ(results.size(), 1u);
+    auto tensor = as<TensorNode>(results[0]);
+    ASSERT_NE(tensor, nullptr);
+    dbg(tensor->getSource()->toReadable());
+    dbg(tensor->getSource());
+
+    const auto &conv = as<ConvNode>(tensor->getSource());
+    ASSERT_NE(conv, nullptr);
+    // Conv{p = 1, 1, s= 1, 1, d= 1, 1; A K = A<pad=0,0,1,1>, B}
+    ConvNode matchedConv = ConvNode(rangeOp, A, B, 1, 1);
+    EXPECT_EQ(matchedConv, *conv);
+}
+
+// wrong index of kernel
+TEST(MatchConv, Wrong0) {
+    DEFINE_VAR(n);
+    DEFINE_VAR(c);
+    DEFINE_VAR(h);
+    DEFINE_VAR(w);
+    DEFINE_VAR(f);
+    DEFINE_VAR(r);
+    DEFINE_VAR(s);
+    int N = 8, C = 12, H = 224, W = 224, F = 16, R = 3, S = 3;
+    auto A = make_ref<TensorNode>("A", vector<int>({N, C, H, W}),
+                                  vector<int>{0, 0, R / 2, S / 2});
+    auto B = make_ref<TensorNode>("B", vector<int>({F, C, R, S}));
+
+    auto subA = makeSubscript(A, {n, c, h + r, w + s});
+    auto subB = makeSubscript(B, {c, f, r, s});
+    auto rangeOp =
+        makeRangeOperator({{n, {0, N}}, {f, {0, F}}, {h, {0, H}}, {w, {0, W}}},
+                          {{c, {0, C}}, {r, {0, R}}, {s, {0, S}}}, subA * subB);
+
+    // Derivation
+    Formula matmul(rangeOp, 0);
+    Derivator derivator;
+    auto results = matchConv(derivator, rangeOp);
+    dbg(results);
+    EXPECT_EQ(results.size(), 0u);
+}
+
+// wrong index of input tensor
+TEST(MatchConv, Wrong1) {
+    DEFINE_VAR(n);
+    DEFINE_VAR(c);
+    DEFINE_VAR(h);
+    DEFINE_VAR(w);
+    DEFINE_VAR(f);
+    DEFINE_VAR(r);
+    DEFINE_VAR(s);
+    int N = 8, C = 12, H = 224, W = 224, F = 16, R = 3, S = 3;
+    auto A = make_ref<TensorNode>("A", vector<int>({N, C, H, W}),
+                                  vector<int>{0, 0, R / 2, S / 2});
+    auto B = make_ref<TensorNode>("B", vector<int>({F, C, R, S}));
+
+    auto subA = makeSubscript(A, {n, c, h + 2 * r, w + s});
+    auto subB = makeSubscript(B, {f, c, r, s});
+    auto rangeOp =
+        makeRangeOperator({{n, {0, N}}, {f, {0, F}}, {h, {0, H}}, {w, {0, W}}},
+                          {{c, {0, C}}, {r, {0, R}}, {s, {0, S}}}, subA * subB);
+
+    // Derivation
+    Formula matmul(rangeOp, 0);
+    Derivator derivator;
+    auto results = matchConv(derivator, rangeOp);
+    dbg(results);
+    EXPECT_EQ(results.size(), 0u);
+}
diff --git a/test/nnet/test_matchElementWise.cc b/test/nnet/test_matchElementWise.cc
new file mode 100644
index 00000000..1f92712e
--- /dev/null
+++ b/test/nnet/test_matchElementWise.cc
@@ -0,0 +1,102 @@
+#include "nnet/Visitor/CountRoutineVisitor.h"
+#include "nnet/derivator.h"
+#include "nnet/expr.h"
+#include "nnet/iterator_table.h"
+#include "gtest/gtest.h"
+using namespace nnet;
+using namespace std;
+
+TEST(MatchElementWise, NoMatch) {
+    int N = 8, H = 224, W = 224, C = 16, F = 32;
+    int R = 9, S = 9;
+    auto n = make_ref<VarNode>("n");
+    auto c = make_ref<VarNode>("c");
+    auto h = make_ref<VarNode>("h");
+    auto w = make_ref<VarNode>("w");
+    auto f = make_ref<VarNode>("f");
+    auto r = make_ref<VarNode>("r");
+    auto s = make_ref<VarNode>("s");
+    auto A = make_ref<TensorNode>("A", vector<int>({N, C, H, W}),
+                                  vector<int>{0, 0, R / 2, S / 2});
+    auto K = make_ref<TensorNode>("K", vector<int>({F, C, R, S}));
+
+    auto subA = makeSubscript(A, {n, c, h + r, w + s});
+    auto subK = makeSubscript(K, {f, c, r + R / 2, s + S / 2});
+
+    auto range = makeRangeOperator(
+        {{n, {0, N}}, {h, {0, H}}, {w, {0, W}}, {f, {0, F}}},
+        {{c, {0, C}}, {r, {-R / 2, R / 2 + 1}}, {s, {-S / 2, S / 2 + 1}}},
+        subA * subK);
+    // cout << range->toReadable() << endl;
+
+    // Derivation
+    Formula conv_9x9(range, 0);
+    Derivator derivator(1);
+    derivator.search(conv_9x9, 1);
+    bool hasMatch = false;
+    for (const auto &formula : derivator.getCandidates()) {
+        if (CountRoutineVisitor().match(formula.root, 0, 0, 1))
+            hasMatch = true;
+    }
+    // Cannot be matched by a single membound
+    EXPECT_FALSE(hasMatch);
+    derivator.print();
+}
+
+TEST(MatchElementWise, TwoStagesWithPadding) {
+    int N = 8;
+    auto n = make_ref<VarNode>("n");
+    auto c = make_ref<VarNode>("c");
+    auto h = make_ref<VarNode>("h");
+    auto w = make_ref<VarNode>("w");
+    auto f = make_ref<VarNode>("f");
+    auto r = make_ref<VarNode>("r");
+    auto s = make_ref<VarNode>("s");
+    auto A =
+        make_ref<TensorNode>("A", vector<int>({N, N}), vector<int>{0, N / 2});
+    auto K = make_ref<TensorNode>("K", vector<int>({N, N}));
+
+    auto innerSub = makeSubscript(A, {n, h});
+    auto innerRange =
+        makeRangeOperator({{n, {0, N}}, {h, {0, N}}}, {}, innerSub);
+    innerRange->setPaddings({0, 2});
+    auto outerSub = makeSubscript(innerRange, {r, s + r});
+    auto outerRange =
+        makeRangeOperator({{r, {0, 4}}, {s, {0, 5}}}, {}, outerSub);
+    // cout << range->toReadable() << endl;
+
+    // Derivation
+    Formula conv_9x9(outerRange, 0);
+    Derivator derivator;
+    derivator.ruleBasedDFS(conv_9x9, 0, {6});
+    EXPECT_EQ(derivator.getNumCandidates(), 1);
+}
+
+TEST(MatchElementWise, TwoStagesWithImperfectedNestedPadding) {
+    int N = 8;
+    auto n = make_ref<VarNode>("n");
+    auto c = make_ref<VarNode>("c");
+    auto h = make_ref<VarNode>("h");
+    auto w = make_ref<VarNode>("w");
+    auto f = make_ref<VarNode>("f");
+    auto r = make_ref<VarNode>("r");
+    auto s = make_ref<VarNode>("s");
+    auto A = make_ref<TensorNode>("A", vector<int>({100, 100}),
+                                  vector<int>{0, N / 2});
+    auto K = make_ref<TensorNode>("K", vector<int>({100, 100}));
+
+    auto innerSub = makeSubscript(A, {n, h + n});
+    auto innerRange =
+        makeRangeOperator({{n, {0, 8}}, {h, {0, 8}}}, {}, innerSub);
+    innerRange->setPaddings({0, 2});
+    auto outerSub = makeSubscript(innerRange, {r, s + r});
+    auto outerRange =
+        makeRangeOperator({{r, {0, 4}}, {s, {0, 5}}}, {}, outerSub);
+    // cout << range->toReadable() << endl;
+
+    // Derivation
+    Formula conv_9x9(outerRange, 0);
+    Derivator derivator;
+    derivator.ruleBasedDFS(conv_9x9, 0, {6});
+    EXPECT_EQ(derivator.getNumCandidates(), 0);
+}
\ No newline at end of file
diff --git a/test/nnet/test_matchMatmul.cc b/test/nnet/test_matchMatmul.cc
new file mode 100644
index 00000000..b158ca69
--- /dev/null
+++ b/test/nnet/test_matchMatmul.cc
@@ -0,0 +1,338 @@
+#include "nnet/Visitor/FullPrinterVisitor.h"
+#include "nnet/Visitor/PatternMatcher.h"
+#include "nnet/derivator.h"
+#include "nnet/expr.h"
+#include "nnet/iterator_table.h"
+#include "gtest/gtest.h"
+using namespace nnet;
+using namespace std;
+
+VecExpr matchMatmul(Derivator &derivator, const RangeOp &rangeOp) {
+    const auto &patternIT = MatmulPattern::getMatmulPattern();
+    return PatternMatcher(derivator, rangeOp)
+        .matchWithPattern(rangeOp, patternIT);
+}
+
+TEST(MatchMatmul, NoBatch) {
+    int M = 224, N = 8, K = 16;
+    auto m = make_ref<VarNode>("m");
+    auto n = make_ref<VarNode>("n");
+    auto k = make_ref<VarNode>("k");
+    // Transpose requires the existance of source for inputs
+    auto _A = make_ref<TensorNode>("A_shadow", vector<int>({M, K}));
+    auto _B = make_ref<TensorNode>("B_shadow", vector<int>({N, K}));
+    auto rangeA = makeRangeOperator({{m, {0, M}}, {k, {0, K}}}, {},
+                                    makeSubscript(_A, {m, k}));
+    auto rangeB = makeRangeOperator({{n, {0, N}}, {k, {0, K}}}, {},
+                                    makeSubscript(_A, {n, k}));
+    auto elemA =
+        make_ref<ElementWiseNode>(rangeA, vector<Tensor>{_A}, _A->getShape());
+    auto elemB =
+        make_ref<ElementWiseNode>(rangeB, vector<Tensor>{_B}, _B->getShape());
+    auto A = makeTensor("A", vector<int>({M, K}), {}, elemA);
+    auto B = makeTensor("B", vector<int>({N, K}), {}, elemB);
+
+    auto subA = makeSubscript(A, {m, k});
+    auto subB = makeSubscript(B, {n, k});
+    auto range = makeRangeOperator({{m, {0, M}}, {n, {0, N}}}, {{k, {0, K}}},
+                                   subA * subB);
+
+    // Derivation
+    Formula matmul(range, 0);
+    Derivator derivator;
+    auto results = matchMatmul(derivator, range);
+    // Matmul{bmnk = 1, 224, 8, 16; AB = A, B; transAB = 0, 0}
+    // Matmul{bmnk = 1, 8, 224, 16; AB = B, A; transAB = 0, 0}
+    vector<MatmulNode> answers = {
+        MatmulNode(range, A, B, 1, 224, 8, 16, false, true)};
+    set<MatmulArgs> argSet;
+    for (const auto &result : results) {
+        static int cnt = 0;
+        cout << "========" << ++cnt << endl;
+        std::cout << FullPrinterVisitor().print(result);
+
+        Tensor tensor = as<TensorNode>(result);
+        if (!tensor) {
+            tensor = as<TensorNode>(
+                as<SubscriptNode>(as<RangeOpNode>(result)->getSummand())
+                    ->getObject());
+        }
+        argSet.emplace(as<MatmulNode>(tensor->getSource())->getArgs());
+    }
+    EXPECT_EQ(results.size(), 8u);
+    EXPECT_EQ(argSet.size(), 8u);
+    EXPECT_TRUE(argSet.count({1, 224, 8, 16, false, true}));
+    EXPECT_TRUE(argSet.count({1, 8, 224, 16, false, true}));
+}
+
+TEST(MatchMatmul, Illegal0) {
+    int M = 224, N = 8, K = 16;
+    auto m = make_ref<VarNode>("m");
+    auto n = make_ref<VarNode>("n");
+    auto k = make_ref<VarNode>("k");
+    auto A = make_ref<TensorNode>("A", vector<int>({M, K}));
+    auto B = make_ref<TensorNode>("B", vector<int>({N, K}));
+
+    auto subA = makeSubscript(A, {m, k});
+    auto subB = makeSubscript(B, {k, k});
+    auto range = makeRangeOperator({{m, {0, M}}, {n, {0, N}}}, {{k, {0, K}}},
+                                   subA * subB);
+    // Derivation
+    Formula matmul(range, 0);
+    Derivator derivator;
+    auto results = matchMatmul(derivator, range);
+    vector<MatmulNode> answers = {};
+    EXPECT_EQ(results.size(), answers.size());
+}
+
+TEST(MatchMatmul, Illegal1) {
+    int M = 224, N = 8, K = 16;
+    auto m = make_ref<VarNode>("m");
+    auto n = make_ref<VarNode>("n");
+    auto k = make_ref<VarNode>("k");
+    auto A = make_ref<TensorNode>("A", vector<int>({M, K}));
+    auto B = make_ref<TensorNode>("B", vector<int>({N, K}));
+
+    auto subA = makeSubscript(A, {m, k});
+    auto subB = makeSubscript(B, {n, k});
+    auto range = makeRangeOperator({{m, {0, M}}, {n, {0, N}}, {k, {0, K}}}, {},
+                                   subA * subB);
+    // Derivation
+    Formula matmul(range, 0);
+    Derivator derivator;
+    auto results = matchMatmul(derivator, range);
+    vector<MatmulNode> answers = {};
+    EXPECT_EQ(results.size(), answers.size());
+}
+
+TEST(MatchMatmul, Illegal2) {
+    int M = 224, N = 8, K = 16;
+    auto m = make_ref<VarNode>("m");
+    auto n = make_ref<VarNode>("n");
+    auto k = make_ref<VarNode>("k");
+    auto A = make_ref<TensorNode>("A", vector<int>({M, K}));
+    auto B = make_ref<TensorNode>("B", vector<int>({N, K}));
+
+    auto subA = makeSubscript(A, {m, m + k});
+    auto subB = makeSubscript(B, {n, k});
+    auto range = makeRangeOperator({{m, {0, M}}, {n, {0, N}}}, {{k, {0, K}}},
+                                   subA * subB);
+    // Derivation
+    Formula matmul(range, 0);
+    Derivator derivator;
+    auto results = matchMatmul(derivator, range);
+    // dbg(results.size());
+    // for (const auto &result : results) {
+    //     dbg(result);
+    //     dbg(*result);
+    //     dbg(as<TensorNode>(result)->getShape());
+    //     dbg(as<TensorNode>(result)->getSource());
+    // }
+    vector<MatmulNode> answers = {};
+    EXPECT_EQ(results.size(), answers.size());
+}
+
+TEST(MatchMatmul, Illegal3) {
+    int M = 224, N = 8, K = 16;
+    auto m = make_ref<VarNode>("m");
+    auto n = make_ref<VarNode>("n");
+    auto k = make_ref<VarNode>("k");
+    auto A = make_ref<TensorNode>("A", vector<int>({M, K}));
+    auto B = make_ref<TensorNode>("B", vector<int>({N, K}));
+
+    auto subA = makeSubscript(A, {m, n + k});
+    auto subB = makeSubscript(B, {n, k});
+    auto range = makeRangeOperator({{m, {0, M}}, {n, {0, N}}}, {{k, {0, K}}},
+                                   subA * subB);
+    // Derivation
+    Formula matmul(range, 0);
+    Derivator derivator;
+    auto results = matchMatmul(derivator, range);
+    // dbg(results.size());
+    // for (const auto &result : results) {
+    //     dbg(result);
+    //     dbg(*result);
+    //     dbg(as<TensorNode>(result)->getShape());
+    //     dbg(as<TensorNode>(result)->getSource());
+    // }
+    vector<MatmulNode> answers = {};
+    EXPECT_EQ(results.size(), answers.size());
+}
+
+// Different position of the appearance
+TEST(MatchMatmul, Illegal4) {
+    int M = 224, N = 8, K = 16;
+    auto m = make_ref<VarNode>("m");
+    auto n = make_ref<VarNode>("n");
+    auto k = make_ref<VarNode>("k");
+    auto A = make_ref<TensorNode>("A", vector<int>({M, K}));
+    auto B = make_ref<TensorNode>("B", vector<int>({N, K}));
+
+    auto subA = makeSubscript(A, {m, k});
+    auto subB = makeSubscript(B, {k, n});
+    auto range = makeRangeOperator({{m, {0, M}}, {n, {0, N}}}, {{k, {0, K}}},
+                                   subA * subB);
+    // Derivation
+    Formula matmul(range, 0);
+    Derivator derivator;
+    auto results = matchMatmul(derivator, range);
+    // dbg(results.size());
+    // for (const auto &result : results) {
+    //     dbg(result);
+    //     dbg(*result);
+    //     dbg(as<TensorNode>(result)->getShape());
+    //     dbg(as<TensorNode>(result)->getSource());
+    // }
+    vector<MatmulNode> answers = {};
+    EXPECT_EQ(results.size(), answers.size());
+}
+
+// Different position of the appearance
+TEST(MatchMatmul, IteratorTable1) {
+    int M = 224, N = 8, K = 16;
+    auto m = make_ref<VarNode>("_m");
+    auto n = make_ref<VarNode>("_n");
+    auto k = make_ref<VarNode>("_k");
+    auto A = make_ref<TensorNode>("_A", vector<int>({M, K}));
+    auto B = make_ref<TensorNode>("_B", vector<int>({N, K}));
+
+    auto subA = makeSubscript(A, {m, k});
+    auto subB = makeSubscript(B, {n, k});
+    auto range = makeRangeOperator({{m, {0, M}}, {n, {0, N}}}, {{k, {0, K}}},
+                                   subA * subB);
+    class IteratorTable exprIT;
+    ASSERT_TRUE(exprIT.analyzeExpr(range));
+    exprIT.buildTable({0, 1});
+    auto const &[posTable, iterInTensorDim, strideInTensor] =
+        exprIT.getTables();
+    // dbg(posTable, iterInTensorDim, strideInTensor);
+    EXPECT_EQ(posTable.size(), 8u);
+    for (int i = 0; i < 8; ++i) {
+        if (i == 3 || i == 5 || i == 6)
+            EXPECT_EQ(posTable[i].size(), 1u);
+        else
+            EXPECT_EQ(posTable[i].size(), 0u);
+    }
+    // iterInTensorDim = {{{"_m"}, {"_k"}}, {{"_n"}, {"_k"}}}
+    EXPECT_EQ(iterInTensorDim.size(), 2u);
+    for (int i = 0; i < 2; ++i) {
+        EXPECT_EQ(iterInTensorDim[i].size(), 2u);
+        for (int j = 0; j < 2; ++j)
+            EXPECT_EQ(iterInTensorDim[i][j].size(), 1u);
+    }
+    EXPECT_TRUE(iterInTensorDim[0][0][0]->equal(m));
+    EXPECT_TRUE(iterInTensorDim[0][1][0]->equal(k));
+    EXPECT_TRUE(iterInTensorDim[1][0][0]->equal(n));
+    EXPECT_TRUE(iterInTensorDim[0][1][0]->equal(k));
+    // strideInTensor = {{"_k", {1, 1}}, {"_m", {16, 0}}, {"_n", {0, 16}}}
+    EXPECT_EQ(strideInTensor.size(), 3u);
+    EXPECT_EQ(strideInTensor.at(k)[0], 1);
+    EXPECT_EQ(strideInTensor.at(k)[1], 1);
+    EXPECT_EQ(strideInTensor.at(m)[0], 16);
+    EXPECT_EQ(strideInTensor.at(m)[1], 0);
+    EXPECT_EQ(strideInTensor.at(n)[0], 0);
+    EXPECT_EQ(strideInTensor.at(n)[1], 16);
+}
+
+// Different position of the appearance
+TEST(MatchMatmul, IteratorTable2) {
+    int M = 224, N = 8, K = 16;
+    auto m = make_ref<VarNode>("_m");
+    auto n = make_ref<VarNode>("_n");
+    auto k = make_ref<VarNode>("_k");
+    auto c2 = make_ref<ConstantNode>(2);
+    auto A = make_ref<TensorNode>("_A", vector<int>({M, K}));
+    auto B = make_ref<TensorNode>("_B", vector<int>({N, K}));
+
+    auto subA = makeSubscript(A, {m, k + m});
+    auto subB = makeSubscript(B, {n, c2 * (k + c2)});
+    auto range = makeRangeOperator({{m, {0, M}}, {n, {0, N}}}, {{k, {0, K}}},
+                                   subA * subB);
+    class IteratorTable exprIT;
+    ASSERT_TRUE(exprIT.analyzeExpr(range));
+    exprIT.buildTable({0, 1});
+    auto const &[posTable, iterInTensorDim, strideInTensor] =
+        exprIT.getTables();
+    // dbg(posTable, iterInTensorDim, strideInTensor);
+    EXPECT_EQ(posTable.size(), 8u);
+    for (int i = 0; i < 8; ++i) {
+        if (i == 3 || i == 5 || i == 6)
+            EXPECT_EQ(posTable[i].size(), 1u);
+        else
+            EXPECT_EQ(posTable[i].size(), 0u);
+    }
+    // iterInTensorDim = {{{"_m"}, {"_k"}}, {{"_n"}, {"_k"}}}
+    EXPECT_EQ(iterInTensorDim.size(), 2u);
+    for (int i = 0; i < 2; ++i) {
+        EXPECT_EQ(iterInTensorDim[i].size(), 2u);
+        for (int j = 0; j < 2; ++j)
+            if (i == 0 && j == 1)
+                EXPECT_EQ(iterInTensorDim[i][j].size(), 2u);
+            else
+                EXPECT_EQ(iterInTensorDim[i][j].size(), 1u);
+    }
+    EXPECT_TRUE(iterInTensorDim[0][0][0]->equal(m));
+    EXPECT_TRUE(iterInTensorDim[0][1][0]->equal(k));
+    EXPECT_TRUE(iterInTensorDim[1][0][0]->equal(n));
+    EXPECT_TRUE(iterInTensorDim[0][1][0]->equal(k));
+    // strideInTensor = {{"_k", {1, 1}}, {"_m", {16, 0}}, {"_n", {0, 16}}}
+    EXPECT_EQ(strideInTensor.size(), 3u);
+    EXPECT_EQ(strideInTensor.at(k)[0], 1);
+    EXPECT_EQ(strideInTensor.at(k)[1], 2);
+    EXPECT_EQ(strideInTensor.at(m)[0], 17);
+    EXPECT_EQ(strideInTensor.at(m)[1], 0);
+    EXPECT_EQ(strideInTensor.at(n)[0], 0);
+    EXPECT_EQ(strideInTensor.at(n)[1], 16);
+}
+
+// TEST(MatchMatmul, NoBatch_Traspose) {
+//     int M = 224, N = 8, K = 16;
+//     auto m = make_ref<VarNode>("m");
+//     auto n = make_ref<VarNode>("n");
+//     auto k = make_ref<VarNode>("k");
+//     auto A = make_ref<TensorNode>("A", vector<int>({M, K}));
+//     auto B = make_ref<TensorNode>("B", vector<int>({N, K}));
+
+//     auto subA = makeSubscript(A, {m, k});
+//     auto subB = makeSubscript(B, {n, k});
+//     auto rangeA = makeRangeOperator({{m, {0, M}}, {k, {0, K}}}, {}, subA);
+//     auto rangeB = makeRangeOperator({{n, {0, N}}, {k, {0, K}}}, {}, subB);
+//     auto ewA = make_ref<ElementWiseNode>(rangeA, vector<Tensor>{A},
+//                                             rangeA->getOutputShape());
+//     auto ewB = make_ref<ElementWiseNode>(rangeB, vector<Tensor>{B},
+//                                             rangeB->getOutputShape());
+//     auto tensorA = makeTensor("TA", A->getShape(), {}, ewA);
+//     auto tensorB = makeTensor("TB", B->getShape(), {}, ewB);
+//     auto subRangeA = makeSubscript(tensorA, {m, k});
+//     auto subRangeB = makeSubscript(tensorB, {n, k});
+//     auto range = makeRangeOperator({{m, {0, M}}, {n, {0, N}}}, {{k, {0, K}}},
+//                                    subRangeA * subRangeB);
+
+//     // Derivation
+//     Formula matmul(range, 0);
+//     Derivator derivator;
+//     auto results = derivator.matchMatmul(range);
+//     // Matmul{bmnk = 1, 224, 8, 16; AB = A, B; transAB = 0, 0}
+//     // Matmul{bmnk = 1, 8, 224, 16; AB = B, A; transAB = 0, 0}
+//     EXPECT_EQ(results.size(), 8);
+//     vector<MatmulNode> answers = {
+//         MatmulNode(range, {A, B}, 1, 224, 8, 16, false, false)};
+//     // tensor permutation is diabled
+//     // MatmulNode(range, {B, A}, 1, 8, 224, 16, false, false)};
+//     for (const auto &result : results) {
+//         dbg(result);
+//         dbg(FullPrinterVisitor().print(result));
+//     }
+//     // for (const auto &ans : answers) {
+//     //     bool matched = false;
+//     //     for (const auto &result : results) {
+//     //         FullPrinterVisitor().print(result);
+//     //         auto resultMatmul = //
+//     //         as<MatmulNode>(as<TensorNode>(result)->getSource());
+//     //         EXPECT_TRUE(resultMatmul != nullptr);
+//     //         if (ans == *resultMatmul)
+//     //             matched = true;
+//     //     }
+//     //     EXPECT_TRUE(matched);
+//     // }
+// }
diff --git a/test/nnet/test_matchReshape.cc b/test/nnet/test_matchReshape.cc
new file mode 100644
index 00000000..e3024bdb
--- /dev/null
+++ b/test/nnet/test_matchReshape.cc
@@ -0,0 +1,150 @@
+#include "nnet/Visitor/MatchReshapeVisitor.h"
+#include "nnet/expr.h"
+#include "nnet/routine.h"
+#include "nnet/test.h"
+#include "gtest/gtest.h"
+#include <chrono>
+using namespace nnet;
+using namespace std;
+
+TEST(MatchReshape, ElementWise_NHWC) {
+    DEFINE_VAR(i, c);
+    auto A = make_ref<TensorNode>("A", vector<int>({1, 7, 7, 512}));
+    auto subA = makeSubscript(A, {i / 49, i / 7, i % 7, c});
+    auto expr = makeRangeOperator({{i, {0, 49}}, {c, {0, 512}}}, {}, subA);
+    auto matchReshapeVisitor = MatchReshapeVisitor();
+    EXPECT_TRUE(matchReshapeVisitor(expr));
+}
+
+TEST(MatchReshape, ElementWise_with_Sum) {
+    DEFINE_VAR(n, h, w, f, r, s);
+    auto A = make_ref<TensorNode>("A", vector<int>{49, 512});
+    auto subA = makeSubscript(
+        A, {(49 * n + 7 * (h + r)) + (w + s), ((512 * r) + (512 * s)) + f});
+    auto expr = makeRangeOperator(
+        {{n, {0, 1}}, {h, {0, 7}}, {w, {0, 7}}, {f, {0, 512}}},
+        {{r, {0, 1}}, {s, {0, 1}}}, subA);
+    auto matchReshapeVisitor = MatchReshapeVisitor();
+    EXPECT_TRUE(matchReshapeVisitor(expr));
+}
+
+// clang-format off
+// One candiate of TEST(Conv2gemm1x1, NCHW_FCRS_search)
+// ==> ROOT
+// T26
+// ==> T26 : EleWise{T15, }
+// L<n:0:1><f:0:512><h:0:7><w:0:7>Sum<r:0:1><s:0:1>  ...  [((f + r) + s),(((49 * n) + (7 * (h + r))) + (w + s))]
+//     {T15}
+// ==> T15 : Matmul{bmnk = 1, 512, 49, 512; AB = T3, T14; transAB = 0, 0}
+// L<transA:0:101><transB:0:100><swapAB:0:101>Sum  ...  [transA,transB]
+//     {L<i39:0:49><i38:0:512>Sum<c:0:512>
+//     {({T4}[i39, c] * {T3}[i38, c])}}
+// ==> T3 : EleWise{K, }
+// L<i38:0:512><c:0:512>Sum  ...  [i38,c,(i38 % 1),(i38 % 1)]
+//     {K}
+// ==> T14 : EleWise{A, }
+// L<c:0:512><i39:0:49>Sum  ...  [i39,c]
+//     {L<i39:0:49><c:0:512>Sum  ...  [(i39 / 49),c,(i39 / 7),(i39 % 7)]
+//     {A}}
+// ==> K : Input Tensor shape=[512,512,1,1] pad=[0,0,0,0]
+// ==> A : Input Tensor shape=[1,512,7,7] pad=[0,0,0,0]
+// clang-format on
+
+TEST(MatchReshape, Conv2gemm_1x1_NCHW_K) {
+    // ==> T3 : EleWise{K, }
+    // L<i38:0:512><c:0:512>Sum  ...  [i38,c,(i38 % 1),(i38 % 1)]
+    //     {K}
+    // ==> K : Input Tensor shape=[512,512,1,1] pad=[0,0,0,0]
+    DEFINE_VAR(i, c);
+    auto A = make_ref<TensorNode>("K", vector<int>({512, 512, 1, 1}));
+    auto subA = makeSubscript(A, {i, c, i % 1, i % 1});
+    auto expr = makeRangeOperator({{i, {0, 512}}, {c, {0, 512}}}, {}, subA);
+    auto matchReshapeVisitor = MatchReshapeVisitor();
+    EXPECT_TRUE(matchReshapeVisitor(expr));
+}
+
+TEST(MatchReshape, Conv2gemm_1x1_NCHW_A_merged) {
+    // ==> T6 : EleWise{A, }
+    // L<c:0:512><i39:0:49>Sum  ...  [i39,c]
+    //     {L<i39:0:49><c:0:512>Sum  ...  [(i39 / 49),c,(i39 / 7),(i39 % 7)]
+    //     {A}}
+    // ==> A : Input Tensor shape=[1,512,7,7] pad=[0,0,0,0]
+    DEFINE_VAR(i, c);
+    auto A = make_ref<TensorNode>("A", vector<int>({1, 512, 7, 7}));
+    auto subA = makeSubscript(A, {(i / 49), c, (i / 7), (i % 7)});
+    auto expr = makeRangeOperator({{c, {0, 512}}, {i, {0, 49}}}, {}, subA);
+    auto matchReshapeVisitor = MatchReshapeVisitor();
+    EXPECT_TRUE(matchReshapeVisitor(expr));
+}
+
+TEST(MatchReshape, Conv2gemm_1x1_NCHW_A) {
+    // ==> T14 : EleWise{A, }
+    // L<c:0:512><i39:0:49>Sum  ...  [i39,c]
+    //     {L<i39:0:49><c:0:512>Sum  ...  [(i39 / 49),c,(i39 / 7),(i39 % 7)]
+    //     {A}}
+    // ==> A : Input Tensor shape=[1,512,7,7] pad=[0,0,0,0]
+    DEFINE_VAR(i, c);
+    auto A = make_ref<TensorNode>("A", vector<int>({1, 512, 7, 7}));
+    auto subA = makeSubscript(A, {(i / 49), c, (i / 7), (i % 7)});
+    auto inner = makeRangeOperator({{i, {0, 49}}, {c, {0, 512}}}, {}, subA);
+    auto subInner = makeSubscript(inner, {i, c});
+    auto outer = makeRangeOperator({{c, {0, 512}}, {i, {0, 49}}}, {}, subInner);
+    EXPECT_TRUE(MatchReshapeVisitor()(outer));
+}
+
+TEST(MatchReshape, Conv2gemm_1x1_NCHW_Output) {
+    // ==> T26 : EleWise{T15, }
+    // L<n:0:1><f:0:512><h:0:7><w:0:7>Sum<r:0:1><s:0:1>  ...  [((f + r) +
+    // s),(((49 * n) + (7 * (h + r))) + (w + s))] {T15}
+    // ==> T15 : Matmul{bmnk = 1, 512, 49, 512; AB = T3, T14; transAB = 0, 0}
+    DEFINE_VAR(n, h, w, f, r, s);
+    auto A = make_ref<TensorNode>("A", vector<int>({512, 49}));
+    auto subA = makeSubscript(
+        A, {((f + r) + s), (((49 * n) + (7 * (h + r))) + (w + s))});
+    auto expr = makeRangeOperator(
+        {{n, {0, 1}}, {f, {0, 512}}, {h, {0, 7}}, {w, {0, 7}}},
+        {{r, {0, 1}}, {s, {0, 1}}}, subA);
+    auto matchReshapeVisitor = MatchReshapeVisitor();
+    EXPECT_TRUE(matchReshapeVisitor(expr));
+}
+
+TEST(MatchReshape, Conv2gemm_1x1_NCHW_Output_wrong) {
+    // ==> T22 : EleWise{T7, }
+    // L<n:0:1><h:0:7><w:0:7><f:0:512>Sum<r:0:1><s:0:1>  ...  [(((49 * n) + (7 *
+    // (h + r))) + (w + s)),((f + r) + s)]
+    //     {T7}
+    // ==> T7 : Matmul{bmnk = 1, 49, 512, 512; AB = T6, T3; transAB = 1, 1}
+    DEFINE_VAR(n, h, w, f, r, s);
+    auto A = make_ref<TensorNode>("A", vector<int>({49, 512}));
+    auto subA = makeSubscript(
+        A, {(((49 * n) + (6 * (h + r))) + (w + s)), ((f + r) + s)});
+    auto expr = makeRangeOperator(
+        {{n, {0, 1}}, {h, {0, 7}}, {w, {0, 7}}, {f, {0, 512}}},
+        {{r, {0, 1}}, {s, {0, 1}}}, subA);
+    auto matchReshapeVisitor = MatchReshapeVisitor();
+    EXPECT_FALSE(matchReshapeVisitor(expr));
+}
+
+// MemBound[124644277](i0=0, o0=119, exec_time=0.0037384, NNet
+// Inputs=[A<pad=0,0,0,3>,]) L<c:0:2048><i35:0:49>Sum  ...  [i35,c]
+//     {L<i35:0:49><c:0:2048>Sum  ...  [(i35 / 49),c,(i35 / 7),(i35 % 7)]
+//     {A<pad=0,0,0,3>}}
+
+TEST(MatchReshape, Conv2gemm_1x7_A) {
+    //     MemBound[124644277](i0=0, o0=119, exec_time=0.0037384, NNet
+    //     Inputs=[A<pad=0,0,0,3>,])
+    // L<c:0:2048><i35:0:49>Sum  ...  [i35,c]
+    //     {L<i35:0:49><c:0:2048>Sum  ...  [(i35 / 49),c,(i35 / 7),(i35 % 7)]
+    //     {A<pad=0,0,0,3>}}
+    const int N = 1, C = 2048, H = 7, W = 7, R = 1, S = 7; // gcn_Conv_137
+    DEFINE_VAR(i, c);
+    auto A = make_ref<TensorNode>("A", vector<int>({N, C, H, W}),
+                                  vector<int>{0, 0, R / 2, S / 2});
+    auto subA = makeSubscript(A, {(i / 49), c, (i / 7), (i % 7)});
+    auto inner = makeRangeOperator({{i, {0, 49}}, {c, {0, 2048}}}, {}, subA);
+    auto subInner = makeSubscript(inner, {i, c});
+    auto outer =
+        makeRangeOperator({{c, {0, 2048}}, {i, {0, 49}}}, {}, subInner);
+    dbg(outer);
+    EXPECT_TRUE(MatchReshapeVisitor()(outer));
+}
\ No newline at end of file
diff --git a/test/nnet/test_mergeStage.cc b/test/nnet/test_mergeStage.cc
new file mode 100644
index 00000000..e681010e
--- /dev/null
+++ b/test/nnet/test_mergeStage.cc
@@ -0,0 +1,61 @@
+#include "core/graph.h"
+#include "core/operator.h"
+#include "core/tensor.h"
+#include "nnet/Visitor/HashVisitor.h"
+#include "nnet/Visitor/MergeMemboundMutator.h"
+#include "nnet/expr.h"
+#include "gtest/gtest.h"
+using namespace nnet;
+using namespace std;
+#define DEFINE_VAR(name) auto name = make_ref<VarNode>(#name);
+
+TEST(FuseMembound, Relu) {
+    const int n_heads = 8, seq_len = 10000, feat_len = 512;
+    // dilation_heads = 2;
+    const int Batch = n_heads, M = seq_len, K = feat_len, W = 32;
+    DEFINE_VAR(b);
+    DEFINE_VAR(m);
+    DEFINE_VAR(w);
+    DEFINE_VAR(k);
+    auto A = make_ref<TensorNode>("A", vector<int>({Batch, M, K}),
+                                  vector<int>{0, 0, 0});
+
+    auto subA = makeSubscript(A, {b, m, k});
+    auto innerRange = makeRangeOperator(
+        {{b, {0, Batch}}, {m, {0, M}}, {k, {0, K}}}, {}, subA);
+    auto relu = make_ref<FuncNode>(subA, FuncType::Relu);
+    auto range =
+        makeRangeOperator({{b, {0, Batch}}, {m, {0, M}}, {w, {0, 2 * W + 1}}},
+                          {{k, {0, K}}}, relu);
+    dbg(range);
+    dbg(MergeMemboundMutator({range, innerRange}).merge());
+    cout << MergeMemboundMutator({range, innerRange}).merge()->toReadable()
+         << endl;
+}
+
+TEST(FuseMembound, MemMemFusion) {
+    const int n_heads = 8, seq_len = 100, feat_len = 100;
+    // dilation_heads = 2;
+    const int Batch = n_heads, M = seq_len, K = feat_len;
+    DEFINE_VAR(b);
+    DEFINE_VAR(m);
+    DEFINE_VAR(w);
+    DEFINE_VAR(k);
+    auto A = make_ref<TensorNode>("A", vector<int>({Batch, M, K}),
+                                  vector<int>{0, 0, 0});
+    auto B = make_ref<TensorNode>("B", vector<int>({Batch, K, M}),
+                                  vector<int>{0, 0, 0});
+
+    auto subA = makeSubscript(B, {b, k, m});
+    auto range =
+        makeRangeOperator({{b, {0, Batch}}, {m, {0, M}}}, {{k, {0, K}}}, subA);
+    auto innerRange =
+        makeRangeOperator({{b, {0, Batch}}, {m, {0, M}}, {k, {0, K}}}, {},
+                          makeSubscript(A, {b, k, m}));
+    dbg(range, innerRange);
+    auto merged = MergeMemboundMutator({range, innerRange}).merge();
+    dbg(merged);
+    RangeOp ans = makeRangeOperator({{b, {0, Batch}}, {m, {0, M}}},
+                                    {{k, {0, K}}}, makeSubscript(A, {b, m, k}));
+    EXPECT_EQ(HashVisitor().getHash(merged), HashVisitor().getHash(ans));
+}
\ No newline at end of file
diff --git a/test/nnet/test_oobChecker.cc b/test/nnet/test_oobChecker.cc
new file mode 100644
index 00000000..d9d494fa
--- /dev/null
+++ b/test/nnet/test_oobChecker.cc
@@ -0,0 +1,54 @@
+#include "nnet/Visitor/CheckOOBVisitor.h"
+#include "nnet/derivator.h"
+#include "nnet/expr.h"
+#include "gtest/gtest.h"
+using namespace nnet;
+using namespace std;
+#define DEFINE_VAR(name) auto name = make_ref<VarNode>(#name);
+
+TEST(OOB, noOOB) {
+    const int n_heads = 8, seq_len = 10000, feat_len = 512;
+    // dilation_heads = 2;
+    const int Batch = n_heads, M = seq_len, K = feat_len, W = 32, dilation = 4;
+    DEFINE_VAR(b);
+    DEFINE_VAR(m);
+    DEFINE_VAR(n);
+    DEFINE_VAR(w);
+    auto A = make_ref<TensorNode>("A", vector<int>({Batch, M, 2 * W + 1}),
+                                  vector<int>{0, 0, 0});
+    auto B = make_ref<TensorNode>("B", vector<int>({Batch, M, K}),
+                                  vector<int>{0, dilation * W, 0});
+    auto subA = makeSubscript(A, {b, m, w});
+    // auto subB = makeSubscript(B, {b, m + dilation * (w - W), n});
+    auto subB = makeSubscript(B, {b, m + dilation * w - dilation * W, n});
+    auto range = makeRangeOperator({{b, {0, Batch}}, {m, {0, M}}, {n, {0, K}}},
+                                   {{w, {0, 2 * W + 1}}}, subA * subB);
+    dbg(range);
+
+    CheckOOBVisitor oobchecker;
+    EXPECT_FALSE(oobchecker.checkRangeOp(range));
+}
+
+TEST(OOB, hasOOB) {
+    const int n_heads = 8, seq_len = 10000, feat_len = 512;
+    // dilation_heads = 2;
+    const int Batch = n_heads, M = seq_len, K = feat_len, W = 32, dilation = 4;
+    DEFINE_VAR(b);
+    DEFINE_VAR(m);
+    DEFINE_VAR(n);
+    DEFINE_VAR(w);
+    auto A = make_ref<TensorNode>("A", vector<int>({Batch, M, 2 * W + 1}),
+                                  vector<int>{0, 0, 0});
+    auto B = make_ref<TensorNode>("B", vector<int>({Batch, M, K}),
+                                  vector<int>{0, 0, 0});
+    auto subA = makeSubscript(A, {b, m, w});
+    // auto subB = makeSubscript(B, {b, m + dilation * (w - W), n});
+    auto subB = makeSubscript(B, {b, m - dilation * (w), n});
+    auto range =
+        makeRangeOperator({{b, {0, Batch}}, {m, {0, M + 1}}, {n, {0, K}}},
+                          {{w, {0, 2 * W + 1}}}, subA * subB);
+    dbg(range);
+
+    CheckOOBVisitor oobchecker;
+    EXPECT_TRUE(oobchecker.checkRangeOp(range));
+}
\ No newline at end of file
diff --git a/test/nnet/test_rangeMagnify.cc b/test/nnet/test_rangeMagnify.cc
new file mode 100644
index 00000000..fb56b419
--- /dev/null
+++ b/test/nnet/test_rangeMagnify.cc
@@ -0,0 +1,45 @@
+#include "nnet/Visitor/InputVisitor.h"
+#include "nnet/Visitor/RangeMagnifyVisitor.h"
+#include "nnet/derivator.h"
+#include "nnet/expr.h"
+#include "gtest/gtest.h"
+using namespace nnet;
+using namespace std;
+#define DEFINE_VAR(name) auto name = make_ref<VarNode>(#name);
+
+TEST(RangeMagnify, Conv5x5) {
+    int N = 1, H = 224, W = 224, C = 16, F = 64;
+    int R = 5, S = 5;
+    auto n = make_ref<VarNode>("n");
+    auto c = make_ref<VarNode>("c");
+    auto h = make_ref<VarNode>("h");
+    auto w = make_ref<VarNode>("w");
+    auto f = make_ref<VarNode>("f");
+    auto r = make_ref<VarNode>("r");
+    auto s = make_ref<VarNode>("s");
+    auto A = make_ref<TensorNode>("A", vector<int>({N, C, H, W}),
+                                  vector<int>{0, 0, R / 2, S / 2});
+    auto K = make_ref<TensorNode>("K", vector<int>({F, C, R, S}));
+
+    auto subA = makeSubscript(A, {n, c, h + r - R / 2, w + s - S / 2});
+    auto subK = makeSubscript(K, {f, c, r, s});
+
+    auto range =
+        makeRangeOperator({{n, {0, N}}, {f, {0, F}}, {h, {0, H}}, {w, {0, W}}},
+                          {{c, {0, C}}, {r, {0, R}}, {s, {0, S}}}, subA * subK);
+    // cout << range->toReadable() << endl;
+    auto ret = RangeMagnifyVisitor().magnify(
+        range, {{c, {0, C}}, {r, {0, R + 1}}, {s, {0, S + 1}}});
+    ASSERT_TRUE(ret);
+    EXPECT_EQ(ret->getRange(r), pair(0, R + 1));
+    auto inputs = InputVisitor().getInputs(ret);
+    ASSERT_EQ(inputs.size(), 2u);
+    EXPECT_EQ(inputs[0]->getPadding(0), 0);
+    EXPECT_EQ(inputs[0]->getPadding(1), 0);
+    EXPECT_EQ(inputs[0]->getPadding(2), 3);
+    EXPECT_EQ(inputs[0]->getPadding(3), 3);
+    EXPECT_EQ(inputs[1]->getPadding(0), 0);
+    EXPECT_EQ(inputs[1]->getPadding(1), 0);
+    EXPECT_EQ(inputs[1]->getPadding(2), 1);
+    EXPECT_EQ(inputs[1]->getPadding(3), 1);
+}
diff --git a/test/nnet/test_relaxation.cc b/test/nnet/test_relaxation.cc
new file mode 100644
index 00000000..79c4fa3f
--- /dev/null
+++ b/test/nnet/test_relaxation.cc
@@ -0,0 +1,91 @@
+#include "nnet/Pass/Rule5RangeRelaxation.h"
+#include "nnet/derivator.h"
+#include "nnet/expr.h"
+#include "nnet/visitor.h"
+#include "gtest/gtest.h"
+using namespace nnet;
+using namespace std;
+#define DEFINE_VAR(name) auto name = make_ref<VarNode>(#name);
+
+// TODO: write a test
+// [..net/src/derivator.cc:32 (dfs)] StartDfs = "DFS dep=6" (std::string)
+// [..net/src/derivator.cc:32 (dfs)] origin = ==> ROOT
+// L<n:0:8><h:0:224><w:0:224><f:0:32>Sum<i13:-1:2><i3:-1:2>  ...
+// [i13,i3,n,h,w,f]
+//     {L<i13:-1:2><i3:-1:2><n:0:8><h:0:224><w:0:224><f:0:32>Sum  ...  [((1 * w)
+//     + (3 * i13)),i13,i13,i3,n,h,w,f]
+//     {L<i17:-3:227><i18:-1:2><i13:-1:2><i3:-1:2><n:0:8><h:0:224><w:0:224><f:0:32>Sum
+//     ...  [((1 * h) + (3 * i3)),i3,i17,i18,i13,i3,n,h,w,f]
+//     {L<i21:-3:227><i22:-1:2><i17:-3:227><i18:-1:2><i13:-1:2><i3:-1:2><n:0:8><h:0:224><w:0:224><f:0:32>Sum<i14:-1:2><i4:-1:2><c:0:16>
+//     {({A<pad=0,4,4,0>}[n, c, (i21 + i4), (i14 + i17)] * {K}[f, c, ((3 * i22)
+//     + i4), (i14 + (3 * i18))])}}}}
+// ==> A : Input Tensor shape=[8,16,224,224] pad=[0,4,4,0]
+// ==> K : Input Tensor shape=[32,16,9,9] pad=[0,0,0,0]
+//  (nnet::Formula&)
+// [..net/src/derivator.cc:670 (rule5RangeRelaxation)] msg = "====== END
+// rule5RangeRelaxation: relax iterating ranges i21 (-3,227) to (-1,226), "
+// (std::string)
+// [..net/src/derivator.cc:32 (dfs)] StartDfs = "DFS dep=7" (std::string)
+// [..net/src/derivator.cc:32 (dfs)] origin = ==> ROOT
+// L<n:0:8><h:0:224><w:0:224><f:0:32>Sum<i13:-1:2><i3:-1:2>  ...
+// [i13,i3,n,h,w,f]
+//     {L<i13:-1:2><i3:-1:2><n:0:8><h:0:224><w:0:224><f:0:32>Sum  ...  [((1 * w)
+//     + (3 * i13)),i13,i13,i3,n,h,w,f]
+//     {L<i17:-3:227><i18:-1:2><i13:-1:2><i3:-1:2><n:0:8><h:0:224><w:0:224><f:0:32>Sum
+//     ...  [((1 * h) + (3 * i3)),i3,i17,i18,i13,i3,n,h,w,f]
+//     {L<i21:-1:226><i22:-1:2><i17:-3:227><i18:-1:2><i13:-1:2><i3:-1:2><n:0:8><h:0:224><w:0:224><f:0:32><pad=2,0,0,0,0,0,0,0,0,0,>Sum<i14:-1:2><i4:-1:2><c:0:16>
+//     {({A<pad=0,4,4,0>}[n, c, (i21 + i4), (i14 + i17)] * {K}[f, c, ((3 * i22)
+//     + i4), (i14 + (3 * i18))])}}}}
+// ==> A : Input Tensor shape=[8,16,224,224] pad=[0,4,4,0]
+// ==> K : Input Tensor shape=[32,16,9,9] pad=[0,0,0,0]
+//  (nnet::Formula&)
+TEST(Relaxation, NaiveMatch) {
+    //     [..rc/nnet/derivator.cc:73 (ruleBasedDerivate)] origin = ==> ROOT
+    // L<n:0:8><h:0:224><w:0:224><f:0:32>Sum<i13:0:3><i3:0:3>  ...  [(h + (3 *
+    // i3)),i3,(w + (3 * i13)),i13,n,f]
+    //     {L<i22:0:230><i23:0:3><i17:0:230><i18:0:3><n:0:8><f:0:32>Sum<i14:0:3><i4:0:3><c:0:16>
+    //     {({A<pad=0,0,4,4>}[n, c, ((i22 + i4) + -4), ((i14 + i17) + -4)] *
+    //     {K}[f, c, ((3 * i23) + i4), (i14 + (3 * i18))])}}
+    // ==> A : Input Tensor shape=[8,16,224,224] pad=[0,0,4,4]
+    // ==> K : Input Tensor shape=[32,16,9,9] pad=[0,0,0,0]
+    DEFINE_VAR(n);
+    DEFINE_VAR(h);
+    DEFINE_VAR(w);
+    DEFINE_VAR(f);
+    DEFINE_VAR(i13);
+    DEFINE_VAR(i3);
+    DEFINE_VAR(i22);
+    DEFINE_VAR(i23);
+    DEFINE_VAR(i17);
+    DEFINE_VAR(i18);
+    DEFINE_VAR(i14);
+    DEFINE_VAR(i4);
+    DEFINE_VAR(c);
+    auto A = makeTensor("A", {8, 16, 224, 224}, {0, 0, 4, 4});
+    auto K = makeTensor("K", {32, 16, 9, 9});
+    auto subA = makeSubscript(A, {n, c, ((i22 + i4) + -4), ((i14 + i17) + -4)});
+    auto subK = makeSubscript(K, {f, c, ((3 * i23) + i4), (i14 + (3 * i18))});
+    auto innerRange = makeRangeOperator(
+        {{i22, {0, 230}},
+         {i23, {0, 3}},
+         {i17, {0, 230}},
+         {i18, {0, 3}},
+         {n, {0, 8}},
+         {f, {0, 32}}},
+        {{i14, {0, 3}}, {i4, {0, 3}}, {c, {0, 16}}}, subA * subK);
+    auto subOuter = makeSubscript(
+        innerRange, {(h + (3 * i3)), i3, (w + (3 * i13)), i13, n, f});
+    auto outerRange = makeRangeOperator(
+        {{n, {0, 8}}, {h, {0, 224}}, {w, {0, 224}}, {f, {0, 32}}},
+        {{i13, {0, 3}}, {i3, {0, 3}}}, subOuter);
+    Derivator derivator(0);
+    Formula formula(innerRange, 0);
+    Rule5RangeRelaxation pass(derivator);
+    pass.setEnableLogging(false);
+    pass.setEnableDebug(true);
+    auto ret = pass.rule5RangeRelaxation(formula, 0, formula.root);
+    ASSERT_TRUE(ret);
+    auto rangeOp = as<RangeOpNode>(ret);
+    EXPECT_EQ(rangeOp->getRange(i22), pair(2, 228));
+    EXPECT_EQ(rangeOp->getRange(i17), pair(2, 228));
+}
\ No newline at end of file
diff --git a/test/nnet/test_serializer.cc b/test/nnet/test_serializer.cc
new file mode 100644
index 00000000..26175b55
--- /dev/null
+++ b/test/nnet/test_serializer.cc
@@ -0,0 +1,100 @@
+#include "nnet/Visitor/FullPrinterVisitor.h"
+#include "nnet/Visitor/Serializer.h"
+#include "gtest/gtest.h"
+using namespace nnet;
+using namespace std;
+#define DEFINE_VAR(name) auto name = make_ref<VarNode>(#name);
+
+//{L<i3:0:2500><i4:0:4><b:0:8><w:0:65>Sum<k:0:512>
+//{({A}[b, (i3 + (2500 * i4)), k] * {B<pad=0,128,0>}[b, ((i3 + (2500 * i4)) +
+// w), k])}}
+// ==> A : Input Tensor shape=[8,10000,512] pad=[0,0,0]
+// ==> B : Input Tensor shape=[8,10000,512] pad=[0,128,0]
+
+Expr buildSimpleExpr() {
+    DEFINE_VAR(b);
+    DEFINE_VAR(w);
+    DEFINE_VAR(k);
+    DEFINE_VAR(i3);
+    DEFINE_VAR(i4);
+    auto A = makeTensor("A", {8, 10000, 512}, {0, 0, 0});
+    auto B = makeTensor("B", {8, 10000, 512}, {0, 128, 0});
+    auto subA = makeSubscript(A, {b, (i3 + (2500 * i4)), k});
+    auto subB = makeSubscript(B, {b, ((i3 + (2500 * i4)) + w), k});
+    auto range = makeRangeOperator(
+        {{i3, {0, 2500}}, {i4, {0, 4}}, {b, {0, 8}}, {w, {0, 65}}},
+        {{k, {0, 512}}}, subA * subB);
+    return range;
+}
+
+Expr buildNestedExpr() {
+    DEFINE_VAR(j1);
+    DEFINE_VAR(j2);
+    DEFINE_VAR(j3);
+    // Build a Matmul to verify.
+    const int M = 10000, N = 512, K = 3;
+    auto C = make_ref<TensorNode>("C", vector<int>({M, K}));
+    auto D = make_ref<TensorNode>("D", vector<int>({N, K}));
+    auto F = make_ref<TensorNode>("F", vector<int>({N, K}));
+    auto matmulExpr = makeSubscript(C, {j1, j3}) * makeSubscript(D, {j2, j3});
+    Expr expr = makeRangeOperator({{j1, {0, M}}, {j2, {0, N}}}, {{j3, {0, K}}},
+                                  matmulExpr);
+    auto matmul = make_ref<MatmulNode>(expr, C, D, 1, M, N, K, false, false);
+
+    vector<int> shapeE{N, K};
+    auto ele2 = make_ref<ElementWiseNode>(Expr(), vector{F}, shapeE);
+    auto E = make_ref<TensorNode>("E", shapeE, shapeE, ele2);
+    auto ele1 = make_ref<ElementWiseNode>(expr, vector{E}, shapeE);
+
+    DEFINE_VAR(b);
+    DEFINE_VAR(w);
+    DEFINE_VAR(k);
+    DEFINE_VAR(i3);
+    DEFINE_VAR(i4);
+    auto A = makeTensor("A", {8, 10000, 512}, {0, 0, 0}, matmul);
+    auto B = makeTensor("B", {8, 10000, 512}, {0, 128, 0}, ele1);
+    auto subA = makeSubscript(A, {b, (i3 + (2500 * i4)), k});
+    auto subB = makeSubscript(B, {b, ((i3 + (2500 * i4)) + w), k});
+    auto range = makeRangeOperator(
+        {{i3, {0, 2500}}, {i4, {0, 4}}, {b, {0, 8}}, {w, {0, 65}}},
+        {{k, {0, 512}}}, subA * subB);
+    return range;
+}
+
+TEST(Serializer, Serialization) {
+    auto range = buildSimpleExpr();
+    auto isSuccessful = Serializer().serialize(range, "./test_serializer.json");
+    EXPECT_TRUE(isSuccessful);
+}
+
+TEST(Serializer, CompareTwoExprs) {
+    DEFINE_VAR(b);
+    DEFINE_VAR(w);
+    DEFINE_VAR(k);
+    DEFINE_VAR(i3);
+    DEFINE_VAR(i4);
+    auto A = makeTensor("A", {8, 10000, 512}, {0, 0, 0});
+    auto B = makeTensor("B", {8, 10000, 512}, {0, 128, 0});
+    auto subA = makeSubscript(A, {b, (i3 + (2500 * i4)), k});
+    auto subB = makeSubscript(B, {b, ((i3 + (2500 * i4)) + w), k});
+    auto range = makeRangeOperator(
+        {{i3, {0, 2500}}, {i4, {0, 4}}, {b, {0, 8}}, {w, {0, 65}}},
+        {{k, {0, 512}}}, subA * subB);
+    Serializer().serialize(range, "./test_serializer.json");
+    auto expr = Serializer().deserialize("./test_serializer.json");
+
+    EXPECT_EQ(range->toReadable(), expr->toReadable());
+}
+
+TEST(Serializer, Serialization_NestedTensor) {
+    FullPrinterVisitor printer;
+    auto range = buildNestedExpr();
+    auto ans = printer.print(range);
+    dbg(ans);
+    auto isSuccessful = Serializer().serialize(range, "./test_serializer.json");
+    EXPECT_TRUE(isSuccessful);
+    auto exprDeserialized = Serializer().deserialize("./test_serializer.json");
+    auto output = printer.print(exprDeserialized);
+    dbg(output);
+    EXPECT_EQ(output, ans);
+}
\ No newline at end of file
diff --git a/test/nnet/test_simplify.cc b/test/nnet/test_simplify.cc
new file mode 100644
index 00000000..9a9d71f7
--- /dev/null
+++ b/test/nnet/test_simplify.cc
@@ -0,0 +1,221 @@
+#include "nnet/Visitor/CountRoutineVisitor.h"
+#include "nnet/Visitor/SimplifyExprVisitor.h"
+#include "nnet/derivator.h"
+#include "nnet/expr.h"
+#include "gtest/gtest.h"
+using namespace nnet;
+using namespace std;
+#define DEFINE_VAR(name) auto name = make_ref<VarNode>(#name);
+
+TEST(Simplify, NaiveMatch) {
+    DEFINE_VAR(i14);
+    DEFINE_VAR(i15);
+
+    SimplifyExprVisitor simplifyExprMutator;
+    {
+        auto expr = (i15 - i15) + i14;
+        auto s = simplifyExprMutator.simplify(expr);
+        Var var = as<VarNode>(s);
+        ASSERT_TRUE(var);
+        EXPECT_TRUE(var->getName() == i14);
+    }
+    {
+        auto expr = (i15 - i15) + i14;
+        auto s = simplifyExprMutator.simplify(expr);
+        Var var = as<VarNode>(s);
+        ASSERT_TRUE(var);
+        EXPECT_TRUE(var->getName() == i14);
+    }
+}
+
+TEST(Simplify, caseInConv2Conv) {
+    DEFINE_VAR(i4);
+    DEFINE_VAR(i15);
+    DEFINE_VAR(i16);
+    // cout << range->toReadable() << endl;
+
+    SimplifyExprVisitor simplifyExprMutator;
+    {
+        auto expr1 = ((2 * i15 - (3 * i16)) + ((3 * i16) + i4));
+        auto s = simplifyExprMutator.simplify(expr1);
+        auto root = as<BinaryOpNode>(s);
+        ASSERT_TRUE(root);
+        EXPECT_EQ(root->getOpType(), OpType::Add);
+        auto t1 = as<BinaryOpNode>(root->getLhs());
+        auto t2 = as<BinaryOpNode>(root->getRhs());
+        ASSERT_TRUE((t1 == nullptr) ^ (t2 == nullptr));
+        BinaryOp bop;
+        Var var;
+        if (t1) {
+            bop = t1;
+            var = as<VarNode>(root->getRhs());
+        } else {
+            bop = t2;
+            var = as<VarNode>(root->getLhs());
+        }
+        EXPECT_EQ(var->getName(), i4->getName());
+        Constant constant = as<ConstantNode>(bop->getLhs());
+        Var var2 = as<VarNode>(bop->getRhs());
+        EXPECT_EQ(constant->getValue(), 2);
+        EXPECT_EQ(var2->getName(), i15->getName());
+    }
+}
+
+TEST(Simplify, caseInSG2BMM) {
+    DEFINE_VAR(i6);
+    // cout << range->toReadable() << endl;
+
+    SimplifyExprVisitor simplifyExprMutator;
+    {
+        auto expr1 = ((2500 * (i6 / 2500) + ((i6 % 2500))));
+        auto s = simplifyExprMutator.simplify(expr1);
+        dbg(s);
+        auto root = as<VarNode>(s);
+        ASSERT_TRUE(root);
+        EXPECT_EQ(root->getName(), i6->getName());
+    }
+}
+
+TEST(Simplify, AdvancedDivMod) {
+    DEFINE_VAR(i7);
+    // cout << range->toReadable() << endl;
+
+    SimplifyExprVisitor simplifyExprMutator;
+    {
+        auto expr1 = ((5000 * (i7 / 2500) + 2 * ((i7 % 2500))));
+        auto s = simplifyExprMutator.simplify(expr1);
+        dbg(s);
+        auto root = as<BinaryOpNode>(s);
+        ASSERT_TRUE(root);
+        EXPECT_EQ(root->getOpType(), OpType::Mul);
+        auto t1 = as<ConstantNode>(root->getLhs());
+        auto t2 = as<ConstantNode>(root->getRhs());
+        ASSERT_TRUE((t1 == nullptr) ^ (t2 == nullptr));
+        Constant bop;
+        Var var;
+        if (t1) {
+            bop = t1;
+            var = as<VarNode>(root->getRhs());
+        } else {
+            bop = t2;
+            var = as<VarNode>(root->getLhs());
+        }
+        EXPECT_EQ(var->getName(), i7->getName());
+        EXPECT_EQ(bop->getValue(), 2);
+    }
+}
+
+TEST(Simplify, AdvancedDivMod2) {
+    DEFINE_VAR(i4);
+    DEFINE_VAR(i15);
+    DEFINE_VAR(i16);
+    // cout << range->toReadable() << endl;
+
+    SimplifyExprVisitor simplifyExprMutator;
+    {
+        auto expr1 =
+            ((2 * i15 - (3 * i16)) + (9 * (i16 / 3) + 3 * (i16 % 3) + i4));
+        auto s = simplifyExprMutator.simplify(expr1);
+        auto root = as<BinaryOpNode>(s);
+        ASSERT_TRUE(root);
+        EXPECT_EQ(root->getOpType(), OpType::Add);
+        auto t1 = as<BinaryOpNode>(root->getLhs());
+        auto t2 = as<BinaryOpNode>(root->getRhs());
+        ASSERT_TRUE((t1 == nullptr) ^ (t2 == nullptr));
+        BinaryOp bop;
+        Var var;
+        if (t1) {
+            bop = t1;
+            var = as<VarNode>(root->getRhs());
+        } else {
+            bop = t2;
+            var = as<VarNode>(root->getLhs());
+        }
+        EXPECT_EQ(var->getName(), i4->getName());
+        Constant constant = as<ConstantNode>(bop->getLhs());
+        Var var2 = as<VarNode>(bop->getRhs());
+        EXPECT_EQ(constant->getValue(), 2);
+        EXPECT_EQ(var2->getName(), i15->getName());
+    }
+}
+
+TEST(Simplify, Constant) {
+    DEFINE_VAR(i14);
+    DEFINE_VAR(i15);
+
+    SimplifyExprVisitor simplifyExprMutator;
+    {
+        auto expr = (i15 - i15) + i14 + 1;
+        auto s = simplifyExprMutator.simplify(expr);
+        dbg(expr, s);
+        auto binaryOp = as<BinaryOpNode>(s);
+        ASSERT_TRUE(binaryOp);
+        auto ca = as<ConstantNode>(binaryOp->getLhs());
+        auto cb = as<ConstantNode>(binaryOp->getRhs());
+        EXPECT_TRUE(!ca ^ !cb);
+        if (ca != nullptr)
+            EXPECT_EQ(ca->getValue(), 1);
+        else
+            EXPECT_EQ(cb->getValue(), 1);
+        // EXPECT_TRUE(var->getName() == i14);
+    }
+    {
+        auto expr = -3 + (i15 - i15) + i14 + 1;
+        auto s = simplifyExprMutator.simplify(expr);
+        dbg(expr, s);
+        auto binaryOp = as<BinaryOpNode>(s);
+        ASSERT_TRUE(binaryOp);
+        auto ca = as<ConstantNode>(binaryOp->getLhs());
+        auto cb = as<ConstantNode>(binaryOp->getRhs());
+        EXPECT_TRUE(!ca ^ !cb);
+        int finalConst = -2;
+        if (ca != nullptr)
+            EXPECT_EQ(ca->getValue(), finalConst);
+        else
+            EXPECT_EQ(cb->getValue(), finalConst);
+    }
+}
+
+TEST(Simplify, AdvancedDivMod3Negative_TConv) {
+    DEFINE_VAR(i5);
+    // cout << range->toReadable() << endl;
+
+    SimplifyExprVisitor simplifyExprMutator;
+    {
+        auto expr1 = ((1 * (i5 % -2)) + (2 * (i5 / -2)));
+        auto s = simplifyExprMutator.simplify(expr1);
+        dbg(s);
+        auto root = as<VarNode>(s);
+        ASSERT_TRUE(root);
+        EXPECT_EQ(root->getName(), "i5");
+    }
+}
+
+TEST(Simplify, SingleDivOrMod_TConv) {
+    DEFINE_VAR(i5);
+    SimplifyExprVisitor simplifyExprMutator;
+    {
+        auto expr1 = 1 * (i5 / 2);
+        auto s = simplifyExprMutator.simplify(expr1);
+        dbg(s);
+        auto root = as<BinaryOpNode>(s);
+        ASSERT_TRUE(root);
+        EXPECT_EQ(root->getOpType(), OpType::Div);
+        auto var = as<VarNode>(root->getLhs());
+        auto divisor = as<ConstantNode>(root->getRhs());
+        EXPECT_EQ(var, "i5");
+        EXPECT_EQ(divisor->getValue(), 2);
+    }
+    {
+        auto expr1 = 1 * (i5 % 2);
+        auto s = simplifyExprMutator.simplify(expr1);
+        dbg(s);
+        auto root = as<BinaryOpNode>(s);
+        ASSERT_TRUE(root);
+        EXPECT_EQ(root->getOpType(), OpType::Mod);
+        auto var = as<VarNode>(root->getLhs());
+        auto divisor = as<ConstantNode>(root->getRhs());
+        EXPECT_EQ(var, "i5");
+        EXPECT_EQ(divisor->getValue(), 2);
+    }
+}
\ No newline at end of file
diff --git a/test/nnet/test_subset.cc b/test/nnet/test_subset.cc
new file mode 100644
index 00000000..eb8d11a5
--- /dev/null
+++ b/test/nnet/test_subset.cc
@@ -0,0 +1,19 @@
+#include "nnet/dlt.h"
+#include "nnet/expr.h"
+#include "nnet/permutation.h"
+#include "gtest/gtest.h"
+using namespace nnet;
+using namespace std;
+#define DEFINE_VAR(name) auto name = make_ref<VarNode>(#name);
+
+TEST(Subset, Simple) {
+    SubsetGenerator<string> gen{{"a", "b", "c"}};
+    int cnt = 0;
+    do {
+        ++cnt;
+        if (cnt == 1) {
+            EXPECT_EQ(gen.get()[0], "a");
+        }
+    } while (gen.next());
+    EXPECT_EQ(cnt, 8 - 2);
+}
\ No newline at end of file
diff --git a/test/script/clang_format_inplace.sh b/test/script/clang_format_inplace.sh
new file mode 100755
index 00000000..49b2e3d0
--- /dev/null
+++ b/test/script/clang_format_inplace.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]:-$0}")" &>/dev/null && pwd 2>/dev/null)"
+PET_HOME="$(readlink -f ${script_dir}/../..)"
+find ${PET_HOME}/src ${PET_HOME}/include ${PET_HOME}/test  -iname *.h -o -iname *.cc | xargs clang-format -i