// InfiniTensor/test/nnet/test_TConv2gemm.cc
//
// 391 lines
// 15 KiB
// C++

#include "nnet/Visitor/CountRoutineVisitor.h"
#include "nnet/Visitor/GetTensorsVisitor.h"
#include "nnet/Visitor/Interpreter.h"
#include "nnet/derivator.h"
#include "nnet/expr.h"
#include "nnet/test.h"
#include "gtest/gtest.h"
using namespace nnet;
using namespace std;
// Runs a fixed rule sequence over the inner stage of a 4x4 transposed
// convolution (input A indexed as N,H,W,F; kernel K indexed as F,R,S,C),
// passing explicit variable-pair hints to ruleBasedDFS, and then checks the
// first candidate with CountRoutineVisitor().match(root, 1, 0, 3).
TEST(Conv2conv, TConv4x4_NHWC_innerStage_RuleBased) {
    const int N = 1;
    const int H = 2;
    const int W = 2;
    const int C = 256;
    const int F = 448;
    const int R = 4;
    const int S = 4;
    const int OH = 2 * H;
    const int OW = 2 * W;
    DEFINE_VAR(n, c, f, r, s, x1, x2, y1, y2);
    DEFINE_VAR(i2, i4);

    // Padding applied to the H and W dimensions of A:
    // dilation * (kernel_size - 1) - padding
    const int padding = 1 * (R - 1) - 1;
    auto inputA = make_ref<TensorNode>("A", vector<int>{N, H, W, F},
                                       vector<int>{0, padding, padding, 0});
    auto kernelK = make_ref<TensorNode>("K", vector<int>{F, R, S, C});
    auto inputAccess = makeSubscript(inputA, {n, x1 + r - 1, y1 + s - 1, f});
    auto kernelAccess = makeSubscript(
        kernelK,
        {f, -2 * r + (-1) * x2 + (R - 1), -2 * s + (-1) * y2 + (S - 1), c});

    // First list: iterators n, c, x1, x2, y1, y2; second list: f, r, s
    // (presumably the summed iterators -- confirm against makeRangeOperator).
    auto range = makeRangeOperator(
        {
            {n, {0, N}},
            {c, {0, C}},
            {x1, {0, OH / 2 + 1}},
            {x2, {0, 2}},
            {y1, {0, OW / 2 + 1}},
            {y2, {0, 2}},
        },
        {{f, {0, F}}, {r, {0, R / 2}}, {s, {0, S / 2}}},
        inputAccess * kernelAccess);
    dbg(range);

    const vector<int> rules{3, 2, 2, 2, 2, 5, 8, 8, 6, 90};
    Formula rootFormula(range, 0);
    Derivator derivator;
    derivator.ruleBasedDFS(
        rootFormula, 0, rules,
        {{1, {x1, r}}, {2, {y1, s}}, {3, {x2, i2}}, {4, {y2, i4}}});

    // Every rule in the sequence must have been applied.
    const int expectedDepth = static_cast<int>(rules.size());
    EXPECT_EQ(derivator.getSearchedMaxDepth(), expectedDepth);
    ASSERT_GE(derivator.getNumCandidates(), 1);
    const auto &firstCandidate = derivator.getCandidates().front();
    EXPECT_TRUE(CountRoutineVisitor().match(firstCandidate.root, 1, 0, 3));
    derivator.print();
}
TEST(Conv2conv, TConv4x4_NHWC_RuleBased) {
const int N = 1, H = 2, W = 2, C = 256, F = 448;
const int R = 4, S = 4;
const int OH = 2 * H, OW = 2 * W;
DEFINE_VAR(n, c, h, w, f, r, s);
DEFINE_VAR(x1, x2, y1, y2);
// dilation * (kernel_size - 1) - padding
int padding = 1 * (R - 1) - 1;
auto A = make_ref<TensorNode>("A", vector<int>({N, H, W, F}),
vector<int>{0, padding, padding, 0});
auto K = make_ref<TensorNode>("K", vector<int>({F, R, S, C}));
auto subA = makeSubscript(A, {n, x1 + r - 1, y1 + s - 1, f});
auto subK = makeSubscript(
K, {f, -2 * r + (-1) * x2 + (R - 1), -2 * s + (-1) * y2 + (S - 1), c});
// auto range =
// makeRangeOperator({{n, {0, N}}, {c, {0, H}}, {w, {0, W}}, {f, {0,
// F}}},
// {{f, {0, C}}, {r, {0, R}}, {s, {0, S}}}, subA *
// subK);
auto range = makeRangeOperator(
{
{n, {0, N}},
{x1, {0, OH / 2 + 1}},
{x2, {0, 2}},
{y1, {0, OW / 2 + 1}},
{y2, {0, 2}},
{c, {0, C}},
},
{{f, {0, F}}, {r, {0, R / 2}}, {s, {0, S / 2}}}, subA * subK);
auto subOuter = makeSubscript(
range, {n, (h + 1) / 2, (h + 1) % 2, (w + 1) / 2, (w + 1) % 2, c});
auto outerRange = makeRangeOperator(
{
{n, {0, N}},
{h, {0, OH}},
{w, {0, OW}},
{c, {0, C}},
},
{}, subOuter);
dbg(outerRange);
// Derivation: this work without padding check in stage merging
// const vector<int> rules{1, 1, 3, 2, 2, 5, 2, 2, 6, 4, 4, 4, 4, 6};
// Before Guided DLT seperated from rule2VarMerging
// const vector<int> rules{1, 1, 3, 2, 2, 5, 2, 2, 6, 6};
const vector<int> rules{3, 2, 2, 2, 2, 5, 8, 8, 6, 91, 90};
Formula conv_9x9(outerRange, 0);
Derivator derivator;
// derivator.ruleBasedDFS(conv_9x9, 0, rules,
// {{1, {"x1", "r"}},
// {2, {"y1", "s"}},
// {3, {"x2", "i2"}},
// {4, {"y2", "i4"}}});
derivator.ruleBasedDFS(conv_9x9, 0, rules);
EXPECT_EQ(derivator.getSearchedMaxDepth(), ((int)rules.size()));
ASSERT_GE(derivator.getNumCandidates(), 1);
const auto &formula = derivator.getCandidates().front();
EXPECT_TRUE(CountRoutineVisitor().match(formula.root, 1, 0, 3));
derivator.print();
}
TEST(Conv2conv, TConv4x4_BS16_NHWC_RuleBased) {
const int N = 16, H = 2, W = 2, C = 256, F = 448;
const int R = 4, S = 4;
const int OH = 2 * H, OW = 2 * W;
DEFINE_VAR(n, c, h, w, f, r, s);
DEFINE_VAR(x1, x2, y1, y2);
// dilation * (kernel_size - 1) - padding
int padding = 1 * (R - 1) - 1;
auto A = make_ref<TensorNode>("A", vector<int>({N, H, W, F}),
vector<int>{0, padding, padding, 0});
auto K = make_ref<TensorNode>("K", vector<int>({F, R, S, C}));
auto subA = makeSubscript(A, {n, x1 + r - 1, y1 + s - 1, f});
auto subK = makeSubscript(
K, {f, -2 * r + (-1) * x2 + (R - 1), -2 * s + (-1) * y2 + (S - 1), c});
// auto range =
// makeRangeOperator({{n, {0, N}}, {c, {0, H}}, {w, {0, W}}, {f, {0,
// F}}},
// {{f, {0, C}}, {r, {0, R}}, {s, {0, S}}}, subA *
// subK);
auto range = makeRangeOperator(
{
{n, {0, N}},
{x1, {0, OH / 2 + 1}},
{x2, {0, 2}},
{y1, {0, OW / 2 + 1}},
{y2, {0, 2}},
{c, {0, C}},
},
{{f, {0, F}}, {r, {0, R / 2}}, {s, {0, S / 2}}}, subA * subK);
auto subOuter = makeSubscript(
range, {n, (h + 1) / 2, (h + 1) % 2, (w + 1) / 2, (w + 1) % 2, c});
auto outerRange = makeRangeOperator(
{
{n, {0, N}},
{h, {0, OH}},
{w, {0, OW}},
{c, {0, C}},
},
{}, subOuter);
dbg(outerRange);
// Derivation: this work without padding check in stage merging
// const vector<int> rules{1, 1, 3, 2, 2, 5, 2, 2, 6, 4, 4, 4, 4, 6};
// Before Guided DLT seperated from rule2VarMerging
// const vector<int> rules{1, 1, 3, 2, 2, 5, 2, 2, 6, 6};
const vector<int> rules{3, 2, 2, 2, 2, 5, 8, 8, 6, 91, 90};
Formula conv_9x9(outerRange, 0);
Derivator derivator;
// derivator.ruleBasedDFS(conv_9x9, 0, rules,
// {{1, {"x1", "r"}},
// {2, {"y1", "s"}},
// {3, {"x2", "i2"}},
// {4, {"y2", "i4"}}});
derivator.ruleBasedDFS(conv_9x9, 0, rules);
EXPECT_EQ(derivator.getSearchedMaxDepth(), ((int)rules.size()));
ASSERT_GE(derivator.getNumCandidates(), 1);
const auto &formula = derivator.getCandidates().front();
EXPECT_TRUE(CountRoutineVisitor().match(formula.root, 1, 0, 3));
derivator.print();
}
// Builds the two-stage expression of a 4x4 transposed convolution whose
// kernel tensor K is indexed as (F, R, S, C).
// Warning: F is the number of input channels, which is inverted compared
// with a normal Conv.
// Our data layout: NHWF -> NHWC, FRSC
// Pytorch data layout: NFHW -> NCHW, FCRS
//
// Only R == 4 and S == 4 are accepted (enforced with assert).
// Returns the outer range over n, h, w, c.
RangeOp buildTConv4x4_NHWF_FRSC(const int N, const int C, const int H,
                                const int W, const int F, const int R,
                                const int S) {
    assert(R == 4 && S == 4);
    const int OH = 2 * H;
    const int OW = 2 * W;
    DEFINE_VAR(n, c, h, w, f, r, s);
    DEFINE_VAR(x1, x2, y1, y2, i2, i4);

    // Padding applied to the H and W dimensions of A:
    // dilation * (kernel_size - 1) - padding
    const int padding = 1 * (R - 1) - 1;
    auto inputA = make_ref<TensorNode>("A", vector<int>{N, H, W, F},
                                       vector<int>{0, padding, padding, 0});
    auto kernelK = make_ref<TensorNode>("K", vector<int>{F, R, S, C});
    auto inputAccess = makeSubscript(inputA, {n, x1 + r - 1, y1 + s - 1, f});
    auto kernelAccess = makeSubscript(
        kernelK, {f, (R - 2) - 2 * r + x2, (S - 2) - 2 * s + y2, c});

    // Inner stage; the outer stage below substitutes
    // x1=(h+1)/2, x2=(h+1)%2, y1=(w+1)/2, y2=(w+1)%2
    auto range1 = makeRangeOperator(
        {
            {n, {0, N}},
            {c, {0, C}},
            {x1, {0, OH / 2 + 1}},
            {x2, {0, 2}},
            {y1, {0, OW / 2 + 1}},
            {y2, {0, 2}},
        },
        {{f, {0, F}}, {r, {0, R / 2}}, {s, {0, S / 2}}},
        inputAccess * kernelAccess);
    dbg(range1);

    auto innerAccess = makeSubscript(
        range1, {n, c, (h + 1) / 2, (h + 1) % 2, (w + 1) / 2, (w + 1) % 2});
    return makeRangeOperator(
        {{n, {0, N}}, {h, {0, OH}}, {w, {0, OW}}, {c, {0, C}}}, {},
        innerAccess);
}
// Correct input expression.
// Builds the two-stage expression of a 4x4 transposed convolution whose
// kernel tensor K is indexed as (R, S, F, C).
// Warning: F is the number of input channels, which is inverted compared
// with a normal Conv.
// Our data layout: NHWF -> NHWC, RSFC
// Pytorch data layout: NFHW -> NCHW, FCRS
//
// Only R == 4 and S == 4 are accepted (enforced with assert).
// Returns the outer range over n, h, w, c.
RangeOp buildTConv4x4_NHWF_RSFC(const int N, const int C, const int H,
                                const int W, const int F, const int R,
                                const int S) {
    assert(R == 4 && S == 4);
    const int OH = 2 * H;
    const int OW = 2 * W;
    DEFINE_VAR(n, c, h, w, f, r, s);
    DEFINE_VAR(x1, x2, y1, y2, i2, i4);

    // Padding applied to the H and W dimensions of A:
    // dilation * (kernel_size - 1) - padding
    const int padding = 1 * (R - 1) - 1;
    auto inputA = make_ref<TensorNode>("A", vector<int>{N, H, W, F},
                                       vector<int>{0, padding, padding, 0});
    auto kernelK = make_ref<TensorNode>("K", vector<int>{R, S, F, C});
    auto inputAccess = makeSubscript(inputA, {n, x1 + r - 1, y1 + s - 1, f});
    auto kernelAccess = makeSubscript(
        kernelK, {(R - 2) - 2 * r + x2, (S - 2) - 2 * s + y2, f, c});

    // Inner stage; the outer stage below substitutes
    // x1=(h+1)/2, x2=(h+1)%2, y1=(w+1)/2, y2=(w+1)%2
    auto range1 = makeRangeOperator(
        {
            {n, {0, N}},
            {c, {0, C}},
            {x1, {0, OH / 2 + 1}},
            {x2, {0, 2}},
            {y1, {0, OW / 2 + 1}},
            {y2, {0, 2}},
        },
        {{f, {0, F}}, {r, {0, R / 2}}, {s, {0, S / 2}}},
        inputAccess * kernelAccess);
    dbg(range1);

    auto innerAccess = makeSubscript(
        range1, {n, c, (h + 1) / 2, (h + 1) % 2, (w + 1) / 2, (w + 1) % 2});
    return makeRangeOperator(
        {{n, {0, N}}, {h, {0, OH}}, {w, {0, OW}}, {c, {0, C}}}, {},
        innerAccess);
}
// Interprets the FRSC input expression on a small problem and compares every
// output element with reference values recorded from PyTorch.
TEST(TConv2gemm, TConv4x4_NHWF_FRSC_correctness_of_input_expr) {
    const int N = 1;
    const int H = 2;
    const int W = 2;
    const int C = 2;
    const int F = 3;
    const int R = 4;
    const int S = 4;
    RangeOp range0 = buildTConv4x4_NHWF_FRSC(N, C, H, W, F, R, S);
    auto actual = Interpreter(range0).interpretAllOutput(range0);

    // Pytorch results
    // torch.conv_transpose2d(X, K, stride=2, padding=1, dilation=1)
    // X, K is NFHW and FCRS
    vector<int> expected = {190,  193,  740,  755,  770,  785,  592,  604,
                            992,  1016, 2704, 2770, 2836, 2902, 1832, 1874,
                            1184, 1208, 3232, 3298, 3364, 3430, 2168, 2210,
                            1114, 1135, 2660, 2711, 2762, 2813, 1624, 1654};

    // Sizes must agree before the element-wise comparison indexes both.
    ASSERT_EQ(actual.size(), expected.size());
    for (size_t pos = 0; pos < actual.size(); ++pos) {
        EXPECT_EQ(actual[pos], expected[pos]);
    }
}
// Flattens a multi-dimensional index into a linear offset, treating the last
// dimension as the fastest-varying one:
//   offset = (((index[0]) * shape[1] + index[1]) * shape[2] + index[2]) ...
//
// @param index  per-dimension coordinates.
// @param shape  per-dimension extents; must have at least as many entries as
//               `index` (checked with assert).
// @return the linear offset, or 0 when `index` is empty.
ssize_t getOffset(const std::vector<ssize_t> &index,
                  const std::vector<int> &shape) {
    // Guard against reading past the end of `shape`.
    assert(index.size() <= shape.size());
    // Seeding with 0 makes the first step yield index[0] (0 * shape[0] +
    // index[0]) and gives a defined result for an empty index.
    ssize_t offset = 0;
    for (size_t dim = 0; dim < index.size(); ++dim)
        offset = offset * shape[dim] + index[dim];
    return offset;
}
// Interprets the RSFC input expression on a small problem with explicitly
// generated inputs and compares every output element with reference values
// recorded from PyTorch.
TEST(TConv2gemm, TConv4x4_NHWF_RSFC_correctness_of_input_expr) {
    const int N = 1;
    const int H = 2;
    const int W = 2;
    const int C = 2;
    const int F = 3;
    const int R = 4;
    const int S = 4;
    RangeOp range0 = buildTConv4x4_NHWF_RSFC(N, C, H, W, F, R, S);

    Interpreter::Inputs inputs;
    for (const auto &[name, tensor] : GetTensorsVisitor().get(range0)) {
        auto data = make_ref<vector<int>>(tensor->getSize());
        if (name == "A") {
            // A holds its own linear index: 0, 1, 2, ...
            for (ssize_t pos = 0; pos < tensor->getSize(); pos++)
                (*data)[pos] = pos;
        } else if (name == "K") {
            // Each K element, stored at its RSFC offset, holds the offset the
            // same (f, r, s, c) coordinate would have in an FRSC layout.
            for (ssize_t kr = 0; kr < R; kr++) {
                for (ssize_t ks = 0; ks < S; ks++) {
                    for (ssize_t kf = 0; kf < F; kf++) {
                        for (ssize_t kc = 0; kc < C; kc++) {
                            ssize_t rsfcOffset =
                                getOffset({kr, ks, kf, kc}, {R, S, F, C});
                            ssize_t frscOffset =
                                getOffset({kf, kr, ks, kc}, {F, R, S, C});
                            (*data)[rsfcOffset] = frscOffset;
                        }
                    }
                }
            }
        } else {
            // Only tensors named "A" and "K" are expected in this expression.
            assert(0);
        }
        inputs.emplace(name, data);
    }

    auto actual = Interpreter(inputs).interpretAllOutput(range0);
    // Pytorch results
    // torch.conv_transpose2d(X, K, stride=2, padding=1, dilation=1)
    // X, K is NFHW and FCRS
    vector<int> expected = {190,  193,  740,  755,  770,  785,  592,  604,
                            992,  1016, 2704, 2770, 2836, 2902, 1832, 1874,
                            1184, 1208, 3232, 3298, 3364, 3430, 2168, 2210,
                            1114, 1135, 2660, 2711, 2762, 2813, 1624, 1654};

    // Sizes must agree before the element-wise comparison indexes both.
    ASSERT_EQ(actual.size(), expected.size());
    for (size_t pos = 0; pos < actual.size(); ++pos) {
        EXPECT_EQ(actual[pos], expected[pos]);
    }
}
// TODO: Test after passing RSFC
// TEST(TConv2gemm, TConv4x4_NHWF_FRSC_search) {
// const int N = 1, H = 2, W = 2, C = 256, F = 448;
// const int R = 4, S = 4;
// RangeOp range = buildTConv4x4_NHWF_FRSC(N, C, H, W, F, R, S);
// const vector<int> rules{3, 2, 2, 2, 2, 5, 8, 8, 6, 90};
// Formula conv_9x9(range, 0);
// Derivator derivator;
// // derivator.ruleBasedDFS(
// // conv_9x9, 0, rules,
// // {{1, {x1, r}}, {2, {y1, s}}, {3, {x2, i2}}, {4, {y2, i4}}});
// derivator.dfs(conv_9x9, 0);
// // EXPECT_EQ(derivator.getSearchedMaxDepth(), ((int)rules.size()));
// ASSERT_GE(derivator.getNumCandidates(), 1);
// const auto &formula = derivator.getCandidates().front();
// EXPECT_TRUE(CountRoutineVisitor().match(formula.root, 1, 0, 3));
// derivator.print();
// }
// Searches derivations of the RSFC transposed-convolution expression and
// requires at least one candidate to match the stored reference expression.
TEST(TConv2gemm, TConv4x4_NHWF_RSFC_search) {
    const int N = 16;
    const int H = 2;
    const int W = 2;
    const int C = 256;
    const int F = 448;
    const int R = 4;
    const int S = 4;
    RangeOp range = buildTConv4x4_NHWF_RSFC(N, C, H, W, F, R, S);
    Formula rootFormula(range, 0);
    Derivator derivator;

    // Manual switch: set to true to replay a fixed rule prefix and dump the
    // first successful derivation instead of running the search.
    const bool useRuleBased = false;
    if (!useRuleBased) {
        derivator.search(rootFormula, 0);
    } else {
        const vector<int> rules{3, 2, 2, 2, 2, 5};
        derivator.setDumpFirstSuccess("TConv4x4_NHWF_RSFC_");
        derivator.ruleBasedDFS(rootFormula, 0, rules, {}, true);
    }

    ASSERT_GE(derivator.getNumCandidates(), 1);
    derivator.print();
    // for (const auto &f : derivator.getCandidates()) {
    //     dbg(CountRoutineVisitor().count(f.root));
    // }
    const int nMatches = matchExprResult(
        derivator,
        "../test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_18.expr");
    EXPECT_GE(nMatches, 1);
    derivator.printStatistics();
}
// Runs checkExprLogSame over the stored expression logs with the given file
// name prefix and the arguments 0 and 11.
// NOTE(review): the test name says FRSC but the log prefix points at the RSFC
// logs -- confirm which layout is intended.
TEST(TConv2gemm, TConv4x4_NHWF_FRSC_CheckDerivationCorrectness_log) {
    const string logPrefix =
        "../test/nnet/log/TConv4x4_NHWF_RSFC/TConv4x4_NHWF_RSFC_";
    const bool logsAgree = checkExprLogSame(logPrefix, 0, 11);
    EXPECT_TRUE(logsAgree);
}
// TODO: correct ConvTransPattern
// Builds the ConvTransPattern expression for the InfoGAN ConvTranspose_3
// shape and runs the derivator's out-of-bound check on it.
TEST(Conv2conv, InfoGAN_ConvTranspose_3_OOB_Test) {
    // ConvTranspose_3 in InfoGAN
    const int n = 1;
    const int c = 256;
    const int h = 2;
    const int w = 2;
    const int f = 448;
    const int r = 4;
    const int s = 4;
    // Padding applied to the H and W dimensions of A.
    const int padding = 1 * (r - 1) - 1;
    const auto input = nnet::makeTensor(
        "A", {n, h, w, f}, std::vector<int>{0, padding, padding, 0});
    const auto kernel = nnet::makeTensor("K", {f, c, r, s});
    auto expr = ConvTransPattern::getExpr(input, kernel, n, c, h, w, f, r, s);
    dbg(expr);

    Derivator derivator;
    derivator.checkOOB(as<RangeOpNode>(expr));
}