Add: debug hacks for InfoGAN

Liyan Zheng 2023-04-19 17:23:32 +08:00
parent e86e993ed4
commit 0b23a065ca
6 changed files with 35 additions and 8 deletions

View File

@@ -77,6 +77,7 @@ class TensorObj : public TensorBaseObj {
// Thus the internal state of the generator cannot be updated.
void setData(
std::function<void(void *, size_t, DataType)> const &generator) const;
void setData(const Blob &_blob) { data = _blob; }
Tensor clone() const {
auto obj = make_ref<TensorObj>(*this);
obj->freeData();
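The new overload makes data sharing explicit: instead of generating or copying bytes, a tensor can adopt an existing Blob. A minimal usage sketch, assuming two already-allocated tensors src and dst of matching byte size (the names are illustrative; getDataBlob() is the accessor used by the reshape kernel further down):

// Sketch: alias dst onto src's storage. Both tensors now share one Blob
// through reference counting, so no device copy is issued.
dst->setData(src->getDataBlob());
// Any kernel reading dst now sees src's bytes directly.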

View File

@@ -1,6 +1,7 @@
#include "core/search_engine.h"
#include "core/hash.h"
#include "core/runtime.h"
#include "nnet/dbg.h"
#include <algorithm>
#include <iostream>
@@ -74,7 +75,9 @@ Graph SearchEngine::run(const Graph graph) {
nextGraphs.emplace_back(tmp);
}
}
dbg("===Num" + std::to_string(nextGraphs.size()));
std::sort(nextGraphs.begin(), nextGraphs.end(), graphTimeComparer);
if (nextGraphs.size() > GRAPH_SIZE) {
nextGraphs.resize(GRAPH_SIZE);
}
@@ -122,6 +125,7 @@ std::vector<Graph> SearchEngine::search(const Graph &graph) {
}
// compare with perf time
dbg("===Num" + std::to_string(results.size()));
std::sort(results.begin(), results.end(), graphTimeComparer);
if (results.size() > GRAPH_SIZE) {
results.resize(GRAPH_SIZE);
@@ -341,6 +345,9 @@ std::vector<Graph> SearchEngine::searchMutation(const MetaGraph &metaGraph) {
std::vector<Graph> nextGraphs;
if (node.type == 1) { // If it has computing OPs
auto mutatedGraphs = mutator->run(node.graph);
// HACK: only try the first two mutated graphs for debugging
if (mutatedGraphs.size() > 2)
mutatedGraphs.resize(2);
for (auto graph : graphs) {
for (auto mutatedGraph : mutatedGraphs) {
std::vector<Operator> ops;
@@ -373,6 +380,7 @@ std::vector<Graph> SearchEngine::searchMutation(const MetaGraph &metaGraph) {
for (auto g : nextGraphs) {
g->dataMalloc();
}
dbg("===Num" + std::to_string(nextGraphs.size()));
std::sort(nextGraphs.begin(), nextGraphs.end(), graphTimeComparer);
if (nextGraphs.size() > GRAPH_SIZE) {
nextGraphs.resize(GRAPH_SIZE);
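Every search stage now follows the same pattern: log the candidate count with dbg, sort by graphTimeComparer, and truncate to the best GRAPH_SIZE graphs, i.e. a beam search over candidate graphs. The comparer's definition is not shown in this diff; a plausible sketch of its shape, assuming profiled time comes from RuntimeObj::getPerfTime (visible in the test at the bottom) through a hypothetical getRuntime() accessor:

// Assumed shape of the comparer, not the repo's exact code: order
// candidate graphs fastest-first so resize(GRAPH_SIZE) keeps the top K.
static bool graphTimeComparer(const Graph &a, const Graph &b) {
    return a->getRuntime()->getPerfTime(a) < b->getRuntime()->getPerfTime(b);
}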

View File

@@ -49,7 +49,7 @@ class matmulCublas : public Kernel {
const float alpha = 1.f, beta = 0.f;
// TODO: use compute type
cublasStatus_t stat;
if (b > 1) {
if (b >= 1) {
// Support batch broadcast with zero stride
int dimA = op->getInputs(0)->getDims().size();
int dimB = op->getInputs(1)->getDims().size();
@@ -63,6 +63,11 @@ class matmulCublas : public Kernel {
(dimB == 3 && op->getInputs(1)->getDims()[0] == 1))
? 0 // Broadcast the batch dimension if batch size is 1
: n * k;
// printf("cublasGemmStridedBatchedEx %d%d, mnk %d %d %d, alpha %f,
// B "
// "%d %lld, A %d %lld, C %d %d, b %d %d\n",
// opB, opA, n, m, k, alpha, ldb, strideB, lda, strideA, ldc,
// m * n, b, record->algo);
stat = cublasGemmStridedBatchedEx(
context->cublasHandle(), opB, opA, n, m, k, &alpha, inBData,
CUDA_R_32F, ldb, strideB, inAData, CUDA_R_32F, lda, strideA,
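The zero stride is what implements the broadcast: cublasGemmStridedBatchedEx advances each operand by its stride between batches, so a stride of 0 re-reads the same matrix every time. A self-contained sketch of the pattern in plain column-major terms (handle, device pointers, and dimensions are assumptions; older cuBLAS releases take CUDA_R_32F instead of CUBLAS_COMPUTE_32F as the compute type):

// Sketch: C[i] = A * B[i] for i in [0, b). A ([m, k]) has stride 0 and is
// broadcast across the batch; B ([k, n]) and C ([m, n]) advance per batch.
const float alpha = 1.f, beta = 0.f;
cublasGemmStridedBatchedEx(
    handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha,
    dA, CUDA_R_32F, m, /*strideA=*/0,                // reuse A every batch
    dB, CUDA_R_32F, k, /*strideB=*/(long long)k * n,
    &beta,
    dC, CUDA_R_32F, m, /*strideC=*/(long long)m * n,
    b, CUBLAS_COMPUTE_32F, CUBLAS_GEMM_DEFAULT);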

View File

@@ -4,10 +4,13 @@ namespace infini {
class CopyCuda : public CudaKernelWithoutConfig {
void compute(const Operator &op,
const RuntimeObj *_context) const override {
auto inData = op->getInputs(0)->getRawDataPtr<void *>();
auto outData = op->getOutputs()[0]->getRawDataPtr<void *>();
cudaMemcpyAsync(outData, inData, op->getInputs(0)->getBytes(),
cudaMemcpyDeviceToDevice);
// auto inData = op->getInputs(0)->getRawDataPtr<void *>();
// auto outData = op->getOutputs()[0]->getRawDataPtr<void *>();
// cudaMemcpyAsync(outData, inData, op->getInputs(0)->getBytes(),
// cudaMemcpyDeviceToDevice);
// HACK: optimization: share the input's data blob instead of copying
op->getOutputs()[0]->setData(op->getInputs(0)->getDataBlob());
}
};
// reshape/flatten/identity are all implemented as a copy from input to output.
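The hack turns this whole copy family into zero-cost views: the output adopts the input's Blob via the new setData overload, so no cudaMemcpyAsync is launched at all. This is only safe as long as no later kernel writes the shared buffer in place; a sketch of the invariant it establishes (illustrative check, not in the commit):

// After CopyCuda::compute, input and output alias the same device memory.
void *in = op->getInputs(0)->getRawDataPtr<void *>();
void *out = op->getOutputs()[0]->getRawDataPtr<void *>();
IT_ASSERT(in == out); // in-place writes to out would also mutate the input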

View File

@@ -77,6 +77,9 @@ void NMutator::runSingleOpToNaiveMembound(Graph in_graph,
void NMutator::runSingleOp(Graph in_graph, std::vector<Graph> &out_graphs) {
OpVec computeOps = in_graph->getComputeOps();
IT_ASSERT(computeOps.size() == 1);
// HACK: remove this; for now, only mutate ConvTransposed2dNHWC ops
if (auto op = as<ConvTransposed2dNHWCObj>(computeOps[0]); !op)
return;
// if (infini::Graph g = transformTConv1x1(computeOps[0])) {
// out_graphs.emplace_back(g);
@@ -408,6 +411,9 @@ infini::Graph NMutator::expressionToGraph(nnet::Expr expr, Graph in_graph) {
nameNToTensorT.at(BN->getName())};
TensorVec outputsPET = {nameNToTensorT.at(outputNameN)};
const auto &[b, m, n, k, transa, transb] = op->getArgs();
// HACK: pruning for debug: only keep matmuls with transa == 0 and transb == 1
if (!((transa == 0) && (transb == 1)))
return nullptr;
g->addOpWithOutputs<MatmulObj>(inputsPET[0], inputsPET[1],
outputsPET[0], transa, transb);
}
@@ -536,11 +542,15 @@ Graph NMutator::transformConvtransposed1x1(Operator _op) {
auto newA = g->addTensor(
{inputDims[0] * inputDims[1] * inputDims[2], inputDims[3]}, dtype);
// FRSC
// auto newW = g->addTensor(
// {weightDims[0], weightDims[1] * weightDims[2] * weightDims[3]},
// dtype);
// HACK: without transpose: declare W with swapped dims and set transb = 1
auto newW = g->addTensor(
{weightDims[0], weightDims[1] * weightDims[2] * weightDims[3]}, dtype);
{weightDims[1] * weightDims[2] * weightDims[3], weightDims[0]}, dtype);
g->addOpWithOutputs<ReshapeObj>(g->cloneTensor(A), newA, newA->getDims());
g->addOpWithOutputs<ReshapeObj>(g->cloneTensor(W), newW, newW->getDims());
Tensor newO = g->addOp<MatmulObj>(newA, newW, nullptr, 0, 0)->getOutput();
Tensor newO = g->addOp<MatmulObj>(newA, newW, nullptr, 0, 1)->getOutput();
g->addOpWithOutputs<ReshapeObj>(newO, g->cloneTensor(op->getOutput()),
op->getOutput()->getDims());
return g;
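Worked shapes make the rewrite concrete: with an NHWC input [n, h, w, f] and an FRSC weight [f, r, s, c] where r = s = 1, the input flattens to [n*h*w, f] and the weight is now declared as [r*s*c, f], i.e. already in transposed form, so Matmul runs with transb = 1. A sketch of the bookkeeping with illustrative numbers (n = 1, h = w = 2, f = 448, c = 256 are assumptions, not from the commit):

// A' = reshape(input)   : [1*2*2, 448]   = [4, 448]
// W' = weight, declared : [1*1*256, 448] = [256, 448]  (treated as W^T;
//      the bytes are not actually transposed, which is why it is a HACK)
// O' = Matmul(A', W', transa = 0, transb = 1)
//    : [4, 448] x [448, 256] -> [4, 256], reshaped back to the conv output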

View File

@@ -55,7 +55,6 @@ TEST(NMutator, NaiveConvWithInterpreter) {
gg->getOutputs()[0]->getRawDataPtr<void *>());
}
// FIXME: fails due to the implicit transpose for DLT
TEST(NMutator, InfoGAN_TConv_3_correctness) {
const bool useMutatorDirectly = false;
Runtime runtime = make_ref<CudaRuntimeObj>();
@@ -123,6 +122,7 @@ TEST(NMutator, InfoGAN_TConv_3_correctness) {
auto go0 = gCpu->cloneTensor(g->getOutputs()[0]);
auto bgo0 = gCpu->cloneTensor(bestGraph->getOutputs()[0]);
dbg(runtime->getPerfTime(bestGraph, true));
EXPECT_TRUE(go0->equalData(bgo0, 1e-4));
}
}