forked from jiuyuan/InfiniTensor
Add: debug hacks for InfoGAN
parent e86e993ed4
commit 0b23a065ca
@@ -77,6 +77,7 @@ class TensorObj : public TensorBaseObj {
     // Thus the internal state of generator cannot be updated.
     void setData(
         std::function<void(void *, size_t, DataType)> const &generator) const;
+    void setData(const Blob &_blob) { data = _blob; }
     Tensor clone() const {
         auto obj = make_ref<TensorObj>(*this);
         obj->freeData();
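The new setData(const Blob &) overload lets a tensor adopt another tensor's storage handle instead of generating or copying data; the CopyCuda change further down relies on it. A minimal, self-contained sketch of the sharing semantics, with std::shared_ptr standing in for the reference-counted Blob (the names and types here are illustrative, not the project's real classes):

    // Minimal sketch (not project code): what blob sharing amounts to.
    #include <cassert>
    #include <memory>
    #include <vector>

    using FakeBlob = std::shared_ptr<std::vector<float>>;

    struct FakeTensor {
        FakeBlob data;
        void setData(const FakeBlob &blob) { data = blob; } // alias, no copy
    };

    int main() {
        FakeTensor in, out;
        in.data = std::make_shared<std::vector<float>>(228, 0.0f);
        out.setData(in.data);           // out now shares in's storage
        (*in.data)[7] = 3.5f;           // a write through one tensor ...
        assert((*out.data)[7] == 3.5f); // ... is visible through the other
        return 0;
    }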
@@ -1,6 +1,7 @@
 #include "core/search_engine.h"
 #include "core/hash.h"
 #include "core/runtime.h"
+#include "nnet/dbg.h"

 #include <algorithm>
 #include <iostream>
@@ -74,7 +75,9 @@ Graph SearchEngine::run(const Graph graph) {
             nextGraphs.emplace_back(tmp);
         }
     }
+    dbg("===Num" + std::to_string(nextGraphs.size()));
     std::sort(nextGraphs.begin(), nextGraphs.end(), graphTimeComparer);
+
     if (nextGraphs.size() > GRAPH_SIZE) {
         nextGraphs.resize(GRAPH_SIZE);
     }
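The dbg(...) calls added here and in the hunks below come from the newly included nnet/dbg.h. Judging by the usage, it behaves like the common dbg(...) tracing macro that prints the expression text together with its value and source location; the stand-in below only approximates that behaviour (DBG and its output format are assumptions, not the real macro):

    #include <iostream>
    #include <string>

    // Rough stand-in for the dbg(...) macro from nnet/dbg.h (assumed behaviour).
    #define DBG(expr)                                                          \
        do {                                                                   \
            std::cerr << __FILE__ << ":" << __LINE__ << "  " << #expr          \
                      << " = " << (expr) << std::endl;                         \
        } while (0)

    int main() {
        std::size_t candidates = 3;
        DBG("===Num" + std::to_string(candidates)); // prints the expression and "===Num3"
        return 0;
    }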
@@ -122,6 +125,7 @@ std::vector<Graph> SearchEngine::search(const Graph &graph) {
     }

     // compare with perf time
+    dbg("===Num" + std::to_string(results.size()));
     std::sort(results.begin(), results.end(), graphTimeComparer);
     if (results.size() > GRAPH_SIZE) {
         results.resize(GRAPH_SIZE);
@@ -341,6 +345,9 @@ std::vector<Graph> SearchEngine::searchMutation(const MetaGraph &metaGraph) {
         std::vector<Graph> nextGraphs;
         if (node.type == 1) { // If it has computing OPs
             auto mutatedGraphs = mutator->run(node.graph);
+            // // HACK: only try the first one for debug
+            if (mutatedGraphs.size() > 2)
+                mutatedGraphs.resize(2);
             for (auto graph : graphs) {
                 for (auto mutatedGraph : mutatedGraphs) {
                     std::vector<Operator> ops;
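Note that although the added HACK comment says "only try the first one", the code keeps the first two mutated graphs (resize(2)), so one alternative besides the top mutation candidate still enters the search.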
@@ -373,6 +380,7 @@ std::vector<Graph> SearchEngine::searchMutation(const MetaGraph &metaGraph) {
     for (auto g : nextGraphs) {
         g->dataMalloc();
     }
+    dbg("===Num" + std::to_string(nextGraphs.size()));
     std::sort(nextGraphs.begin(), nextGraphs.end(), graphTimeComparer);
     if (nextGraphs.size() > GRAPH_SIZE) {
         nextGraphs.resize(GRAPH_SIZE);
@@ -49,7 +49,7 @@ class matmulCublas : public Kernel {
         const float alpha = 1.f, beta = 0.f;
         // TODO:use compute type
         cublasStatus_t stat;
-        if (b > 1) {
+        if (b >= 1) {
            // Support batch broadcast with zero stride
            int dimA = op->getInputs(0)->getDims().size();
            int dimB = op->getInputs(1)->getDims().size();
@@ -63,6 +63,11 @@ class matmulCublas : public Kernel {
                     (dimB == 3 && op->getInputs(1)->getDims()[0] == 1))
                        ? 0 // Broadcast the batch dimension if batch size is 1
                        : n * k;
+            // printf("cublasGemmStridedBatchedEx %d%d, mnk %d %d %d, alpha %f,
+            // B "
+            // "%d %lld, A %d %lld, C %d %d, b %d %d\n",
+            // opB, opA, n, m, k, alpha, ldb, strideB, lda, strideA, ldc,
+            // m * n, b, record->algo);
            stat = cublasGemmStridedBatchedEx(
                context->cublasHandle(), opB, opA, n, m, k, &alpha, inBData,
                CUDA_R_32F, ldb, strideB, inAData, CUDA_R_32F, lda, strideA,
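Two things change in the cuBLAS matmul kernel: the b >= 1 condition now routes even single-batch matmuls through cublasGemmStridedBatchedEx, and an input whose batch dimension is missing or equal to 1 is broadcast by giving it a batch stride of 0, so the same matrix is reused for every batch. A small self-contained sketch of that stride selection (the helper name and the example shapes are made up for illustration):

    #include <cstdio>

    // Illustrative helper (not project code): pick the batch stride the way
    // the kernel above does; 0 broadcasts one matrix across the whole batch.
    long long pickBatchStride(int ndims, int batchDim, long long matrixElems) {
        return (ndims == 2 || (ndims == 3 && batchDim == 1)) ? 0 : matrixElems;
    }

    int main() {
        int m = 4, n = 456, k = 448;
        // A: [b, m, k] with b > 1; B: [1, k, n] gets broadcast over the batch.
        long long strideA = pickBatchStride(3, /*batchDim=*/2, 1LL * m * k); // m * k
        long long strideB = pickBatchStride(3, /*batchDim=*/1, 1LL * k * n); // 0
        std::printf("strideA=%lld strideB=%lld\n", strideA, strideB);
        return 0;
    }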
@@ -4,10 +4,13 @@ namespace infini {
 class CopyCuda : public CudaKernelWithoutConfig {
     void compute(const Operator &op,
                  const RuntimeObj *_context) const override {
-        auto inData = op->getInputs(0)->getRawDataPtr<void *>();
-        auto outData = op->getOutputs()[0]->getRawDataPtr<void *>();
-        cudaMemcpyAsync(outData, inData, op->getInputs(0)->getBytes(),
-                        cudaMemcpyDeviceToDevice);
+        // auto inData = op->getInputs(0)->getRawDataPtr<void *>();
+        // auto outData = op->getOutputs()[0]->getRawDataPtr<void *>();
+        // cudaMemcpyAsync(outData, inData, op->getInputs(0)->getBytes(),
+        //                 cudaMemcpyDeviceToDevice);
+
+        // HACK: optimization
+        op->getOutputs()[0]->setData(op->getInputs(0)->getDataBlob());
     }
 };
 // reshape/flatten/identity all act as copying from input to output.
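With this change the reshape/flatten/identity kernels stop issuing a device-to-device cudaMemcpyAsync and instead make the output tensor point at the input's blob (the same sharing shown in the sketch after the TensorObj hunk above). That removes a copy from the InfoGAN graphs, but it also means the output permanently aliases the input, which is presumably why the change is flagged as a HACK rather than a general optimization.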
@@ -77,6 +77,9 @@ void NMutator::runSingleOpToNaiveMembound(Graph in_graph,
 void NMutator::runSingleOp(Graph in_graph, std::vector<Graph> &out_graphs) {
     OpVec computeOps = in_graph->getComputeOps();
     IT_ASSERT(computeOps.size() == 1);
+    // HACK: remove this
+    if (auto op = as<ConvTransposed2dNHWCObj>(computeOps[0]); !op)
+        return;

     // if (infini::Graph g = transformTConv1x1(computeOps[0])) {
     //     out_graphs.emplace_back(g);
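The early return restricts single-op mutation to ConvTransposed2dNHWC operators, which matches the InfoGAN transposed-convolution focus of this commit. A self-contained sketch of the if-with-initializer filter, assuming as<T> is a dynamic_pointer_cast-style downcast (stand-in types, not the project's):

    #include <memory>

    // Stand-in types; the real ones live in the InfiniTensor operator hierarchy.
    struct OperatorObj { virtual ~OperatorObj() = default; };
    struct ConvTransposed2dNHWCObj : OperatorObj {};

    template <class T>
    std::shared_ptr<T> as(const std::shared_ptr<OperatorObj> &op) {
        return std::dynamic_pointer_cast<T>(op); // null if op is not a T
    }

    // Mirrors the guard above: while the HACK is in place, anything that is
    // not a ConvTransposed2dNHWC op is skipped by the mutator.
    bool wouldBeMutated(const std::shared_ptr<OperatorObj> &candidate) {
        if (auto op = as<ConvTransposed2dNHWCObj>(candidate); !op)
            return false;
        return true;
    }

    int main() {
        return wouldBeMutated(std::make_shared<ConvTransposed2dNHWCObj>()) ? 0 : 1;
    }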
@@ -408,6 +411,9 @@ infini::Graph NMutator::expressionToGraph(nnet::Expr expr, Graph in_graph) {
                                nameNToTensorT.at(BN->getName())};
        TensorVec outputsPET = {nameNToTensorT.at(outputNameN)};
        const auto &[b, m, n, k, transa, transb] = op->getArgs();
+        // // HACK: pruning for debug
+        if (!((transa == 0) && (transb == 1)))
+            return nullptr;
        g->addOpWithOutputs<MatmulObj>(inputsPET[0], inputsPET[1],
                                       outputsPET[0], transa, transb);
    }
@@ -536,11 +542,15 @@ Graph NMutator::transformConvtransposed1x1(Operator _op) {
     auto newA = g->addTensor(
         {inputDims[0] * inputDims[1] * inputDims[2], inputDims[3]}, dtype);
     // FRSC
+    // auto newW = g->addTensor(
+    //     {weightDims[0], weightDims[1] * weightDims[2] * weightDims[3]},
+    //     dtype);
+    // HACK: without transpose
     auto newW = g->addTensor(
-        {weightDims[0], weightDims[1] * weightDims[2] * weightDims[3]}, dtype);
+        {weightDims[1] * weightDims[2] * weightDims[3], weightDims[0]}, dtype);
     g->addOpWithOutputs<ReshapeObj>(g->cloneTensor(A), newA, newA->getDims());
     g->addOpWithOutputs<ReshapeObj>(g->cloneTensor(W), newW, newW->getDims());
-    Tensor newO = g->addOp<MatmulObj>(newA, newW, nullptr, 0, 0)->getOutput();
+    Tensor newO = g->addOp<MatmulObj>(newA, newW, nullptr, 0, 1)->getOutput();
     g->addOpWithOutputs<ReshapeObj>(newO, g->cloneTensor(op->getOutput()),
                                     op->getOutput()->getDims());
     return g;
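Taken together with the transa == 0 / transb == 1 pruning added in expressionToGraph, the transposed-convolution-to-matmul rewrite now reshapes the weight with its two matrix dimensions swapped ({weightDims[1] * weightDims[2] * weightDims[3], weightDims[0]} instead of {weightDims[0], weightDims[1] * weightDims[2] * weightDims[3]}) and reads it through the Matmul with transb = 1. As the "HACK: without transpose" comment suggests, no explicit transpose of the weight data is inserted, so this looks like a debugging shortcut to exercise the A x B^T matmul path rather than a fully equivalent transformation.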
@@ -55,7 +55,6 @@ TEST(NMutator, NaiveConvWithInterpreter) {
               gg->getOutputs()[0]->getRawDataPtr<void *>());
 }

-// FIXME: failed since implicit transpose for DLT
 TEST(NMutator, InfoGAN_TConv_3_correctness) {
     const bool useMutatorDirectly = false;
     Runtime runtime = make_ref<CudaRuntimeObj>();
@@ -123,6 +122,7 @@ TEST(NMutator, InfoGAN_TConv_3_correctness) {

     auto go0 = gCpu->cloneTensor(g->getOutputs()[0]);
     auto bgo0 = gCpu->cloneTensor(bestGraph->getOutputs()[0]);
+    dbg(runtime->getPerfTime(bestGraph, true));
     EXPECT_TRUE(go0->equalData(bgo0, 1e-4));
 }
 }
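The test-side changes are consistent with the rest of the commit: the FIXME marking InfoGAN_TConv_3_correctness as failing (implicit transpose for DLT) is dropped, and a dbg(runtime->getPerfTime(bestGraph, true)) trace is added so the measured time of the selected graph is printed alongside the correctness check (equalData with a 1e-4 tolerance).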