forked from jiuyuan/InfiniTensor

Compare commits: master...optimizati (34 commits)

Author | SHA1
---|---
YdrMaster | c077a61681
YdrMaster | 325b279468
YdrMaster | 7b23fdbbfe
YdrMaster | e3428d8fd8
YdrMaster | 3631b03e73
YdrMaster | f78ff0e8ee
YdrMaster | 4a5e66b36a
YdrMaster | 7d7d923e8d
YdrMaster | 2a147c235d
YdrMaster | 17033fad97
YdrMaster | 28287f3782
YdrMaster | cc62a3216d
YdrMaster | 9cfe223953
YdrMaster | 34f7d7e9ed
YdrMaster | 59a46f3ff9
YdrMaster | 72788e8e0a
YdrMaster | bd61cf4533
YdrMaster | f0f8915433
YdrMaster | bb5bfb0be8
YdrMaster | a56e86dfa9
YdrMaster | 5129d312d2
YdrMaster | 45f7e891f1
YdrMaster | b4b5157bd4
YdrMaster | 63cc93aadc
YdrMaster | ddaf6685b3
YdrMaster | 385586d57b
YdrMaster | 320468b627
YdrMaster | 8f38a41fb6
YdrMaster | a6a0141234
YdrMaster | 36b0c5855c
YdrMaster | 0ad0150b87
YdrMaster | 334e0cccbc
YdrMaster | cc6c18b00f
whjthu | d9da06eb67

@ -60,6 +60,10 @@ if(USE_PROTOBUF)
endif()

include_directories(include)

add_subdirectory(optimization)
include_directories(optimization/include)

# Pybind11
add_subdirectory(3rd-party/pybind11)
include_directories(3rd-party/pybind11/include)

@ -98,7 +102,7 @@ set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -UNDEBUG")

# Source files
file(GLOB_RECURSE SRC src/ffi/*.cc src/core/*.cc src/kernels/cpu/*.cc src/nnet/*.cc src/operators/*.cc src/utils/*.cc)
file(GLOB_RECURSE SRC src/ffi/*.cc src/core/*.cc src/kernels/cpu/*.cc src/nnet/*.cc src/operators/*.cc src/optimizations/*.cc src/utils/*.cc)

if(USE_CUDA)
file(GLOB_RECURSE SRC_CUDA src/cuda/*.cc src/cuda/*.cu src/kernels/cuda/*.cc src/kernels/cuda/*.cu)

@ -121,7 +125,7 @@ if(USE_PROTOBUF)
target_link_libraries(InfiniTensor tensor_proto)
endif()

target_link_libraries(InfiniTensor pybind11::embed)
target_link_libraries(InfiniTensor optimization pybind11::embed)

# TVM backend
if(BUILD_TEST_EINNET)

@ -151,7 +155,6 @@ if(USE_INTELCPU)
set(DNNL_CONFIGURATION "cpu_gomp")
find_package(dnnl CONFIG REQUIRED)
if(dnnl_FOUND)
add_compile_definitions(USE_MKL=1)
include_directories(BEFORE ${dnnl_DIR}/../../../cpu_gomp/include/)
link_directories(${dnnl_DIR}/../../../cpu_gomp/lib)
target_link_libraries(InfiniTensor dnnl)

@ -0,0 +1,10 @@
|
|||
#include "optimizations/partitions/partition.h"
|
||||
|
||||
namespace infini {
|
||||
class GlobalGraphPartition : public Partition {
|
||||
Graph run(const GraphObj &graph, const Transformation &tr,
|
||||
const Rating &rating) const override {
|
||||
return rankCandidates(graph, tr, rating).top().graph;
|
||||
}
|
||||
};
|
||||
} // namespace infini
|
|
@ -0,0 +1,33 @@
|
|||
#pragma once
|
||||
|
||||
#include "core/graph.h"
|
||||
#include "optimizations/rate/rating.h"
|
||||
#include "optimizations/transformations/transformation.h"
|
||||
#include <queue>
|
||||
|
||||
namespace infini {
|
||||
class Partition {
|
||||
public:
|
||||
virtual Graph run(const GraphObj &, const Transformation &,
|
||||
const Rating &) const = 0;
|
||||
struct Candidate {
|
||||
Graph graph;
|
||||
Rating::Cost cost;
|
||||
|
||||
bool operator<(Candidate others) const { return cost < others.cost; }
|
||||
bool operator>(Candidate others) const { return cost > others.cost; }
|
||||
};
|
||||
|
||||
protected:
|
||||
using CandidateQueue = std::priority_queue<Candidate, vector<Candidate>,
|
||||
std::greater<Candidate>>;
|
||||
|
||||
/// @brief Rank the subgraph candidates.
|
||||
/// @param subgraph The subgraph to transform.
|
||||
/// @param tr Transformation object.
|
||||
/// @return Ranked candidates.
|
||||
CandidateQueue rankCandidates(const GraphObj &subgraph,
|
||||
const Transformation &tr,
|
||||
const Rating &rating) const;
|
||||
};
|
||||
} // namespace infini
|
|
@ -0,0 +1,8 @@
|
|||
#include "optimizations/partitions/partition.h"
|
||||
|
||||
namespace infini {
|
||||
class SingleOperatorPartition : public Partition {
|
||||
Graph run(const GraphObj &, const Transformation &,
|
||||
const Rating &) const override;
|
||||
};
|
||||
} // namespace infini
|
|
@ -0,0 +1,21 @@
|
|||
#pragma once
|
||||
|
||||
#include "core/graph.h"
|
||||
#include "optimizations/partitions/partition.h"
|
||||
|
||||
namespace infini {
|
||||
class Pass {
|
||||
std::unique_ptr<Partition> p;
|
||||
std::unique_ptr<Transformation> tr;
|
||||
std::unique_ptr<Rating> rating;
|
||||
|
||||
public:
|
||||
Pass(std::unique_ptr<Partition> p, std::unique_ptr<Transformation> tr,
|
||||
std::unique_ptr<Rating> rating)
|
||||
: p(std::move(p)), tr(std::move(tr)), rating(std::move(rating)) {}
|
||||
|
||||
Graph run(const GraphObj &graph) const {
|
||||
return p->run(graph, *tr, *rating);
|
||||
}
|
||||
};
|
||||
} // namespace infini
|
|
@ -0,0 +1,25 @@
|
|||
#include "core/graph.h"
|
||||
#include "pass.h"
|
||||
|
||||
namespace infini {
|
||||
class PassManager {
|
||||
public:
|
||||
PassManager() {}
|
||||
|
||||
Graph run(Graph graph) {
|
||||
for (auto pass : passes)
|
||||
graph = pass->run(*graph);
|
||||
return graph;
|
||||
}
|
||||
|
||||
bool addPass(std::unique_ptr<Partition> p,
|
||||
std::unique_ptr<Transformation> tr,
|
||||
std::unique_ptr<Rating> rating) {
|
||||
passes.emplace_back(
    make_ref<Pass>(std::move(p), std::move(tr), std::move(rating)));
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
vector<Ref<Pass>> passes;
|
||||
};
|
||||
} // namespace infini
|
|
@ -0,0 +1,22 @@
|
|||
#pragma once
|
||||
|
||||
#include "optimizations/rate/rating.h"
|
||||
#include <numeric>
|
||||
|
||||
namespace infini {
|
||||
/**
|
||||
* Rate a `Graph` by its memory usage.
|
||||
*/
|
||||
class MemoryRating : public Rating {
|
||||
public:
|
||||
/**
|
||||
* Run the `Rating` on the graph.
|
||||
*/
|
||||
float run(const GraphObj &graph) const override {
|
||||
auto tensors = graph.getTensors();
|
||||
return static_cast<float>(
|
||||
std::accumulate(tensors.begin(), tensors.end(), (size_t)0,
                [](size_t acc, const auto &t) { return acc + t->size(); }));
|
||||
}
|
||||
};
|
||||
} // namespace infini
|
|
@ -0,0 +1,17 @@
|
|||
#pragma once
|
||||
|
||||
#include "core/graph.h"
|
||||
|
||||
namespace infini {
|
||||
/// @brief Rate a `Graph`.
|
||||
class Rating {
|
||||
public:
|
||||
/// @brief Cost of a substitute.
|
||||
using Cost = float;
|
||||
|
||||
/// @brief Run the `Rating` on the `graph`.
|
||||
/// @param graph The graph to rate.
|
||||
/// @return The cost of `graph`.
|
||||
virtual Cost run(const GraphObj &graph) const = 0;
|
||||
};
|
||||
} // namespace infini
|
|
@ -0,0 +1,16 @@
|
|||
#pragma once
|
||||
|
||||
#include "optimizations/rate/rating.h"
|
||||
|
||||
namespace infini {
|
||||
/**
|
||||
 * Rate a `Graph` by its execution time.
|
||||
*/
|
||||
class TimeRating : public Rating {
|
||||
public:
|
||||
/**
|
||||
* Run the `Rating` on the graph.
|
||||
*/
|
||||
float run(const GraphObj &graph) const override;
|
||||
};
|
||||
} // namespace infini
|
|
@ -0,0 +1,14 @@
|
|||
#pragma once
|
||||
|
||||
#include "core/common.h"
|
||||
#include "core/graph.h"
|
||||
#include "core/runtime.h"
|
||||
|
||||
namespace infini {
|
||||
class Transformation {
|
||||
public:
|
||||
virtual vector<Graph> run(const GraphObj &graph) const {
|
||||
return {make_ref<GraphObj>(graph)};
|
||||
};
|
||||
};
|
||||
} // namespace infini
|
|
@ -0,0 +1,14 @@
|
|||
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
|
||||
|
||||
project(optimization LANGUAGES CXX C)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_INCLUDE_CURRENT_DIR ON)
|
||||
|
||||
set(CMAKE_CXX_FLAGS "$ENV{CMAKE_CXX_FLAGS} -fPIC")
|
||||
|
||||
file(GLOB_RECURSE SRC src/*.h src/*.cc src/*.cpp)
|
||||
add_library(optimization ${SRC})
|
||||
|
||||
file(GLOB_RECURSE TEST test/*.cpp)
|
||||
add_executable(test_optimization ${TEST})
|
||||
target_link_libraries(test_optimization optimization)
|
|
@ -0,0 +1,14 @@
|
|||
.PHONY : build clean install-python test-cpp test-onnx
|
||||
|
||||
TYPE ?= release
|
||||
|
||||
build:
|
||||
mkdir -p build/$(TYPE)
|
||||
cd build/$(TYPE) && cmake $(CMAKE_OPT) ../.. && make -j8
|
||||
|
||||
test: build
|
||||
build/$(TYPE)/test
|
||||
@echo Done
|
||||
|
||||
clean:
|
||||
rm -rf build
|
|
@ -0,0 +1,24 @@
# Design Overview

1. Principle: simplification
   Keep definitions as simple as possible. Anything not currently needed is removed, so that development stays agile while remaining reliable.
2. Goal: mid-level interfaces
   Because this is not developed for end users, there is no need to worry about interface ergonomics, nor to over-invest in defensive programming. The callers of these APIs will be first-party upper-layer code.
3. Documentation
   Coverage should approach 100%.

## Code Walkthrough

The important files are [tensor.h](src/tensor.h), [graph.h](src/graph.h) and [mutation.h](src/mutation.h).

tensor.h defines the tensor of this graph representation. A tensor consists of a shape, a data type and data, and records how it is connected to operators in each graph. Ownership of a tensor is shared by all operators connected to it, so the only form in which it exists is a `std::shared_ptr`; a tensor outside that shared-ownership smart pointer is meaningless. A tensor is constructed directly as a smart pointer by passing the shape, data type and data to a factory function; once every operator connected to the tensor has been released, the tensor is released as well.
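
As a sketch of how the factory is meant to be used (based on `Tensor::share`, `ty` and `Data::cpu` added in this diff; the shape and values are illustrative):

```cpp
// Build a shared 1x1x2x3 float tensor; `t` is an Arc<Tensor> (std::shared_ptr)
// and stays alive exactly as long as some operator still refers to it.
auto t = Tensor::share({1, 1, 2, 3},                          // shape
                       ty<float>(),                           // data type
                       Data::cpu<float>({1, 2, 3, 4, 5, 6})); // data
```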

Operators and the other important types are defined in graph.h. An operator stores exactly one piece of information, its operator type; everything else is expressed by the input tensors it holds ownership of. The operator type tells the operator how to interpret each of its input and output tensors. This gives operators full flexibility without losing expressiveness: the information that determines how an operator works is necessarily already stored in the tensors' data, and the operator type explains what each of them is.

An operator is owned by a graph, more precisely by an unpartitioned or indivisible monolithic graph, a `Unigraph`. Every operator is controlled by exactly one graph, and graphs never share any part of an operator. Because operators are very light (one enum plus two arrays of smart pointers), this costs little while greatly simplifying ownership management: tensors are the one and only kind of object that is shared, and they are shared only between operators; everything else is exclusively owned.

A `Unigraph` is also append-only: operators can only be added to it, in topological order, and can be neither removed nor reordered. An operator's index within its graph is therefore stable, and every graph holds a unique ID. A graph can thus be referred to by its ID and an operator by its index (`OpRef`), and a tensor by an operator index plus a slot index (`TensorPos`). A graph must be destroyed as a whole; when it is destroyed, all tensor connections controlled by its operators are destroyed with it, so there is no need to maintain ownership relations that cannot exist on their own.
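
A minimal sketch of the append-only interface (based on `Unigraph::push_operator` from graph.h; `a`, `b`, `c`, `d` are assumed to be `Arc<Tensor>`s created as above):

```cpp
// Operators are appended in topological order; each push returns an
// OpRef {graph id, operator index}, so references stay valid.
Unigraph g;
auto matmul = g.push_operator(OpType::MatMul, {a, b}, {c}); // index 0
auto relu   = g.push_operator(OpType::Relu,   {c},    {d}); // index 1, after its producer
// matmul.graph and relu.graph both equal g.id.
```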

The classes `Partition`, `Mutation` and `Rating` in mutation.h support rule-based graph optimization. The three classes are essentially the same thing; defining them separately imposes a compile-time constraint on the stages of an optimization round: partitioning, mutation and rating must each be performed exactly once, in that order.

What these classes hold is a two-dimensional array of `Mutant`s. Each `Mutant` is one mutated form of a subgraph and stores the subgraph structure together with its score. The inner array holds the variants of one subgraph; the outer array holds the subgraphs of one graph. `Partition` takes the complete graph as input and builds the structure of the outer array, `Mutation` fills in the inner arrays, and `Rating` assigns each mutant a score and sorts the variants from best to worst. A vector of indices can then be used to guide rebuilding of the whole graph.
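
The intended call sequence for one optimization round, as exercised by the test added later in this diff (the empty-lambda mutator is just a placeholder):

```cpp
// Partition -> Mutation -> Rating, then rebuild from chosen variant indices.
auto p = Partition<pass::SingleOperator>(std::move(g), pass::partition);
auto m = Mutation<pass::SingleOperator>(
    std::move(p),
    [](const auto &g, const auto &t) { return Vec<Unigraph>{}; });
auto r = Rating<pass::SingleOperator>(std::move(m), memory_usage);
// Index 0 of every subgraph picks its best-rated variant
// (here the original, since the placeholder produces no mutants).
auto ans = r.build(Vec<size_t>(r.size().size(), 0));
```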
@ -0,0 +1,22 @@
|
|||
#pragma once
|
||||
|
||||
#include "../../src/mutation.h"
|
||||
#include "../../src/pass/single_operator.h"
|
||||
#include <unordered_set>
|
||||
|
||||
namespace optimization {
|
||||
|
||||
/// @brief Calculates the memory usage of a graph.
|
||||
/// @param g The graph.
/// @return 1e6 divided by the total memory usage of the graph in bytes,
///         so lower memory usage scores higher.
|
||||
inline float memory_usage(Unigraph const &g) {
|
||||
std::unordered_set<size_t> mark;
|
||||
size_t memory = 0;
|
||||
for (const auto &op : g.operators)
|
||||
for (const auto &t : op.outputs)
|
||||
if (mark.insert(reinterpret_cast<uintptr_t>(t.get())).second)
|
||||
memory += t->size();
|
||||
return 1e6f / static_cast<float>(memory);
|
||||
}
|
||||
|
||||
} // namespace optimization
|
|
@ -0,0 +1,78 @@
#pragma once

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <utility>
#include <vector>

namespace optimization {

/// @brief Stores tensor data.
class Data {
    /// @brief `cpu_data` lives in host memory; it could equally be managed
    /// as a `std::vector<uint8_t>`.
    uint8_t *cpu_data;

    // #ifdef USE_CUDA
    //     void *gpu_data;
    // #endif

    // #ifdef USE_BANG
    //     void *mlu_data;
    // #endif

    Data(uint8_t *ptr) : cpu_data(ptr) {}

  public:
    Data() : cpu_data(nullptr) {}
    Data(size_t size) : cpu_data(new uint8_t[size]) {}
    template <class t> Data(t begin, t end) : cpu_data(nullptr) {
        // Copies element by element into a byte buffer; intended for byte
        // iterators (as used in `cast`, which passes `uint8_t *`).
        size_t c = sizeof(decltype(*begin)) * static_cast<size_t>(end - begin);
        cpu_data = new uint8_t[c];
        std::copy(begin, end, cpu_data);
    }
    Data(Data const &) = delete;
    Data(Data &&others) noexcept
        : cpu_data(std::exchange(others.cpu_data, nullptr)) {}
    ~Data() noexcept { delete[] cpu_data; }

    Data &operator=(Data const &) = delete;
    Data &operator=(Data &&others) noexcept {
        if (this != &others)
            delete[] std::exchange(cpu_data,
                                   std::exchange(others.cpu_data, nullptr));

        return *this;
    }

    /// @brief Builds `Data` from a `vector` of any type `t`.
    /// @tparam t Data type.
    /// @param data Data `vector`.
    /// @return `Data` object.
    template <class t> static Data cpu(std::vector<t> const &data) {
        auto const len = data.size();
        auto const size = sizeof(t) * len;
        Data ans(size); // allocate the buffer before copying into it
        memcpy(ans.cpu_data, data.data(), size);
        return ans;
    }

    /// @brief Gets data ptr.
    /// @tparam t Data type.
    /// @return Data ptr.
    template <class t> t *as_ptr() const {
        return reinterpret_cast<t *>(cpu_data);
    }

    /// @brief Copies data to a `Vec`.
    /// @tparam t Data type.
    /// @param len Count of data.
    /// @return The data `Vec`.
    template <class t> std::vector<t> to_vec(size_t len) const {
        std::vector<t> ans(len);
        memcpy(ans.data(), cpu_data, sizeof(t) * len); // copy out of the buffer
        return ans;
    }
};

} // namespace optimization

@ -0,0 +1,34 @@
|
|||
#include "data_type.h"
|
||||
|
||||
using namespace optimization;
|
||||
|
||||
size_t DataType::size() const {
|
||||
switch (id) {
|
||||
case DataTypeId::FLOAT:
|
||||
return sizeof(float);
|
||||
case DataTypeId::UINT8:
|
||||
return sizeof(uint8_t);
|
||||
case DataTypeId::INT8:
|
||||
return sizeof(int8_t);
|
||||
case DataTypeId::UINT16:
|
||||
return sizeof(uint16_t);
|
||||
case DataTypeId::INT16:
|
||||
return sizeof(int16_t);
|
||||
case DataTypeId::INT32:
|
||||
return sizeof(int32_t);
|
||||
case DataTypeId::INT64:
|
||||
return sizeof(int64_t);
|
||||
case DataTypeId::BOOL:
|
||||
return sizeof(bool);
|
||||
case DataTypeId::FLOAT16:
|
||||
return 2;
|
||||
case DataTypeId::DOUBLE:
|
||||
return sizeof(double);
|
||||
case DataTypeId::UINT32:
|
||||
return sizeof(uint32_t);
|
||||
case DataTypeId::UINT64:
|
||||
return sizeof(uint64_t);
|
||||
default:
|
||||
throw "unsupported data type.";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
namespace optimization {
|
||||
|
||||
enum class DataTypeId : uint8_t {
|
||||
UNDEFINED,
|
||||
FLOAT,
|
||||
UINT8,
|
||||
INT8,
|
||||
UINT16,
|
||||
INT16,
|
||||
INT32,
|
||||
INT64,
|
||||
STRING,
|
||||
BOOL,
|
||||
FLOAT16,
|
||||
DOUBLE,
|
||||
UINT32,
|
||||
UINT64,
|
||||
// COMPLEX64,
|
||||
// COMPLEX128,
|
||||
// BFLOAT16,
|
||||
};
|
||||
|
||||
struct DataType {
|
||||
DataTypeId id;
|
||||
|
||||
size_t size() const;
|
||||
};
|
||||
|
||||
template <class t> DataType ty();
|
||||
template <> inline DataType ty<float>() { return {DataTypeId::FLOAT}; }
|
||||
template <> inline DataType ty<uint8_t>() { return {DataTypeId::UINT8}; }
|
||||
template <> inline DataType ty<int8_t>() { return {DataTypeId::INT8}; }
|
||||
template <> inline DataType ty<uint16_t>() { return {DataTypeId::UINT16}; }
|
||||
template <> inline DataType ty<int16_t>() { return {DataTypeId::INT16}; }
|
||||
template <> inline DataType ty<int32_t>() { return {DataTypeId::INT32}; }
|
||||
template <> inline DataType ty<int64_t>() { return {DataTypeId::INT64}; }
|
||||
template <> inline DataType ty<bool>() { return {DataTypeId::BOOL}; }
|
||||
template <> inline DataType ty<double>() { return {DataTypeId::DOUBLE}; }
|
||||
template <> inline DataType ty<uint32_t>() { return {DataTypeId::UINT32}; }
|
||||
template <> inline DataType ty<uint64_t>() { return {DataTypeId::UINT64}; }
|
||||
|
||||
} // namespace optimization
|
|
@ -0,0 +1,68 @@
|
|||
#include "graph.h"
|
||||
|
||||
using namespace optimization;
|
||||
|
||||
static size_t GRAPH_ID = 1;
|
||||
|
||||
Unigraph::Unigraph() : id(GRAPH_ID++) {}
|
||||
|
||||
Unigraph::Unigraph(Unigraph &&others)
|
||||
: id(std::exchange(others.id, 0)), operators(std::move(others.operators)) {}
|
||||
|
||||
Unigraph::~Unigraph() {
|
||||
for (auto &op : operators) {
|
||||
for (auto &i : op.inputs)
|
||||
i->target.erase(i->target.find(this->id));
|
||||
for (auto &o : op.outputs)
|
||||
o->source.erase(o->source.find(this->id));
|
||||
}
|
||||
}
|
||||
|
||||
Unigraph &Unigraph::operator=(Unigraph &&others) {
|
||||
if (this == &others)
|
||||
return *this;
|
||||
|
||||
for (auto &op : operators) {
|
||||
for (auto &i : op.inputs)
|
||||
i->target.erase(i->target.find(this->id));
|
||||
for (auto &o : op.outputs)
|
||||
o->source.erase(o->source.find(this->id));
|
||||
}
|
||||
|
||||
this->id = std::exchange(others.id, 0);
|
||||
this->operators = std::move(others.operators);
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
OpRef Unigraph::push_operator( // fmt: new line
|
||||
OpType op_type, //
|
||||
Vec<Arc<Tensor>> inputs, //
|
||||
Vec<Arc<Tensor>> outputs //
|
||||
) {
|
||||
auto ans = OpRef{this->id, operators.size()};
|
||||
|
||||
size_t i = 0;
|
||||
for (auto &input : inputs) {
|
||||
auto it = input->target.find(ans.graph);
|
||||
if (it == input->target.end())
|
||||
input->target[ans.graph] = {{ans.op, i++}};
|
||||
else
|
||||
it->second.push_back({ans.op, i++});
|
||||
}
|
||||
i = 0;
|
||||
for (auto &output : outputs) {
|
||||
auto it = output->source.find(ans.graph);
|
||||
if (it == output->source.end())
|
||||
output->source[ans.graph] = {ans.op, i++};
|
||||
else
|
||||
throw "tensor source exist";
|
||||
}
|
||||
|
||||
this->operators.push_back({
|
||||
op_type, // fmt: new line
|
||||
std::move(inputs), //
|
||||
std::move(outputs), //
|
||||
});
|
||||
return ans;
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
#pragma once
|
||||
|
||||
#include "op_type.h"
|
||||
#include "tensor.h"
|
||||
|
||||
namespace optimization {
|
||||
|
||||
/// @brief A struct to represent an operator in the computation graph.
|
||||
/// The ownership of an `Operator` belongs to one `Unigraph`.
|
||||
struct Operator {
|
||||
/// @brief Type of the operator.
|
||||
OpType op_type;
|
||||
|
||||
/// @brief Input and output tensors.
|
||||
/// Notice: ownership of the tensors are shared between
|
||||
/// operators that generate and use the same tensor.
|
||||
Vec<Arc<Tensor>> inputs, outputs;
|
||||
};
|
||||
|
||||
/// @brief A reference of an `Operator` in a `Unigraph`.
|
||||
struct OpRef {
|
||||
/// @brief `graph` for unique identifier of `Unigraph`.
|
||||
/// `op` for `Operator` index in `Unigraph`.
|
||||
size_t graph, op;
|
||||
};
|
||||
|
||||
/// @brief An unpartitioned graph or an unpartitionable minimum graph.
|
||||
struct Unigraph {
|
||||
/// @brief Unique identifier.
|
||||
size_t id;
|
||||
/// @brief List of operators in the graph with topological order.
|
||||
Vec<Operator> operators;
|
||||
|
||||
Unigraph();
|
||||
Unigraph(Unigraph const &) = delete;
|
||||
Unigraph(Unigraph &&others);
|
||||
~Unigraph();
|
||||
|
||||
Unigraph &operator=(Unigraph const &) = delete;
|
||||
Unigraph &operator=(Unigraph &&);
|
||||
|
||||
/// @brief Pushes an `Operator` into the graph.
|
||||
/// Every `Operator` must be pushed in topological order.
|
||||
/// @param op_type Operator type.
|
||||
/// @param inputs Input tensors.
|
||||
/// @param outputs Output tensors.
|
||||
/// @return An `OpRef`.
|
||||
OpRef push_operator( // fmt: new line
|
||||
OpType op_type, //
|
||||
Vec<Arc<Tensor>> inputs, //
|
||||
Vec<Arc<Tensor>> outputs //
|
||||
);
|
||||
};
|
||||
|
||||
} // namespace optimization
|
|
@ -0,0 +1,164 @@
|
|||
#pragma once
|
||||
|
||||
#include "graph.h"
|
||||
#include <functional>
|
||||
|
||||
namespace optimization {
|
||||
|
||||
/// @brief A candidate subgraph mutant.
|
||||
struct Mutant {
|
||||
/// @brief The mutated subgraph.
|
||||
Unigraph graph;
|
||||
|
||||
/// @brief A score representing the quality of the mutant.
|
||||
float score;
|
||||
|
||||
Mutant(Unigraph &&g) : graph(std::move(g)), score(1.0f) {}
|
||||
Mutant(Mutant const &) = delete;
|
||||
Mutant(Mutant &&others)
|
||||
: graph(std::move(others.graph)),
|
||||
score(std::exchange(others.score, 1.0f)) {}
|
||||
|
||||
Mutant &operator=(Mutant const &) = delete;
|
||||
Mutant &operator=(Mutant &&others) {
|
||||
if (this != &others) {
|
||||
this->graph = std::move(others.graph);
|
||||
this->score = std::exchange(others.score, 1.0f);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
/// @brief A subgraph partition with `PartitionType`, will be mutated into
|
||||
/// multiple `Mutant`s.
|
||||
/// @tparam PartitionType To partition this subgraph.
|
||||
template <class PartitionType> struct SubGraph {
|
||||
Vec<Mutant> mutants;
|
||||
PartitionType type;
|
||||
};
|
||||
|
||||
template <class t> Vec<size_t> list_size(Vec<Vec<t>> const &);
|
||||
template <class PartitionType> class Mutation;
|
||||
template <class PartitionType> class Rating;
|
||||
|
||||
/// @brief Partitioned subgraphs.
|
||||
template <class PartitionType> struct Partition {
|
||||
/// @brief 2D vector of `Mutant` instances for each partitioned subgraph.
|
||||
Vec<SubGraph<PartitionType>> parts;
|
||||
|
||||
friend Mutation<PartitionType>;
|
||||
|
||||
public:
|
||||
/// @brief A functional object that takes an unpartitioned graph as input
|
||||
/// and returns a vector of partitioned subgraphs.
|
||||
using Func =
|
||||
std::function<Vec<std::pair<Unigraph, PartitionType>>(Unigraph &&)>;
|
||||
|
||||
/// @brief Constructs a partitioned graph from an unpartitioned graph
|
||||
/// using a partitioning function.
|
||||
/// @param g An unpartitioned graph.
|
||||
/// @param f A function that takes an unpartitioned graph as input
|
||||
/// and returns a vector of partitioned subgraphs.
|
||||
Partition(Unigraph &&g, Func const &f) {
|
||||
for (auto &[g_, t] : f(std::move(g))) {
|
||||
auto &sub = this->parts.emplace_back();
|
||||
sub.mutants.emplace_back(std::move(g_));
|
||||
sub.type = std::move(t);
|
||||
}
|
||||
}
|
||||
|
||||
/// @brief Returns mutant vector size.
|
||||
/// @return 2D vector size.
|
||||
Vec<size_t> size() const { return list_size(parts); }
|
||||
};
|
||||
|
||||
/// @brief Generates mutants for every subgraph.
|
||||
template <class PartitionType> class Mutation {
|
||||
/// @brief 2D vector of `Mutant` instances for each partitioned subgraph.
|
||||
Vec<SubGraph<PartitionType>> parts;
|
||||
|
||||
friend Rating<PartitionType>;
|
||||
|
||||
public:
|
||||
/// @brief A functional object that takes a subgraph as input
|
||||
/// and returns a vector of mutated graphs.
|
||||
using Func =
|
||||
std::function<Vec<Unigraph>(Unigraph const &, PartitionType const &)>;
|
||||
|
||||
/// @brief Mutates every subgraph in a partitioned graph.
|
||||
/// @param p The partitioned graph to be mutated.
|
||||
/// @param f A function that takes a subgraph as input
|
||||
/// and returns a vector of mutated graphs.
|
||||
Mutation(Partition<PartitionType> &&p, Func const &f)
|
||||
: parts(std::move(p.parts)) {
|
||||
for (auto &sub : parts)
|
||||
for (auto &m : f(sub.mutants.front().graph, sub.type))
|
||||
sub.mutants.emplace_back(std::move(m));
|
||||
}
|
||||
|
||||
/// @brief Returns mutant vector size.
|
||||
/// @return 2D vector size.
|
||||
Vec<size_t> size() const { return list_size(parts); }
|
||||
};
|
||||
|
||||
/// @brief Rates each subgraph mutant.
|
||||
template <class PartitionType> class Rating {
|
||||
/// @brief 2D vector of `Mutant` instances for each partitioned subgraph.
|
||||
Vec<SubGraph<PartitionType>> parts;
|
||||
|
||||
public:
|
||||
/// @brief A functional object that takes a mutated subgraph as input
|
||||
/// and returns its score.
|
||||
using Func = std::function<float(Unigraph const &)>;
|
||||
|
||||
/// @brief Rates every mutated subgraph with a `Rating::Func`.
|
||||
/// @param m The mutated subgraphs to be rated.
|
||||
/// @param f A function that takes a mutated subgraph as input
|
||||
/// and returns its score.
|
||||
Rating(Mutation<PartitionType> &&m, Func const &f)
|
||||
: parts(std::move(m.parts)) {
|
||||
|
||||
for (auto &sub : parts)
|
||||
if (sub.mutants.size() > 1) {
|
||||
auto sum = 0.0f;
|
||||
for (auto &c : sub.mutants)
|
||||
sum += (c.score = f(c.graph));
|
||||
sum = std::abs(sum);
|
||||
for (auto &c : sub.mutants)
|
||||
c.score /= sum;
|
||||
std::sort(sub.mutants.begin(), sub.mutants.end(),
|
||||
[](auto const &a, auto const &b) {
|
||||
return a.score > b.score;
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/// @brief Returns mutant vector size.
|
||||
/// @return 2D vector size.
|
||||
Vec<size_t> size() const { return list_size(parts); }
|
||||
|
||||
/// @brief Builds `Unigraph` from the subgraphs
|
||||
/// with specified indices.
|
||||
/// @param indices Subgraph indices.
|
||||
/// @return Merged `Unigraph`.
|
||||
Unigraph build(Vec<size_t> const &indices) const {
|
||||
const auto size = indices.size();
|
||||
if (size != parts.size())
|
||||
throw "indices size wrong";
|
||||
Unigraph ans;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
for (const auto &op :
|
||||
parts.at(i).mutants.at(indices[i]).graph.operators)
|
||||
ans.push_operator(op.op_type, op.inputs, op.outputs);
|
||||
return ans;
|
||||
}
|
||||
};
|
||||
|
||||
template <class t> Vec<size_t> list_size(Vec<SubGraph<t>> const &list) {
|
||||
Vec<size_t> ans(list.size());
|
||||
std::transform(list.begin(), list.end(), ans.begin(),
|
||||
[](const auto &e) { return e.mutants.size(); });
|
||||
return ans;
|
||||
}
|
||||
|
||||
} // namespace optimization
|
|
@ -0,0 +1,196 @@
|
|||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace optimization {
|
||||
|
||||
enum class OpType : uint16_t {
|
||||
Abs,
|
||||
Acos,
|
||||
Acosh,
|
||||
Add,
|
||||
And,
|
||||
ArgMax,
|
||||
Asin,
|
||||
Asinh,
|
||||
Atan,
|
||||
Atanh,
|
||||
AveragePool,
|
||||
BatchNormalization,
|
||||
Bernoulli,
|
||||
BitShift,
|
||||
BitwiseAnd,
|
||||
BitwiseNot,
|
||||
BitwiseOr,
|
||||
BitwiseXor,
|
||||
BlackmanWindow,
|
||||
Cast,
|
||||
CastLike,
|
||||
Ceil,
|
||||
Celu,
|
||||
CenterCropPad,
|
||||
Clip,
|
||||
Col2lm,
|
||||
Compress,
|
||||
Concat,
|
||||
ConcatFromSequence,
|
||||
// Constant, // -> Input
|
||||
ConstantOfShape,
|
||||
Conv,
|
||||
ConvInteger,
|
||||
ConvTranspose,
|
||||
Cos,
|
||||
Cosh,
|
||||
CumSum,
|
||||
DFT,
|
||||
DeformConv,
|
||||
DepthToSpace,
|
||||
DequantizeLinear,
|
||||
Det,
|
||||
Div,
|
||||
Dropout,
|
||||
DynamicQuantizeLinear,
|
||||
Einsum,
|
||||
Elu,
|
||||
Equal,
|
||||
Erf,
|
||||
Exp,
|
||||
Expand,
|
||||
EyeLike,
|
||||
Flatten,
|
||||
Floor,
|
||||
GRU,
|
||||
Gather,
|
||||
GatherElements,
|
||||
GatherND,
|
||||
Gemm,
|
||||
GlobalAveragePool,
|
||||
GlobalLpPool,
|
||||
GlobalMaxPool,
|
||||
Greater,
|
||||
GreaterOrEqual,
|
||||
GridSample,
|
||||
GroupNormalization,
|
||||
HammingWindow,
|
||||
HannWindow,
|
||||
HardSigmoid,
|
||||
HardSwish,
|
||||
Hardmax,
|
||||
Identity,
|
||||
If,
|
||||
InstanceNormalization,
|
||||
IsInf,
|
||||
IsNaN,
|
||||
LRN,
|
||||
LSTM,
|
||||
LayerNormalization,
|
||||
LeakyRelu,
|
||||
Less,
|
||||
LessOrEqual,
|
||||
Log,
|
||||
LogSoftmax,
|
||||
Loop,
|
||||
LpNormalization,
|
||||
LpPool,
|
||||
MatMul,
|
||||
MatMulInteger,
|
||||
Max,
|
||||
MaxPool,
|
||||
MaxRoiPool,
|
||||
MaxUnpool,
|
||||
Mean,
|
||||
MeanVarianceNormalization,
|
||||
MelWeightMatrix,
|
||||
Min,
|
||||
Mish,
|
||||
Mod,
|
||||
Mul,
|
||||
Multinomial,
|
||||
Neg,
|
||||
NegativeLogLikelihoodLoss,
|
||||
NonMaxSuppression,
|
||||
NonZero,
|
||||
Not,
|
||||
OneHot,
|
||||
Optional,
|
||||
OptionalGetElement,
|
||||
OptionalHasElement,
|
||||
Or,
|
||||
PRelu,
|
||||
Pad,
|
||||
Pow,
|
||||
QLinearConv,
|
||||
QLinearMatMul,
|
||||
QuantizeLinear,
|
||||
RNN,
|
||||
RandomNormal,
|
||||
RandomNormalLike,
|
||||
RandomUniform,
|
||||
RandomUniformLike,
|
||||
Range,
|
||||
Reciprocal,
|
||||
ReduceL1,
|
||||
ReduceL2,
|
||||
ReduceLogSum,
|
||||
ReduceLogSumExp,
|
||||
ReduceMax,
|
||||
ReduceMean,
|
||||
ReduceMin,
|
||||
ReduceProd,
|
||||
ReduceSum,
|
||||
ReduceSumSquare,
|
||||
Relu,
|
||||
Reshape,
|
||||
Resize,
|
||||
ReverseSequence,
|
||||
RoiAlign,
|
||||
Round,
|
||||
STFT,
|
||||
Scan,
|
||||
Scatter,
|
||||
ScatterElements,
|
||||
ScatterND,
|
||||
Selu,
|
||||
SequenceAt,
|
||||
SequenceConstruct,
|
||||
SequenceEmpty,
|
||||
SequenceErase,
|
||||
SequenceInsert,
|
||||
SequenceLength,
|
||||
SequenceMap,
|
||||
Shape,
|
||||
Shrink,
|
||||
Sigmoid,
|
||||
Sign,
|
||||
Sin,
|
||||
Sinh,
|
||||
Size,
|
||||
Slice,
|
||||
Softmax,
|
||||
SoftmaxCrossEntropyLoss,
|
||||
Softplus,
|
||||
Softsign,
|
||||
SpaceToDepth,
|
||||
Split,
|
||||
SplitToSequence,
|
||||
Sqrt,
|
||||
Squeeze,
|
||||
StringNormalizer,
|
||||
Sub,
|
||||
Sum,
|
||||
Tan,
|
||||
Tanh,
|
||||
TfIdfVectorizer,
|
||||
ThresholdedRelu,
|
||||
Tile,
|
||||
TopK,
|
||||
Transpose,
|
||||
Trilu,
|
||||
Unique,
|
||||
Unsqueeze,
|
||||
Upsample,
|
||||
Where,
|
||||
Xor,
|
||||
};
|
||||
|
||||
} // namespace optimization
|
|
@ -0,0 +1,22 @@
|
|||
#pragma once
|
||||
|
||||
#include "../graph.h"
|
||||
|
||||
namespace optimization {
|
||||
|
||||
class Conv {
|
||||
Operator const &op;
|
||||
|
||||
public:
|
||||
explicit Conv(Operator &op) : op(op) {}
|
||||
explicit Conv(Operator const &op) : op(op) {}
|
||||
|
||||
Arc<Tensor> const &input() const { return op.inputs.at(0); }
|
||||
Arc<Tensor> const &kernel() const { return op.inputs.at(1); }
|
||||
Arc<Tensor> const &dilations() const { return op.inputs.at(2); }
|
||||
Arc<Tensor> const &pads() const { return op.inputs.at(3); }
|
||||
Arc<Tensor> const &strides() const { return op.inputs.at(4); }
|
||||
Arc<Tensor> const &output() const { return op.outputs.at(0); }
|
||||
};
|
||||
|
||||
} // namespace optimization
|
|
@ -0,0 +1,197 @@
|
|||
#include "single_operator.h"
|
||||
#include "../operator/conv.h"
|
||||
#include <iterator>
|
||||
#include <map>
|
||||
#include <numeric>
|
||||
|
||||
using namespace optimization;
|
||||
using namespace pass;
|
||||
|
||||
Vec<std::pair<Unigraph, SingleOperator>>
|
||||
optimization::pass::partition(Unigraph &&g) {
|
||||
Vec<std::pair<Unigraph, SingleOperator>> ans;
|
||||
for (auto &op : g.operators) {
|
||||
auto &[g, t] = ans.emplace_back();
|
||||
g.push_operator(op.op_type, op.inputs, op.outputs);
|
||||
}
|
||||
return ans;
|
||||
}
|
||||
|
||||
// 1st: new shape
|
||||
// 2nd: permutation
|
||||
static std::pair<Vec<size_t>, Vec<size_t>> // fmt: new line
|
||||
transpose( //
|
||||
Vec<size_t> const &shape, //
|
||||
char const *src, // source tensor layout
|
||||
char const *tgt // target tensor layout
|
||||
) {
|
||||
// assert( shape.size() == str_len(src) == str_len(tgt) )
|
||||
std::map<char, size_t> indices;
|
||||
|
||||
for (size_t i = 0; i < shape.size(); ++i)
|
||||
indices[src[i]] = i;
|
||||
|
||||
auto ans = std::make_pair( // fmt: new line
|
||||
Vec<size_t>(shape.size()), // shape
|
||||
Vec<size_t>(shape.size()) // permutation
|
||||
);
|
||||
|
||||
for (auto i = 0; i < shape.size(); ++i)
|
||||
ans.first[i] = shape[ans.second[i] = indices[tgt[i]]];
|
||||
|
||||
return ans;
|
||||
}
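
// Worked example (editorial illustration, not part of the original file):
//   transpose({1, 3, 224, 224}, "nchw", "nhwc")
// yields
//   new shape   = {1, 224, 224, 3}
//   permutation = {0, 2, 3, 1}
// since ans.second[i] = indices[tgt[i]] and ans.first[i] = shape[ans.second[i]].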
|
||||
|
||||
Vec<Unigraph> optimization::pass::mutate( // fmt: new line
|
||||
Unigraph const &g, //
|
||||
SingleOperator const & //
|
||||
) {
|
||||
Vec<Unigraph> ans;
|
||||
switch (g.operators.front().op_type) {
|
||||
case OpType::Conv: {
|
||||
auto const conv = Conv(g.operators.front());
|
||||
auto const &i_shape = conv.input()->shape;
|
||||
auto const &k_shape = conv.kernel()->shape;
|
||||
auto const &dilations = conv.dilations()->to_vec<int64_t>();
|
||||
auto const &strides = conv.strides()->to_vec<int64_t>();
|
||||
// assert(conv.input()->data_type == conv.kernel()->data_type);
|
||||
auto const dt = conv.input()->data_type;
|
||||
if (k_shape.rbegin()[0] == 1 // fmt: new line
|
||||
&& k_shape.rbegin()[1] == 1 //
|
||||
&& i_shape[1] == k_shape[1] // group = 1
|
||||
&& std::all_of(strides.begin(), strides.end(),
|
||||
[](auto x) { return x == 1; })) {
|
||||
// 1x1 conv
|
||||
auto &mutant = ans.emplace_back();
|
||||
|
||||
// (input, "nchw"->"nhwc") -|transpose|-> tranposed -|reshape|-> t0
|
||||
Arc<Tensor> t0;
|
||||
{
|
||||
auto [shape_, permute_] = transpose(i_shape, "nchw", "nhwc");
|
||||
auto tranposed = Tensor::share(shape_, dt, {}); // copy shape_: it is reused for the reshape below
|
||||
auto permutation = Tensor::share_vec(std::move(permute_));
|
||||
mutant.push_operator(OpType::Transpose,
|
||||
{conv.input(), std::move(permutation)},
|
||||
{tranposed});
|
||||
mutant.push_operator(
|
||||
OpType::Reshape, {std::move(tranposed)},
|
||||
{t0 = Tensor::share(
|
||||
{shape_[0] * shape_[1] * shape_[2], shape_[3]}, dt,
|
||||
{})});
|
||||
}
|
||||
|
||||
// (kernel,"fcrs"->"cfrs") -|transpose|-> tranposed -|reshape|-> t1
|
||||
Arc<Tensor> t1;
|
||||
{
|
||||
auto [shape_, permute_] = transpose(k_shape, "fcrs", "cfrs");
|
||||
auto tranposed = Tensor::share(shape_, dt, {}); // copy shape_: it is reused for the reshape below
|
||||
auto permutation = Tensor::share_vec(std::move(permute_));
|
||||
mutant.push_operator(OpType::Transpose,
|
||||
{conv.kernel(), std::move(permutation)},
|
||||
{tranposed});
|
||||
mutant.push_operator(
|
||||
OpType::Reshape, {std::move(tranposed)},
|
||||
{t1 = Tensor::share(
|
||||
{shape_[0], shape_[1] /* * shape_[2] * shape_[3] */},
|
||||
dt, {})});
|
||||
}
|
||||
|
||||
// (t0,t1) -|matmul|-> x -|reshape|-> t2
|
||||
auto x = Tensor::share({t0->shape[0], t1->shape[1]}, dt, {});
|
||||
mutant.push_operator(OpType::MatMul, {std::move(t0), std::move(t1)},
|
||||
{x});
|
||||
auto t2 = Tensor::share(
|
||||
{i_shape[0], i_shape[2], i_shape[3], k_shape[0]}, dt, {});
|
||||
mutant.push_operator(OpType::Reshape, {std::move(x)}, {t2});
|
||||
|
||||
// (t2,"nhwf"->"nfhw") -|transpose|-> output
|
||||
{
|
||||
auto [shape_, permute_] = transpose(t2->shape, "nhwf", "nfhw");
|
||||
// auto tranposed = Tensor::share(std::move(shape_), dt, {});
|
||||
auto permutation = Tensor::share_vec(std::move(permute_));
|
||||
mutant.push_operator(OpType::Transpose,
|
||||
{std::move(t2), std::move(permutation)},
|
||||
{conv.output()});
|
||||
}
|
||||
} else if (
|
||||
// group = 1
|
||||
i_shape[1] == k_shape[1]
|
||||
// stride[*] = 1
|
||||
&& std::all_of(strides.begin(), strides.end(),
|
||||
[](auto x) { return x == 1; })
|
||||
// dilation[*] > 1
|
||||
&& std::any_of(dilations.begin(), dilations.end(),
|
||||
[](auto x) { return x > 1; })) {
|
||||
// dilated conv
|
||||
auto &mutant = ans.emplace_back();
|
||||
|
||||
auto t0 = Tensor::share(
|
||||
{
|
||||
i_shape[0],
|
||||
i_shape[1],
|
||||
i_shape[2] / dilations[0],
|
||||
static_cast<size_t>(dilations[0]),
|
||||
i_shape[3] / dilations[1],
|
||||
static_cast<size_t>(dilations[1]),
|
||||
},
|
||||
dt, {});
|
||||
mutant.push_operator(OpType::Reshape, {conv.input()}, {t0});
|
||||
|
||||
auto [shape_, permute_] = transpose(t0->shape, "nc1234", "n24c13");
|
||||
auto transposed = Tensor::share(shape_, dt, {});
|
||||
auto permutation = Tensor::share_vec(std::move(permute_));
|
||||
mutant.push_operator(OpType::Transpose,
|
||||
{std::move(t0), std::move(permutation)},
|
||||
{transposed});
|
||||
|
||||
auto t1 = Tensor::share(
|
||||
{
|
||||
shape_[0] * shape_[1] * shape_[2],
|
||||
shape_[3],
|
||||
shape_[4],
|
||||
shape_[5],
|
||||
},
|
||||
dt, {});
|
||||
mutant.push_operator(OpType::Reshape, {std::move(transposed)},
|
||||
{t1});
|
||||
|
||||
Vec<size_t> shape__{
|
||||
shape_[0] * shape_[1] * shape_[2],
|
||||
k_shape[1],
|
||||
conv.output()->shape[2] / shape_[1],
|
||||
conv.output()->shape[3] / shape_[2],
|
||||
};
|
||||
|
||||
auto t2 = Tensor::share(shape__, dt, {});
|
||||
mutant.push_operator(OpType::Conv,
|
||||
{
|
||||
std::move(t1),
|
||||
conv.kernel(),
|
||||
Tensor::share_vec<size_t>({1, 1}),
|
||||
conv.pads(),
|
||||
conv.strides(),
|
||||
},
|
||||
{t2});
|
||||
auto t3 = Tensor::share({shape_[0], shape_[1], shape_[2],
|
||||
shape__[1], shape__[2], shape__[3]},
|
||||
dt, {});
|
||||
mutant.push_operator(OpType::Reshape, {std::move(t2)}, {t3});
|
||||
|
||||
auto [shape___, permute__] =
|
||||
transpose(t3->shape, "n12chw", "nc1h2w");
|
||||
auto transposed_ = Tensor::share(shape___, dt, {});
|
||||
auto permutation_ = Tensor::share_vec(std::move(permute__));
|
||||
mutant.push_operator(OpType::Transpose,
|
||||
{std::move(t3), std::move(permutation_)},
|
||||
{transposed_});
|
||||
mutant.push_operator(OpType::Reshape, {std::move(transposed_)}, // t3 was already moved into the transpose above
|
||||
{conv.output()});
|
||||
}
|
||||
} break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return ans;
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
#pragma once
|
||||
|
||||
#include "../mutation.h"
|
||||
|
||||
namespace optimization::pass {
|
||||
|
||||
/// @brief Partition every operator as a `Unigraph`.
|
||||
struct SingleOperator {};
|
||||
|
||||
/// @brief Splits a graph into subgraphs, where each subgraph contains
|
||||
/// only a single operator.
|
||||
/// @param arg0 An unpartitioned graph.
|
||||
/// @return A vector of individual subgraphs.
|
||||
Vec<std::pair<Unigraph, SingleOperator>> partition(Unigraph &&);
|
||||
|
||||
/// @brief Mutates the single operator graph.
|
||||
/// @param g The subgraph.
|
||||
/// @param arg1 Never used.
|
||||
/// @return Mutants.
|
||||
Vec<Unigraph> mutate(Unigraph const &g, SingleOperator const &);
|
||||
|
||||
} // namespace optimization::pass
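
// Usage sketch (editorial illustration, not part of the original header),
// assuming `g` is a Unigraph that holds a single Conv operator:
//
//     auto parts = partition(std::move(g));              // one subgraph per operator
//     auto &first = parts.front();
//     auto rewrites = mutate(first.first, first.second); // e.g. 1x1 Conv -> Transpose/MatMul chain
//
// `rewrites` then feeds the Mutation and Rating stages described in the README.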
|
|
@ -0,0 +1,28 @@
|
|||
#include "tensor.h"
|
||||
#include <numeric>
|
||||
|
||||
using namespace optimization;
|
||||
|
||||
Arc<Tensor> Tensor::share(Vec<size_t> shape, DataType data_type, Data data) {
|
||||
return Arc<Tensor>(
|
||||
new Tensor(std::move(shape), std::move(data_type), std::move(data)));
|
||||
}
|
||||
|
||||
size_t Tensor::count() const {
|
||||
return shape.empty() // fmt: new line
|
||||
? 0
|
||||
: std::accumulate(shape.begin(), shape.end(), size_t{1},
|
||||
[](auto acc, auto it) { return acc * it; });
|
||||
}
|
||||
|
||||
size_t Tensor::size() const {
|
||||
return shape.empty() // fmt: new line
|
||||
? 0
|
||||
: std::accumulate(shape.begin(), shape.end(), data_type.size(),
|
||||
[](auto acc, auto it) { return acc * it; });
|
||||
}
|
||||
|
||||
Tensor::Tensor(Vec<size_t> &&shape, DataType &&data_type, Data &&data)
|
||||
: shape(std::move(shape)), // fmt: new line
|
||||
data_type(std::move(data_type)), //
|
||||
data(std::move(data)) {}
|
|
@ -0,0 +1,87 @@
|
|||
#pragma once
|
||||
|
||||
#include "data.h"
|
||||
#include "data_type.h"
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
namespace optimization {
|
||||
|
||||
/// @brief Defines a template alias for `std::vector`.
|
||||
template <class t> using Vec = std::vector<t>;
|
||||
|
||||
/// @brief Defines a template alias for std::shared_ptr
|
||||
template <class t> using Arc = std::shared_ptr<t>;
|
||||
|
||||
/// @brief A tensor represented by its position in `Unigraph`.
|
||||
struct TensorPos {
|
||||
/// @brief `op` for `Operator` index in `Unigraph`.
|
||||
/// `idx` for index in `Operator` inputs or outputs.
|
||||
size_t op, idx;
|
||||
};
|
||||
|
||||
/// @brief A struct to represent a tensor in the computation graph.
|
||||
/// The ownership of a `Tensor` belongs to all the operators
|
||||
/// that generate it or that it is passed to.
|
||||
struct Tensor {
|
||||
/// @brief Tensor shape.
|
||||
Vec<size_t> shape;
|
||||
|
||||
/// @brief Element data type.
|
||||
DataType data_type;
|
||||
|
||||
/// @brief Data of tensor.
|
||||
Data data;
|
||||
|
||||
/// @brief Operators in different `Unigraph` that generate this tensor.
|
||||
std::unordered_map<size_t, TensorPos> source;
|
||||
|
||||
/// @brief Operators in different `Unigraph` that take this tensor as input.
|
||||
std::unordered_map<size_t, Vec<TensorPos>> target;
|
||||
|
||||
/// @brief A static factory method to create a `shared_ptr<Tensor>`.
|
||||
/// @param shape Tensor shape.
|
||||
/// @param data_type Element data type.
|
||||
/// @param data Data.
|
||||
/// @return A `shared_ptr<Tensor>`.
|
||||
static Arc<Tensor> share(Vec<size_t> shape, DataType data_type, Data data);
|
||||
|
||||
/// @brief A static factory method to create a `shared_ptr<Tensor>` with
|
||||
/// single data.
|
||||
/// @tparam t Data type.
|
||||
/// @param val Data value.
|
||||
/// @return A `shared_ptr<Tensor>`.
|
||||
template <class t> static Arc<Tensor> share_single(t val) {
|
||||
return Tensor::share({1}, ty<t>(), Data::cpu<t>({val}));
|
||||
}
|
||||
|
||||
/// @brief A static factory method to create a `shared_ptr<Tensor>` with
|
||||
/// 1D data.
|
||||
/// @tparam t Data type.
|
||||
/// @param val Data value.
|
||||
/// @return A `shared_ptr<Tensor>`.
|
||||
template <class t> static Arc<Tensor> share_vec(Vec<t> val) {
|
||||
return Tensor::share({val.size()}, ty<t>(),
|
||||
Data::cpu<t>(std::move(val)));
|
||||
}
|
||||
|
||||
/// @brief Calculates count of data in this tensor.
|
||||
/// @return Data count.
|
||||
size_t count() const;
|
||||
|
||||
/// @brief Calculates the size of the tensor in bytes.
|
||||
/// @return Memory usage in bytes.
|
||||
size_t size() const;
|
||||
|
||||
/// @brief Copies tensor data to a `Vec`.
|
||||
/// @tparam t Data type.
|
||||
/// @return The data `Vec`.
|
||||
template <class t> Vec<t> to_vec() const { return data.to_vec<t>(count()); }
|
||||
|
||||
private:
|
||||
/// @brief Constructor is private and only accessible by the factory method.
|
||||
Tensor(Vec<size_t> &&, DataType &&, Data &&);
|
||||
};
|
||||
|
||||
} // namespace optimization
|
|
@ -0,0 +1,29 @@
|
|||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
/// @brief Stores tensor data.
|
||||
struct Data {
|
||||
/// @brief `cpu_data` is stored in the memory space,
|
||||
/// which allows it to be managed using `std::vector<uint8_t>`.
|
||||
std::vector<uint8_t> cpu_data;
|
||||
|
||||
// #ifdef USE_CUDA
|
||||
// void *gpu_data;
|
||||
// #endif
|
||||
// #ifdef USE_BANG
|
||||
// void *mlu_data;
|
||||
// #endif
|
||||
|
||||
/// @brief Builds `Data` from a `vector` of any type `t`.
|
||||
/// @tparam t Data type.
|
||||
/// @param data Data `vector`.
|
||||
/// @return `Data` object.
|
||||
template <class t> static Data cpu(std::vector<t> data) {
|
||||
Data ans{std::vector<uint8_t>(sizeof(t) * data.size())};
|
||||
memcpy(ans.cpu_data.data(), data.data(), ans.cpu_data.size());
|
||||
return ans;
|
||||
}
|
||||
};
|
|
@ -0,0 +1,32 @@
|
|||
#include "data_type.h"
|
||||
|
||||
size_t DataType::size() const {
|
||||
switch (id) {
|
||||
case DataTypeId::FLOAT:
|
||||
return sizeof(float);
|
||||
case DataTypeId::UINT8:
|
||||
return sizeof(uint8_t);
|
||||
case DataTypeId::INT8:
|
||||
return sizeof(int8_t);
|
||||
case DataTypeId::UINT16:
|
||||
return sizeof(uint16_t);
|
||||
case DataTypeId::INT16:
|
||||
return sizeof(int16_t);
|
||||
case DataTypeId::INT32:
|
||||
return sizeof(int32_t);
|
||||
case DataTypeId::INT64:
|
||||
return sizeof(int64_t);
|
||||
case DataTypeId::BOOL:
|
||||
return sizeof(bool);
|
||||
case DataTypeId::FLOAT16:
|
||||
return 2;
|
||||
case DataTypeId::DOUBLE:
|
||||
return sizeof(double);
|
||||
case DataTypeId::UINT32:
|
||||
return sizeof(uint32_t);
|
||||
case DataTypeId::UINT64:
|
||||
return sizeof(uint64_t);
|
||||
default:
|
||||
throw "unsupported data type.";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
enum class DataTypeId : uint8_t {
|
||||
UNDEFINED,
|
||||
FLOAT,
|
||||
UINT8,
|
||||
INT8,
|
||||
UINT16,
|
||||
INT16,
|
||||
INT32,
|
||||
INT64,
|
||||
STRING,
|
||||
BOOL,
|
||||
FLOAT16,
|
||||
DOUBLE,
|
||||
UINT32,
|
||||
UINT64,
|
||||
// COMPLEX64,
|
||||
// COMPLEX128,
|
||||
// BFLOAT16,
|
||||
};
|
||||
|
||||
struct DataType {
|
||||
DataTypeId id;
|
||||
|
||||
size_t size() const;
|
||||
};
|
||||
|
||||
template <class t> DataType ty();
|
||||
template <> inline DataType ty<float>() { return {DataTypeId::FLOAT}; }
|
||||
template <> inline DataType ty<uint8_t>() { return {DataTypeId::UINT8}; }
|
||||
template <> inline DataType ty<int8_t>() { return {DataTypeId::INT8}; }
|
||||
template <> inline DataType ty<uint16_t>() { return {DataTypeId::UINT16}; }
|
||||
template <> inline DataType ty<int16_t>() { return {DataTypeId::INT16}; }
|
||||
template <> inline DataType ty<int32_t>() { return {DataTypeId::INT32}; }
|
||||
template <> inline DataType ty<int64_t>() { return {DataTypeId::INT64}; }
|
||||
template <> inline DataType ty<bool>() { return {DataTypeId::BOOL}; }
|
||||
template <> inline DataType ty<double>() { return {DataTypeId::DOUBLE}; }
|
||||
template <> inline DataType ty<uint32_t>() { return {DataTypeId::UINT32}; }
|
||||
template <> inline DataType ty<uint64_t>() { return {DataTypeId::UINT64}; }
|
|
@ -0,0 +1,196 @@
|
|||
#pragma once
|
||||
|
||||
#include <numeric>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include "data.h"
|
||||
#include "data_type.h"
|
||||
#include "op_type.h"
|
||||
|
||||
/// @brief A tensor represented by which `node` it is passed to
|
||||
/// and at which `slot` in inputs of that `node`.
|
||||
struct InletPos {
|
||||
size_t node, slot;
|
||||
};
|
||||
|
||||
/// @brief A tensor represented by which `node` it is generated from
|
||||
/// and at which `slot` in outputs of that `node`.
|
||||
struct OutletPos {
|
||||
size_t node, slot;
|
||||
};
|
||||
|
||||
/// @brief Calculates the hash of `OutletPos`.
|
||||
struct OutletPosHash {
|
||||
size_t operator()(OutletPos const &o) const {
|
||||
return o.node ^ (o.slot << 1);
|
||||
}
|
||||
};
|
||||
|
||||
/// @brief The data structure that represents an `Outlet` of an operator,
/// which generates a tensor; it is part of the `Node`.
/// @tparam Tensor Tensor descriptor type.
|
||||
template <class Tensor> struct Outlet {
|
||||
Tensor info;
|
||||
std::vector<InletPos> targets;
|
||||
|
||||
explicit Outlet(Tensor info) : info(info), targets({}) {}
|
||||
};
|
||||
|
||||
/// @brief Concrete tensor information, with no unknown fields.
/// This struct can be used as a tensor descriptor type in templates.
|
||||
struct TensorInfo {
|
||||
std::vector<size_t> shape;
|
||||
DataType data_type;
|
||||
|
||||
/// @brief Tensor memory usage.
|
||||
/// @return Memory bytes.
|
||||
size_t size() const {
|
||||
return shape.empty() // fmt: new line
|
||||
? 0
|
||||
: std::accumulate(
|
||||
shape.begin(), shape.end(), data_type.size(),
|
||||
[](auto acc, auto it) { return acc * it; });
|
||||
}
|
||||
};
|
||||
|
||||
/// @brief Operator `Node` of the dataflow `Graph`.
|
||||
/// @tparam Tensor Tensor descriptor type.
|
||||
template <class Tensor> struct Node {
|
||||
OpType op_type;
|
||||
std::vector<OutletPos> inputs;
|
||||
std::vector<Outlet<Tensor>> outputs;
|
||||
};
|
||||
|
||||
/// @brief A reference of an operator `Node` in a dataflow `Graph`.
|
||||
struct OpRef {
|
||||
/// @brief Index of operator `Node` in the corresponding `Graph`.
|
||||
size_t node_idx;
|
||||
|
||||
/// @brief Builds `OutletPos` from `OpRef`.
|
||||
/// @param slot Index of output for operator `Node`.
|
||||
/// @return An `OutletPos`.
|
||||
OutletPos operator[](size_t slot) const { return {node_idx, slot}; }
|
||||
};
|
||||
|
||||
/// @brief The dataflow `Graph`.
|
||||
/// @tparam Tensor Tensor descriptor type.
|
||||
///
|
||||
/// **NOTICE** Methods of a template class must be defined in the same file
|
||||
/// as the class.
|
||||
template <class Tensor> class Graph {
|
||||
/// @brief `operators` must be topo sorted.
|
||||
std::vector<Node<Tensor>> _operators;
|
||||
|
||||
/// @brief A map to store data, where the key is the index of input node,
|
||||
/// and the value is data.
|
||||
std::unordered_map<size_t, Data> _data;
|
||||
|
||||
/// @brief
|
||||
std::unordered_map<size_t, size_t> _io_id;
|
||||
|
||||
static size_t IO_ID;
|
||||
|
||||
public:
|
||||
/// @brief Pushes a new operator `Node` into `Graph`.
|
||||
/// @param op_type Operator type.
|
||||
/// @param inputs Tensors passed to operator.
|
||||
/// @param outputs Tensors generated by operator.
|
||||
/// @return A reference to the `Node` in `Graph`.
|
||||
OpRef push_operator( // fmt: new line
|
||||
OpType op_type, //
|
||||
std::vector<OutletPos> inputs, //
|
||||
std::vector<Outlet<Tensor>> outputs //
|
||||
) {
|
||||
if (op_type == OpType::Input)
|
||||
throw "use `push_input` instead";
|
||||
else if (op_type == OpType::Output)
|
||||
throw "use `push_output` instead";
|
||||
|
||||
auto index = _operators.size();
|
||||
|
||||
for (const auto &input : inputs)
|
||||
if (input.node >= index)
|
||||
throw "input node not exist";
|
||||
|
||||
size_t i = 0;
|
||||
for (const auto &input : inputs)
|
||||
_operators[input.node] // fmt: new line
|
||||
.outputs[input.slot] //
|
||||
.targets //
|
||||
.push_back({index, i++}); // input slots are 0-based
|
||||
|
||||
_operators.push_back({op_type, std::move(inputs), std::move(outputs)});
|
||||
return {index};
|
||||
}
|
||||
|
||||
/// @brief Pushes a new `Input` `Node` into `Graph`.
|
||||
/// @param output Tensor from `Input`.
|
||||
/// @param id IO id of `Input`.
|
||||
/// @return A reference to the `Node` in `Graph`.
|
||||
OpRef push_input(Outlet<Tensor> output, std::optional<size_t> id) {
|
||||
auto index = _operators.size();
|
||||
_io_id[index] = id ? *id : IO_ID++;
|
||||
_operators.push_back({OpType::Input, {}, {output}});
|
||||
return {index};
|
||||
}
|
||||
|
||||
/// @brief Pushes a new `Output` `Node` into `Graph`.
|
||||
/// @param input Tensor to `Output`.
|
||||
/// @param id IO id of `Output`.
|
||||
/// @return A reference to the `Node` in `Graph`.
|
||||
OpRef push_output(OutletPos input, std::optional<size_t> id) {
|
||||
auto index = _operators.size();
|
||||
_io_id[index] = id ? *id : IO_ID++;
|
||||
_operators.push_back({OpType::Output, {input}, {}});
|
||||
return {index};
|
||||
}
|
||||
|
||||
/// @brief Pushes data of an `Input` `Node` into `Graph`.
|
||||
/// @param input A reference to the `Input` `Node`.
|
||||
/// @param data Data to store.
|
||||
void push_data(OpRef const &input, Data data) {
|
||||
if (input.node_idx >= _operators.size())
|
||||
throw "input node not exist";
|
||||
const auto &op = _operators.at(input.node_idx);
|
||||
if (op.op_type != OpType::Input)
|
||||
throw "only input node can have data";
|
||||
if (!data.cpu_data.empty() &&
|
||||
data.cpu_data.size() != op.outputs.front().info.size())
|
||||
throw "wrong data size";
|
||||
_data[input.node_idx] = std::move(data);
|
||||
}
|
||||
|
||||
/// @brief Gets operators in the `Graph`.
|
||||
/// @return Operators in the `Graph`.
|
||||
std::vector<Node<Tensor>> const &operators() const { return _operators; }
|
||||
|
||||
/// @brief `Graph` inputs.
|
||||
/// @return Indices of input `Node`s in `Graph`.
|
||||
std::vector<size_t> inputs() const {
|
||||
std::vector<size_t> ans;
|
||||
size_t i = 0;
|
||||
for (const auto &node : _operators) {
|
||||
if (node.op_type == OpType::Input && _data.find(i) != _data.end())
|
||||
ans.push_back(i);
|
||||
++i;
|
||||
}
|
||||
return ans;
|
||||
}
|
||||
|
||||
/// @brief `Graph` outputs.
|
||||
/// @return Indices of output `Node`s in `Graph`.
|
||||
std::vector<size_t> outputs() const {
|
||||
std::vector<size_t> ans;
|
||||
size_t i = 0;
|
||||
for (const auto &node : _operators) {
|
||||
if (node.op_type == OpType::Output)
|
||||
ans.push_back(i);
|
||||
++i;
|
||||
}
|
||||
return ans;
|
||||
}
|
||||
};
|
||||
|
||||
template <class Tensor> size_t Graph<Tensor>::IO_ID = 0;
|
|
@ -0,0 +1,193 @@
|
|||
#pragma once
|
||||
|
||||
enum class OpType : uint16_t {
|
||||
Input,
|
||||
Output,
|
||||
|
||||
Abs,
|
||||
Acos,
|
||||
Acosh,
|
||||
Add,
|
||||
And,
|
||||
ArgMax,
|
||||
Asin,
|
||||
Asinh,
|
||||
Atan,
|
||||
Atanh,
|
||||
AveragePool,
|
||||
BatchNormalization,
|
||||
Bernoulli,
|
||||
BitShift,
|
||||
BitwiseAnd,
|
||||
BitwiseNot,
|
||||
BitwiseOr,
|
||||
BitwiseXor,
|
||||
BlackmanWindow,
|
||||
Cast,
|
||||
CastLike,
|
||||
Ceil,
|
||||
Celu,
|
||||
CenterCropPad,
|
||||
Clip,
|
||||
Col2lm,
|
||||
Compress,
|
||||
Concat,
|
||||
ConcatFromSequence,
|
||||
// Constant, // -> Input
|
||||
ConstantOfShape,
|
||||
Conv,
|
||||
ConvInteger,
|
||||
ConvTranspose,
|
||||
Cos,
|
||||
Cosh,
|
||||
CumSum,
|
||||
DFT,
|
||||
DeformConv,
|
||||
DepthToSpace,
|
||||
DequantizeLinear,
|
||||
Det,
|
||||
Div,
|
||||
Dropout,
|
||||
DynamicQuantizeLinear,
|
||||
Einsum,
|
||||
Elu,
|
||||
Equal,
|
||||
Erf,
|
||||
Exp,
|
||||
Expand,
|
||||
EyeLike,
|
||||
Flatten,
|
||||
Floor,
|
||||
GRU,
|
||||
Gather,
|
||||
GatherElements,
|
||||
GatherND,
|
||||
Gemm,
|
||||
GlobalAveragePool,
|
||||
GlobalLpPool,
|
||||
GlobalMaxPool,
|
||||
Greater,
|
||||
GreaterOrEqual,
|
||||
GridSample,
|
||||
GroupNormalization,
|
||||
HammingWindow,
|
||||
HannWindow,
|
||||
HardSigmoid,
|
||||
HardSwish,
|
||||
Hardmax,
|
||||
Identity,
|
||||
If,
|
||||
InstanceNormalization,
|
||||
IsInf,
|
||||
IsNaN,
|
||||
LRN,
|
||||
LSTM,
|
||||
LayerNormalization,
|
||||
LeakyRelu,
|
||||
Less,
|
||||
LessOrEqual,
|
||||
Log,
|
||||
LogSoftmax,
|
||||
Loop,
|
||||
LpNormalization,
|
||||
LpPool,
|
||||
MatMul,
|
||||
MatMulInteger,
|
||||
Max,
|
||||
MaxPool,
|
||||
MaxRoiPool,
|
||||
MaxUnpool,
|
||||
Mean,
|
||||
MeanVarianceNormalization,
|
||||
MelWeightMatrix,
|
||||
Min,
|
||||
Mish,
|
||||
Mod,
|
||||
Mul,
|
||||
Multinomial,
|
||||
Neg,
|
||||
NegativeLogLikelihoodLoss,
|
||||
NonMaxSuppression,
|
||||
NonZero,
|
||||
Not,
|
||||
OneHot,
|
||||
Optional,
|
||||
OptionalGetElement,
|
||||
OptionalHasElement,
|
||||
Or,
|
||||
PRelu,
|
||||
Pad,
|
||||
Pow,
|
||||
QLinearConv,
|
||||
QLinearMatMul,
|
||||
QuantizeLinear,
|
||||
RNN,
|
||||
RandomNormal,
|
||||
RandomNormalLike,
|
||||
RandomUniform,
|
||||
RandomUniformLike,
|
||||
Range,
|
||||
Reciprocal,
|
||||
ReduceL1,
|
||||
ReduceL2,
|
||||
ReduceLogSum,
|
||||
ReduceLogSumExp,
|
||||
ReduceMax,
|
||||
ReduceMean,
|
||||
ReduceMin,
|
||||
ReduceProd,
|
||||
ReduceSum,
|
||||
ReduceSumSquare,
|
||||
Relu,
|
||||
Reshape,
|
||||
Resize,
|
||||
ReverseSequence,
|
||||
RoiAlign,
|
||||
Round,
|
||||
STFT,
|
||||
Scan,
|
||||
Scatter,
|
||||
ScatterElements,
|
||||
ScatterND,
|
||||
Selu,
|
||||
SequenceAt,
|
||||
SequenceConstruct,
|
||||
SequenceEmpty,
|
||||
SequenceErase,
|
||||
SequenceInsert,
|
||||
SequenceLength,
|
||||
SequenceMap,
|
||||
Shape,
|
||||
Shrink,
|
||||
Sigmoid,
|
||||
Sign,
|
||||
Sin,
|
||||
Sinh,
|
||||
Size,
|
||||
Slice,
|
||||
Softmax,
|
||||
SoftmaxCrossEntropyLoss,
|
||||
Softplus,
|
||||
Softsign,
|
||||
SpaceToDepth,
|
||||
Split,
|
||||
SplitToSequence,
|
||||
Sqrt,
|
||||
Squeeze,
|
||||
StringNormalizer,
|
||||
Sub,
|
||||
Sum,
|
||||
Tan,
|
||||
Tanh,
|
||||
TfIdfVectorizer,
|
||||
ThresholdedRelu,
|
||||
Tile,
|
||||
TopK,
|
||||
Transpose,
|
||||
Trilu,
|
||||
Unique,
|
||||
Unsqueeze,
|
||||
Upsample,
|
||||
Where,
|
||||
Xor,
|
||||
};
|
|
@ -0,0 +1,47 @@
|
|||
#include "../src/graph.h"
|
||||
#include <iostream>
|
||||
|
||||
int main() {
|
||||
try {
|
||||
Graph<TensorInfo> g;
|
||||
auto a = g.push_input( // fmt: new line
|
||||
Outlet(TensorInfo{{1, 1, 2, 3}, ty<float>()}), // output
|
||||
std::nullopt // id
|
||||
);
|
||||
g.push_data(a, Data::cpu<float>({1, 2, 3, 4, 5, 6}));
|
||||
|
||||
auto b = g.push_input( // fmt: new line
|
||||
Outlet(TensorInfo{{1, 1, 3, 1}, ty<float>()}), // output
|
||||
std::nullopt // id
|
||||
);
|
||||
g.push_data(b, Data::cpu<float>({1, 2, 3}));
|
||||
|
||||
auto matmul = g.push_operator( // fmt: new line
|
||||
OpType::MatMul, // op_type
|
||||
{a[0], b[0]}, // inputs
|
||||
{Outlet(TensorInfo{{1, 1, 2, 1}, ty<float>()})} // outputs
|
||||
);
|
||||
|
||||
g.push_output( // fmt: new line
|
||||
matmul[0], // input
|
||||
std::nullopt // id
|
||||
);
|
||||
|
||||
std::cout << "inputs: ";
|
||||
for (auto it : g.inputs()) {
|
||||
std::cout << it << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
|
||||
std::cout << "outputs: ";
|
||||
for (auto it : g.outputs()) {
|
||||
std::cout << it << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
|
||||
return 0;
|
||||
} catch (const char *e) {
|
||||
std::cerr << "[ERROR] " << e << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,43 @@
#include "../include/optimization/common.h"
#include <iostream>
#include <unordered_set>

using namespace optimization;

int main() {
    try {
        Unigraph g;
        auto a = Tensor::share( // fmt: new line
            {1, 1, 2, 3},                          //
            ty<float>(),                           //
            Data::cpu<float>({1, 2, 3, 4, 5, 6}));

        auto b = Tensor::share( // fmt: new line
            {1, 1, 3, 1},                //
            ty<float>(),                 //
            Data::cpu<float>({1, 2, 3}));

        auto c = Tensor::share( // fmt: new line
            {1, 1, 2, 1}, //
            ty<float>(),  //
            {});

        auto matmul = g.push_operator( // fmt: new line
            OpType::MatMul, // op_type
            {a, b},         // inputs
            {c}             // outputs
        );

        auto p = Partition<pass::SingleOperator>(std::move(g), pass::partition);
        auto m = Mutation<pass::SingleOperator>(
            std::move(p),
            [](const auto &g, const auto &t) { return Vec<Unigraph>{}; });
        auto r = Rating<pass::SingleOperator>(std::move(m), memory_usage);
        auto ans = r.build(Vec<size_t>(r.size().size(), 0));

        return 0;
    } catch (const char *e) {
        std::cerr << "[ERROR] " << e << std::endl;
        return 1;
    }
}
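
A note on the last two calls above: r.size() presumably reports how many rated candidates exist for each partitioned subgraph, so a zero-filled vector of matching length picks the first candidate in every slot (here the only one, since the mutation returns no substitutes). Spelled out under that reading, which is an interpretation rather than something this test asserts:

// One selection index per partitioned subgraph; 0 keeps the original candidate.
Vec<size_t> selection(r.size().size(), 0);
auto rebuilt = r.build(selection);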
|
|
@ -1,4 +1,13 @@
#include "core/graph.h"
#include "operators/concat.h"
#include "operators/conv.h"
#include "operators/gather.h"
#include "operators/matmul.h"
#include "operators/pad.h"
#include "operators/pooling.h"
#include "operators/reduce_mean.h"
#include "operators/unary.h"
#include "optimization/common.h"
#include <algorithm>
#include <queue>
|
||||
|
||||
|
@ -114,13 +123,394 @@ bool GraphObj::topo_sort() {
|
|||
return this->sorted = true;
|
||||
}
|
||||
|
||||
void GraphObj::optimize() {
|
||||
for (auto &op : ops) {
|
||||
optimization::DataType cast(DataType ty) {
#define IT(A, B)                                                               \
    if (ty == DataType::A)                                                     \
        return {optimization::DataTypeId::B};

    IT(Float32, FLOAT)      //
    else IT(UInt32, UINT32) //
    else IT(UInt8, UINT8)   //
    else IT(Int8, INT8)     //
    else IT(UInt16, UINT16) //
    else IT(Int16, INT16)   //
    else IT(Int32, INT32)   //
    else IT(Int64, INT64)   //
    else IT_ASSERT(false, "unsupported data type");

#undef IT
}

DataType cast(optimization::DataType ty) {
#define IT(A, B)                                                               \
    if (optimization::DataTypeId::A == ty.id)                                  \
        return {DataType::B};

    IT(FLOAT, Float32)      //
    else IT(UINT32, UInt32) //
    else IT(UINT8, UInt8)   //
    else IT(INT8, Int8)     //
    else IT(UINT16, UInt16) //
    else IT(INT16, Int16)   //
    else IT(INT32, Int32)   //
    else IT(INT64, Int64)   //
    else IT_ASSERT(false, "unsupported data type");

#undef IT
}
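
The two cast overloads above translate element types between the framework's DataType and the optimization module's DataTypeId. A minimal round-trip sanity check, assuming only the types handled above and the project's IT_ASSERT macro (illustrative only, not part of the patch):

// Every supported DataType should survive a conversion to the optimization
// representation and back unchanged.
void check_dtype_roundtrip() {
    for (auto ty : {DataType::Float32, DataType::UInt32, DataType::UInt8,
                    DataType::Int8, DataType::UInt16, DataType::Int16,
                    DataType::Int32, DataType::Int64}) {
        IT_ASSERT(cast(cast(ty)) == ty, "data type round-trip failed");
    }
}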
|
||||
|
||||
optimization::Unigraph cast(GraphObj &g) {
|
||||
namespace opt = optimization;
|
||||
|
||||
g.topo_sort();
|
||||
|
||||
#define I(PTR) reinterpret_cast<uintptr_t>((PTR).get())
|
||||
|
||||
unordered_map<uintptr_t, opt::Arc<opt::Tensor>> tensors;
|
||||
for (const auto &t : g.getTensors()) {
|
||||
const auto dims = t->getDims();
|
||||
opt::Vec<size_t> shape(dims.size());
|
||||
std::transform(dims.begin(), dims.end(), shape.begin(),
|
||||
[](auto x) { return static_cast<size_t>(x); });
|
||||
|
||||
opt::Data data;
|
||||
if (t->hasData()) {
|
||||
auto ptr = t->getDataBlob()->getPtr<uint8_t *>();
|
||||
data = opt::Data(ptr, ptr + t->getBytes());
|
||||
}
|
||||
tensors[I(t)] =
|
||||
opt::Tensor::share(shape, cast(t->getDType()), std::move(data));
|
||||
}
|
||||
|
||||
opt::Unigraph ans;
|
||||
|
||||
for (const auto &op : g.getOperators()) {
|
||||
const auto inputs = op->getInputs(), outputs = op->getOutputs();
|
||||
opt::Vec<opt::Arc<opt::Tensor>> in(inputs.size()), out(outputs.size());
|
||||
std::transform(inputs.begin(), inputs.end(), in.begin(),
|
||||
[&](auto x) { return tensors[I(x)]; });
|
||||
std::transform(outputs.begin(), outputs.end(), out.begin(),
|
||||
[&](auto x) { return tensors[I(x)]; });
|
||||
switch (op->getOpType()) {
|
||||
case OpType::Abs:
|
||||
ans.push_operator(opt::OpType::Abs, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::ACos:
|
||||
ans.push_operator(opt::OpType::Acos, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::ACosH:
|
||||
ans.push_operator(opt::OpType::Acosh, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::Add:
|
||||
ans.push_operator(opt::OpType::Add, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::And:
|
||||
ans.push_operator(opt::OpType::And, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::ASin:
|
||||
ans.push_operator(opt::OpType::Asin, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::ASinH:
|
||||
ans.push_operator(opt::OpType::Asinh, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::ATan:
|
||||
ans.push_operator(opt::OpType::Atan, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::ATanH:
|
||||
ans.push_operator(opt::OpType::Atanh, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::AvgPool: {
|
||||
auto obj = as<AvgPoolObj>(op);
|
||||
in.push_back(
|
||||
opt::Tensor::share_vec<int>({obj->getDh(), obj->getDw()}));
|
||||
in.push_back(
|
||||
opt::Tensor::share_vec<int>({obj->getKh(), obj->getKw()}));
|
||||
in.push_back(
|
||||
opt::Tensor::share_vec<int>({obj->getPh(), obj->getPw()}));
|
||||
in.push_back(
|
||||
opt::Tensor::share_vec<int>({obj->getSh(), obj->getSw()}));
|
||||
ans.push_operator(opt::OpType::AveragePool, std::move(in),
|
||||
std::move(out));
|
||||
} break;
|
||||
case OpType::BatchNorm:
|
||||
ans.push_operator(opt::OpType::BatchNormalization, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::BitLeftShift:
|
||||
in.push_back(opt::Tensor::share_single<uint8_t>(0));
|
||||
ans.push_operator(opt::OpType::BitShift, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::BitRightShift:
|
||||
in.push_back(opt::Tensor::share_single<uint8_t>(1));
|
||||
ans.push_operator(opt::OpType::BitShift, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::BitAnd:
|
||||
ans.push_operator(opt::OpType::BitwiseAnd, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::BitNot:
|
||||
ans.push_operator(opt::OpType::BitwiseNot, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::BitOr:
|
||||
ans.push_operator(opt::OpType::BitwiseOr, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::BitXor:
|
||||
ans.push_operator(opt::OpType::BitwiseXor, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::Cast:
|
||||
ans.push_operator(opt::OpType::Cast, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::Ceil:
|
||||
ans.push_operator(opt::OpType::Ceil, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::Clip: {
|
||||
auto obj = as<ClipObj>(op);
|
||||
auto min = obj->getMin();
|
||||
auto max = obj->getMax();
|
||||
in.push_back(
|
||||
opt::Tensor::share_single<float>(min ? *min : -INFINITY));
|
||||
in.push_back(
|
||||
opt::Tensor::share_single<float>(max ? *max : INFINITY));
|
||||
ans.push_operator(opt::OpType::Clip, std::move(in), std::move(out));
|
||||
} break;
|
||||
case OpType::Concat:
|
||||
in.push_back(
|
||||
opt::Tensor::share_single<int>(as<ConcatObj>(op)->getDim()));
|
||||
ans.push_operator(opt::OpType::Concat, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::Conv: {
|
||||
auto obj = as<ConvObj>(op);
|
||||
in.push_back(opt::Tensor::share_vec<size_t>(
|
||||
{(size_t)obj->getDh(), (size_t)obj->getDw()}));
|
||||
in.push_back(opt::Tensor::share_vec<size_t>(
|
||||
{(size_t)obj->getPh(), (size_t)obj->getPw()}));
|
||||
in.push_back(opt::Tensor::share_vec<size_t>(
|
||||
{(size_t)obj->getSh(), (size_t)obj->getSw()}));
|
||||
ans.push_operator(opt::OpType::Conv, std::move(in), std::move(out));
|
||||
} break;
|
||||
case OpType::Cos:
|
||||
ans.push_operator(opt::OpType::Cos, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::CosH:
|
||||
ans.push_operator(opt::OpType::Cosh, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::Div:
|
||||
ans.push_operator(opt::OpType::Div, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::Dropout:
|
||||
ans.push_operator(opt::OpType::Dropout, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::Exp:
|
||||
ans.push_operator(opt::OpType::Exp, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::Flatten:
|
||||
ans.push_operator(opt::OpType::Flatten, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::Floor:
|
||||
ans.push_operator(opt::OpType::Floor, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::Gather:
|
||||
in.push_back(
|
||||
opt::Tensor::share_single<int>(as<GatherObj>(op)->getAxis()));
|
||||
ans.push_operator(opt::OpType::Gather, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::GreaterThan:
|
||||
ans.push_operator(opt::OpType::Greater, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::GreaterEqual:
|
||||
ans.push_operator(opt::OpType::GreaterOrEqual, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::Identity:
|
||||
ans.push_operator(opt::OpType::Identity, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::Log:
|
||||
ans.push_operator(opt::OpType::Log, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::Matmul: {
|
||||
auto obj = as<MatmulObj>(op);
|
||||
IT_ASSERT(obj->getAct() == ActType::None);
|
||||
in.push_back(opt::Tensor::share_single<float>(1.0f));
|
||||
in.push_back(opt::Tensor::share_single<float>(1.0f));
|
||||
in.push_back(
|
||||
opt::Tensor::share_single<int>(obj->getTransA() ? 1 : 0));
|
||||
in.push_back(
|
||||
opt::Tensor::share_single<int>(obj->getTransB() ? 1 : 0));
|
||||
ans.push_operator(opt::OpType::Gemm, std::move(in), std::move(out));
|
||||
} break;
|
||||
case OpType::Maximum:
|
||||
ans.push_operator(opt::OpType::Max, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::MaxPool: {
|
||||
auto obj = as<MaxPoolObj>(op);
|
||||
in.push_back(
|
||||
opt::Tensor::share_vec<int>({obj->getDh(), obj->getDw()}));
|
||||
in.push_back(
|
||||
opt::Tensor::share_vec<int>({obj->getKh(), obj->getKw()}));
|
||||
in.push_back(
|
||||
opt::Tensor::share_vec<int>({obj->getPh(), obj->getPw()}));
|
||||
in.push_back(
|
||||
opt::Tensor::share_vec<int>({obj->getSh(), obj->getSw()}));
|
||||
ans.push_operator(opt::OpType::MaxPool, std::move(in),
|
||||
std::move(out));
|
||||
} break;
|
||||
case OpType::Minimum:
|
||||
ans.push_operator(opt::OpType::Min, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::Mul:
|
||||
ans.push_operator(opt::OpType::Mul, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::Neg:
|
||||
ans.push_operator(opt::OpType::Neg, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::Not:
|
||||
ans.push_operator(opt::OpType::Not, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::Or:
|
||||
ans.push_operator(opt::OpType::Or, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::Pad:
|
||||
in.push_back(
|
||||
opt::Tensor::share_vec<int>(as<PadObj>(op)->getPads()));
|
||||
ans.push_operator(opt::OpType::Pad, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::Reciprocal:
|
||||
ans.push_operator(opt::OpType::Reciprocal, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::ReduceMean: {
|
||||
const auto obj = as<ReduceMeanObj>(op);
|
||||
const auto axes = obj->getAxes();
|
||||
in.push_back(
|
||||
opt::Tensor::share_vec<int>(vector(axes.begin(), axes.end())));
|
||||
in.push_back(
|
||||
opt::Tensor::share_single<int>(obj->getKeepDims() ? 1 : 0));
|
||||
ans.push_operator(opt::OpType::ReduceMean, std::move(in),
|
||||
std::move(out));
|
||||
} break;
|
||||
case OpType::Relu:
|
||||
ans.push_operator(opt::OpType::Relu, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::Reshape:
|
||||
ans.push_operator(opt::OpType::Reshape, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::Resize:
|
||||
ans.push_operator(opt::OpType::Resize, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::Round:
|
||||
ans.push_operator(opt::OpType::Round, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::Shape:
|
||||
ans.push_operator(opt::OpType::Shape, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::Sigmoid:
|
||||
ans.push_operator(opt::OpType::Sigmoid, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::Sin:
|
||||
ans.push_operator(opt::OpType::Sin, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::SinH:
|
||||
ans.push_operator(opt::OpType::Sinh, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::Slice:
|
||||
IT_TODO_HALT();
|
||||
ans.push_operator(opt::OpType::Slice, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::Softmax:
|
||||
ans.push_operator(opt::OpType::Softmax, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::Split:
|
||||
ans.push_operator(opt::OpType::Split, std::move(in),
|
||||
std::move(out));
|
||||
break;
|
||||
case OpType::Sqrt:
|
||||
ans.push_operator(opt::OpType::Sqrt, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::Sub:
|
||||
ans.push_operator(opt::OpType::Sub, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::Tan:
|
||||
ans.push_operator(opt::OpType::Tan, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::TanH:
|
||||
ans.push_operator(opt::OpType::Tanh, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::Transpose:
|
||||
ans.push_operator(opt::OpType::Transpose, std::move(in), std::move(out));
|
||||
break;
|
||||
case OpType::Xor:
|
||||
ans.push_operator(opt::OpType::Xor, std::move(in), std::move(out));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#undef I
|
||||
return ans;
|
||||
}
|
||||
|
||||
Graph cast(optimization::Unigraph const &g, Runtime rt) {
|
||||
namespace opt = optimization;
|
||||
|
||||
unordered_map<uintptr_t, Tensor> tensors;
|
||||
|
||||
#define I(PTR) reinterpret_cast<uintptr_t>((PTR).get())
|
||||
|
||||
auto ans = make_ref<GraphObj>(std::move(rt));
|
||||
|
||||
for (auto const &op : g.operators) {
|
||||
for (auto const &t : op.inputs) {
|
||||
auto const &shape_ = t->shape;
|
||||
opt::Vec<int> shape(shape_.size());
|
||||
std::transform(shape_.begin(), shape_.end(), shape.begin(),
|
||||
[](auto x) { return static_cast<int>(x); });
|
||||
tensors[I(t)] =
|
||||
ans->addTensor(std::move(shape), cast(t->data_type));
|
||||
}
|
||||
for (auto const &t : op.outputs) {
|
||||
auto const &shape_ = t->shape;
|
||||
opt::Vec<int> shape(shape_.size());
|
||||
std::transform(shape_.begin(), shape_.end(), shape.begin(),
|
||||
[](auto x) { return static_cast<int>(x); });
|
||||
tensors[I(t)] =
|
||||
ans->addTensor(std::move(shape), cast(t->data_type));
|
||||
}
|
||||
switch (op.op_type) {
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#undef I
|
||||
return ans;
|
||||
}
|
||||
|
||||
void GraphObj::optimize() {
    // Round-trip this graph through the optimization representation; the
    // converted result is not applied back to the graph yet.
    auto graph = cast(*this);
    auto ans = cast(graph, this->runtime);
}
|
||||
|
||||
void GraphObj::dataMalloc() {
|
||||
|
@ -191,7 +581,8 @@ void GraphObj::replaceConnection(Tensor oldTensor, Tensor newTensor,
|
|||
// tensor's "source" and "target" must be in "ops".
|
||||
// tensor has no "source" and no "target" must not exist.
|
||||
// "inputs" or "outputs" of operators must be in "tensors"
|
||||
// "predecessors" and "successors" of an operator of "ops" must be in "ops".
|
||||
// "predecessors" and "successors" of an operator of "ops" must be in
|
||||
// "ops".
|
||||
bool GraphObj::checkValid() const {
|
||||
for (auto tensor : tensors) {
|
||||
IT_ASSERT(!(tensor->getTargets().size() == 0 &&
|
||||
|
|
|
@ -29,65 +29,97 @@ void SearchEngine::printMetaGraph(Ref<SearchEngine::MetaGraph> metaGraph) {
|
|||
}
|
||||
|
||||
Graph SearchEngine::run(const Graph graph) {
|
||||
IT_ASSERT(runtimeExec == graph->getRuntime());
|
||||
std::cout << "[INFO] original graph: " << std::endl;
|
||||
std::cout << graph->toString();
|
||||
std::cout << "[INFO] perf: " << runtimeExec->getPerfTime(graph)
|
||||
<< std::endl;
|
||||
|
||||
std::vector<Graph> partitions = partitionGraph(graph);
|
||||
|
||||
std::cout << "[INFO] Partition num: " << partitions.size() << std::endl;
|
||||
std::vector<Graph> bestGraphs = {nullptr};
|
||||
for (size_t pid = 0; pid < partitions.size(); pid++) {
|
||||
auto &subGraph = partitions[pid];
|
||||
std::cout << "[INFO] Partition: " << pid << std::endl;
|
||||
std::vector<Graph> candidates = search(subGraph);
|
||||
std::cout << "[INFO] size: " << candidates.size() << std::endl;
|
||||
IT_ASSERT(candidates.size() > 0);
|
||||
std::cout << subGraph->toString() << std::endl;
|
||||
vector<Graph> bestGraphs{nullptr};
|
||||
for (auto &subGraph : partitionGraph(graph)) {
|
||||
std::vector<Graph> nextGraphs;
|
||||
for (auto lastGraph : bestGraphs) {
|
||||
for (auto thisGraph : candidates) {
|
||||
for (auto lastGraph : bestGraphs)
|
||||
for (auto thisGraph : search(subGraph)) {
|
||||
std::vector<Operator> ops;
|
||||
if (lastGraph != nullptr) {
|
||||
for (auto op : lastGraph->getOperators()) {
|
||||
if (lastGraph != nullptr)
|
||||
for (auto op : lastGraph->getOperators())
|
||||
ops.emplace_back(op);
|
||||
}
|
||||
}
|
||||
if (thisGraph != nullptr) {
|
||||
for (auto op : thisGraph->getOperators()) {
|
||||
|
||||
if (thisGraph != nullptr)
|
||||
for (auto op : thisGraph->getOperators())
|
||||
ops.emplace_back(op);
|
||||
}
|
||||
}
|
||||
|
||||
auto tmp = make_ref<GraphObj>(runtimeExec, ops);
|
||||
tmp->dataMalloc();
|
||||
nextGraphs.emplace_back(tmp);
|
||||
}
|
||||
}
|
||||
std::sort(nextGraphs.begin(), nextGraphs.end(), [&](Graph x, Graph y) {
|
||||
return runtimeExec->getPerfTime(x) < runtimeExec->getPerfTime(y);
|
||||
});
|
||||
if (nextGraphs.size() > GRAPH_SIZE) {
|
||||
if (nextGraphs.size() > GRAPH_SIZE)
|
||||
nextGraphs.resize(GRAPH_SIZE);
|
||||
}
|
||||
bestGraphs.clear();
|
||||
for (size_t i = 0; i < nextGraphs.size(); i++) {
|
||||
bestGraphs.emplace_back(nextGraphs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "[INFO] unfused graph: " << std::endl;
|
||||
for (size_t i = 0; i < bestGraphs.size(); i++) {
|
||||
std::cout << "bestGraph " << i << ":" << std::endl;
|
||||
std::cout << bestGraphs[i]->toString();
|
||||
std::cout << "[INFO] perf: " << runtimeExec->getPerfTime(bestGraphs[i])
|
||||
<< std::endl;
|
||||
bestGraphs = nextGraphs;
|
||||
}
|
||||
|
||||
return bestGraphs[0];
|
||||
}
|
||||
|
||||
// Graph SearchEngine::run(const Graph graph) {
|
||||
// IT_ASSERT(runtimeExec == graph->getRuntime());
|
||||
// std::cout << "[INFO] original graph: " << std::endl;
|
||||
// std::cout << graph->toString();
|
||||
// std::cout << "[INFO] perf: " << runtimeExec->getPerfTime(graph)
|
||||
// << std::endl;
|
||||
|
||||
// std::vector<Graph> partitions = partitionGraph(graph);
|
||||
|
||||
// std::cout << "[INFO] Partition num: " << partitions.size() << std::endl;
|
||||
// std::vector<Graph> bestGraphs = {nullptr};
|
||||
// for (size_t pid = 0; pid < partitions.size(); pid++) {
|
||||
// auto &subGraph = partitions[pid];
|
||||
// std::cout << "[INFO] Partition: " << pid << std::endl;
|
||||
// std::vector<Graph> candidates = search(subGraph);
|
||||
// std::cout << "[INFO] size: " << candidates.size() << std::endl;
|
||||
// IT_ASSERT(candidates.size() > 0);
|
||||
// std::cout << subGraph->toString() << std::endl;
|
||||
// std::vector<Graph> nextGraphs;
|
||||
// for (auto lastGraph : bestGraphs) {
|
||||
// for (auto thisGraph : candidates) {
|
||||
// std::vector<Operator> ops;
|
||||
// if (lastGraph != nullptr) {
|
||||
// for (auto op : lastGraph->getOperators()) {
|
||||
// ops.emplace_back(op);
|
||||
// }
|
||||
// }
|
||||
// if (thisGraph != nullptr) {
|
||||
// for (auto op : thisGraph->getOperators()) {
|
||||
// ops.emplace_back(op);
|
||||
// }
|
||||
// }
|
||||
// auto tmp = make_ref<GraphObj>(runtimeExec, ops);
|
||||
// tmp->dataMalloc();
|
||||
// nextGraphs.emplace_back(tmp);
|
||||
// }
|
||||
// }
|
||||
// std::sort(nextGraphs.begin(), nextGraphs.end(), [&](Graph x, Graph y)
|
||||
// {
|
||||
// return runtimeExec->getPerfTime(x) < runtimeExec->getPerfTime(y);
|
||||
// });
|
||||
// if (nextGraphs.size() > GRAPH_SIZE) {
|
||||
// nextGraphs.resize(GRAPH_SIZE);
|
||||
// }
|
||||
// bestGraphs.clear();
|
||||
// for (size_t i = 0; i < nextGraphs.size(); i++) {
|
||||
// bestGraphs.emplace_back(nextGraphs[i]);
|
||||
// }
|
||||
// }
|
||||
|
||||
// std::cout << "[INFO] unfused graph: " << std::endl;
|
||||
// for (size_t i = 0; i < bestGraphs.size(); i++) {
|
||||
// std::cout << "bestGraph " << i << ":" << std::endl;
|
||||
// std::cout << bestGraphs[i]->toString();
|
||||
// std::cout << "[INFO] perf: " <<
|
||||
// runtimeExec->getPerfTime(bestGraphs[i])
|
||||
// << std::endl;
|
||||
// }
|
||||
|
||||
// return bestGraphs[0];
|
||||
// }
|
||||
|
||||
std::vector<Graph> SearchEngine::search(const Graph &graph) {
|
||||
auto metaGraph = buildMetaGraphWithGraph(graph);
|
||||
auto mergedGraphs = searchMerge(metaGraph);
|
||||
|
|
|
@ -0,0 +1,38 @@
#include "optimizations/partitions/partition.h"
#include <algorithm>

namespace infini {
Partition::CandidateQueue
Partition::rankCandidates(const GraphObj &subgraph, const Transformation &tr,
                          const Rating &rating) const {
    auto substitutes = tr.run(subgraph);
    CandidateQueue ans;
    while (!substitutes.empty()) {
        auto g = std::move(substitutes.back());
        auto cost = rating.run(*g);
        ans.push({std::move(g), cost});
        substitutes.pop_back();
    }
    return ans;
}

} // namespace infini
|
||||
|
||||
namespace x {

struct Operator;

/// @brief The complete, un-partitioned graph, or a minimal subgraph that cannot be split further.
using UniGraph = std::vector<Operator>;
struct Candidate {
    /// @brief The candidate subgraph.
    UniGraph graph;
    /// @brief The subgraph's score.
    float score;
};
/// @brief A set of parallel subgraphs that connect to the same tensors.
using Candidates = std::priority_queue<Candidate>;
/// @brief A complete graph assembled from multiple subgraphs linked through tensors.
using Graph = std::vector<Candidates>;

}; // namespace x
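
Candidates above is a std::priority_queue<Candidate>, which requires an ordering on Candidate; none is declared in this sketch, so a comparison like the following (an assumption, not part of the patch) would be needed for it to compile, and it makes top() return the highest-scoring parallel subgraph:

// Hypothetical ordering for x::Candidate: a max-heap on score, so that
// Candidates::top() surfaces the best-scoring substitute first.
inline bool operator<(const Candidate &a, const Candidate &b) {
    return a.score < b.score;
}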
|
|
@ -0,0 +1,11 @@
#include "optimizations/partitions/single_operator_partition.h"

namespace infini {
Graph SingleOperatorPartition::run(const GraphObj &graph,
                                   const Transformation &tr,
                                   const Rating &rating) const {
    IT_TODO_HALT();
    return make_ref<GraphObj>(graph);
}

} // namespace infini
|