Compare commits

...

34 Commits

Author SHA1 Message Date
YdrMaster c077a61681 fix: correct the 1x1 convolution transformation
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster 325b279468 typo
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster 7b23fdbbfe feat: basic implementation of the dilated convolution transformation
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster e3428d8fd8 fix: use the data in tensors correctly
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster 3631b03e73 feat: improve the transpose implementation
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster f78ff0e8ee feat: implement 1x1 convolution to matrix multiplication conversion
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster 4a5e66b36a feat: distinguish 1x1 convolution from dilated convolution
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster 7d7d923e8d perf: skip rating when there are no mutants
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster 2a147c235d style: adjust structure
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster 17033fad97 feat: add export
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster 28287f3782 feat: complete import
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster cc62a3216d feat: organize the subproject structure and implement part of the import
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster 9cfe223953 refactor: rename optimization so it can later be split out as a submodule
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster 34f7d7e9ed feat: move graph-transformation classes into separate files and templatize them.
Provide a templated subgraph type when partitioning subgraphs.

Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster 59a46f3ff9 feat: normalize scores in Rating
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster 72788e8e0a feat: implement subgraph merging
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster bd61cf4533 docs: add documentation
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster f0f8915433 docs: add documentation
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster bb5bfb0be8 docs
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster a56e86dfa9 feat: provide types for subgraph partitioning, mutation generation, and rating
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster 5129d312d2 feat: rewrite
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster 45f7e891f1 feat: add io_id to support subgraph partitioning
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster b4b5157bd4 feat: add more examples
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster 63cc93aadc feat: separate data from tensors and store it independently
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster ddaf6685b3 fix: compile
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster 385586d57b docs: add documentation
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster 320468b627 try: propose an alternative graph representation
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster 8f38a41fb6 fix: compile
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster a6a0141234 feat: organize the mutation and rating algorithms for each subgraph
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster 36b0c5855c feat: add PassManager execution
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster 0ad0150b87 feat: add Pass construction
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster 334e0cccbc feat: add an implementation for swapping the graph ranking criterion
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
YdrMaster cc6c18b00f feat: adjust the optimization interface
Signed-off-by: YdrMaster <ydrml@hotmail.com>
2023-08-04 16:05:22 +08:00
whjthu d9da06eb67 init optimization-pass 2023-08-04 16:05:22 +08:00
38 changed files with 2346 additions and 50 deletions

View File

@ -60,6 +60,10 @@ if(USE_PROTOBUF)
endif()
include_directories(include)
add_subdirectory(optimization)
include_directories(optimization/include)
# Pybind11
add_subdirectory(3rd-party/pybind11)
include_directories(3rd-party/pybind11/include)
@ -98,7 +102,7 @@ set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -UNDEBUG")
# Source files
file(GLOB_RECURSE SRC src/ffi/*.cc src/core/*.cc src/kernels/cpu/*.cc src/nnet/*.cc src/operators/*.cc src/utils/*.cc)
file(GLOB_RECURSE SRC src/ffi/*.cc src/core/*.cc src/kernels/cpu/*.cc src/nnet/*.cc src/operators/*.cc src/optimizations/*.cc src/utils/*.cc)
if(USE_CUDA)
file(GLOB_RECURSE SRC_CUDA src/cuda/*.cc src/cuda/*.cu src/kernels/cuda/*.cc src/kernels/cuda/*.cu)
@ -121,7 +125,7 @@ if(USE_PROTOBUF)
target_link_libraries(InfiniTensor tensor_proto)
endif()
target_link_libraries(InfiniTensor pybind11::embed)
target_link_libraries(InfiniTensor optimization pybind11::embed)
# TVM backend
if(BUILD_TEST_EINNET)
@ -151,7 +155,6 @@ if(USE_INTELCPU)
set(DNNL_CONFIGURATION "cpu_gomp")
find_package(dnnl CONFIG REQUIRED)
if(dnnl_FOUND)
add_compile_definitions(USE_MKL=1)
include_directories(BEFORE ${dnnl_DIR}/../../../cpu_gomp/include/)
link_directories(${dnnl_DIR}/../../../cpu_gomp/lib)
target_link_libraries(InfiniTensor dnnl)

View File

@ -0,0 +1,10 @@
#include "optimizations/partitions/partition.h"
namespace infini {
class GlobalGraphPartition : public Partition {
Graph run(const GraphObj &graph, const Transformation &tr,
const Rating &rating) const override {
return rankCandidates(graph, tr, rating).top().graph;
}
};
} // namespace infini

View File

@ -0,0 +1,33 @@
#pragma once
#include "core/graph.h"
#include "optimizations/rate/rating.h"
#include "optimizations/transformations/transformation.h"
#include <queue>
namespace infini {
class Partition {
public:
virtual Graph run(const GraphObj &, const Transformation &,
const Rating &) const = 0;
struct Candidate {
Graph graph;
Rating::Cost cost;
bool operator<(Candidate others) const { return cost < others.cost; }
bool operator>(Candidate others) const { return cost > others.cost; }
};
protected:
using CandidateQueue = std::priority_queue<Candidate, vector<Candidate>,
std::greater<Candidate>>;
/// @brief Rank the subgraph candidates.
/// @param subgraph The subgraph to transform.
/// @param tr Transformation object.
/// @return Ranked candidates.
CandidateQueue rankCandidates(const GraphObj &subgraph,
const Transformation &tr,
const Rating &rating) const;
};
} // namespace infini

View File

@ -0,0 +1,8 @@
#include "optimizations/partitions/partition.h"
namespace infini {
class SingleOperatorPartition : public Partition {
Graph run(const GraphObj &, const Transformation &,
const Rating &) const override;
};
} // namespace infini

View File

@ -0,0 +1,21 @@
#pragma once
#include "core/graph.h"
#include "optimizations/partitions/partition.h"
namespace infini {
class Pass {
std::unique_ptr<Partition> p;
std::unique_ptr<Transformation> tr;
std::unique_ptr<Rating> rating;
public:
Pass(std::unique_ptr<Partition> p, std::unique_ptr<Transformation> tr,
std::unique_ptr<Rating> rating)
: p(std::move(p)), tr(std::move(tr)), rating(std::move(rating)) {}
Graph run(const GraphObj &graph) const {
return p->run(graph, *tr, *rating);
}
};
} // namespace infini

View File

@ -0,0 +1,25 @@
#include "core/graph.h"
#include "pass.h"
namespace infini {
class PassManager {
public:
PassManager() {}
Graph run(Graph graph) {
for (auto pass : passes)
graph = pass->run(*graph);
return graph;
}
bool addPass(std::unique_ptr<Partition> p,
std::unique_ptr<Transformation> tr,
std::unique_ptr<Rating> rating) {
passes.emplace_back(make_ref<Pass>(std::move(p), std::move(tr), std::move(rating)));
return true;
}
private:
vector<Ref<Pass>> passes;
};
} // namespace infini

View File

@ -0,0 +1,22 @@
#pragma once
#include "optimizations/rate/rating.h"
#include <numeric>
namespace infini {
/**
* Rate a `Graph` by its memory usage.
*/
class MemoryRating : public Rating {
public:
/**
* Run the `Rating` on the graph.
*/
float run(const GraphObj &graph) const override {
auto tensors = graph.getTensors();
return static_cast<float>(
std::accumulate(tensors.begin(), tensors.end(), (size_t)0,
[](auto acc, const auto &t) { return acc + t->getBytes(); }));
}
};
} // namespace infini

View File

@ -0,0 +1,17 @@
#pragma once
#include "core/graph.h"
namespace infini {
/// @brief Rate a `Graph`.
class Rating {
public:
/// @brief Cost of a substitute.
using Cost = float;
/// @brief Run the `Rating` on the `graph`.
/// @param graph The graph to rate.
/// @return The cost of `graph`.
virtual Cost run(const GraphObj &graph) const = 0;
};
} // namespace infini

View File

@ -0,0 +1,16 @@
#pragma once
#include "optimizations/rate/rating.h"
namespace infini {
/**
* Rate a `Graph` by its execution time.
*/
class TimeRating : public Rating {
public:
/**
* Run the `Rating` on the graph.
*/
float run(const GraphObj &graph) const override;
};
} // namespace infini

View File

@ -0,0 +1,14 @@
#pragma once
#include "core/common.h"
#include "core/graph.h"
#include "core/runtime.h"
namespace infini {
class Transformation {
public:
virtual vector<Graph> run(const GraphObj &graph) const {
return {make_ref<GraphObj>(graph)};
};
};
} // namespace infini

View File

@ -0,0 +1,14 @@
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
project(optimization LANGUAGES CXX C)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_INCLUDE_CURRENT_DIR ON)
set(CMAKE_CXX_FLAGS "$ENV{CMAKE_CXX_FLAGS} -fPIC")
file(GLOB_RECURSE SRC src/*.h src/*.cc src/*.cpp)
add_library(optimization ${SRC})
file(GLOB_RECURSE TEST test/*.cpp)
add_executable(test_optimization ${TEST})
target_link_libraries(test_optimization optimization)

14
optimization/Makefile Normal file
View File

@ -0,0 +1,14 @@
.PHONY : build clean install-python test-cpp test-onnx
TYPE ?= release
build:
mkdir -p build/$(TYPE)
cd build/$(TYPE) && cmake $(CMAKE_OPT) ../.. && make -j8
test: build
build/$(TYPE)/test
@echo Done
clean:
rm -rf build

24
optimization/README.md Normal file
View File

@ -0,0 +1,24 @@
# Design Overview
1. Principle: simplification
Keep definitions as simple as possible; anything not currently needed is removed, so that development stays both agile and reliable.
2. Goal: mid-level interfaces
This code is not user-facing, so there is no need to optimize the API for ease of use or to over-invest in defensive programming. The callers of these APIs will be first-party upper-layer code.
3. Documentation
Coverage should approach 100%.
## Code walkthrough
The important files are [tensor.h](src/tensor.h), [graph.h](src/graph.h), and [mutation.h](src/mutation.h).
tensor.h defines the tensor used in this graph representation. A tensor consists of a shape, a data type, and data, and records how it is connected to operators in each graph. Ownership of a tensor is shared by all operators connected to it, so a tensor only ever exists as a `std::shared_ptr`; outside that shared-ownership smart pointer it is meaningless. A tensor is constructed directly as a smart pointer by passing the shape, data type, and data to a factory function, and it is released once every operator connected to it has been released.
Operator definitions and the other important types live in graph.h. An operator stores an operator type and nothing else; all other information is carried by the input tensors whose ownership it shares. The operator type tells the operator how to interpret each of its input and output tensors. This gives operators great flexibility without losing expressiveness: the information that determines how an operator works is necessarily stored in tensor data, and the operator type explains what that data means.
Operators are owned by a graph, more precisely by an unpartitioned or indivisible monolithic graph, a `Unigraph`. Each operator is controlled by exactly one graph; graphs never share any part of an operator. Since an operator is very lightweight (one enum plus two arrays of smart pointers), this costs little while greatly simplifying ownership management: tensors are the only objects shared between operators, and everything else is exclusively owned.
A `Unigraph` is also append-only: operators can only be added, in topological order, and can be neither removed nor reordered. An operator's index within a graph is therefore unique, and each graph holds a unique ID, so a graph can be referred to by its ID and an operator by its index (`OpRef`), and a tensor by an operator index plus a slot index (`TensorPos`). A graph must be destroyed as a whole; when it is destroyed, all tensor connections controlled by its operators are destroyed with it, so no ownership relations that cannot exist independently need to be maintained.
The `Partition`, `Mutation`, and `Rating` classes in mutation.h support rule-based graph optimization. The three classes are essentially the same structure; defining them separately imposes a compile-time constraint on the stages of optimization: one optimization round must perform partition → mutation → rating, in that order, exactly once each.
These classes store a two-dimensional array of `Mutant`s. Each `Mutant` is one mutated variant of a subgraph and stores the subgraph structure and its score. The inner array holds the variants of one subgraph; the outer array holds the subgraphs of one graph. `Partition` takes the complete graph as input and builds the outer array, `Mutation` fills the inner arrays, and `Rating` assigns each mutant a score and sorts them from high to low. An index vector can then be used to guide reconstruction of the whole graph.
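The sketch below shows one optimization round using these three classes. It is adapted from the tests in this repository; the mutation lambda is a placeholder that generates no extra variants, and `memory_usage` is the sample rating function from the headers.

```cpp
// One optimization round over an existing Unigraph `g` (see src/graph.h):
auto p = Partition<pass::SingleOperator>(std::move(g), pass::partition);
auto m = Mutation<pass::SingleOperator>(
    std::move(p),
    [](Unigraph const &, pass::SingleOperator const &) {
        return Vec<Unigraph>{}; // placeholder: no extra mutants
    });
auto r = Rating<pass::SingleOperator>(std::move(m), memory_usage);
// Rebuild a full graph from the best-ranked (index 0) mutant of every subgraph.
auto ans = r.build(Vec<size_t>(r.size().size(), 0));
```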

View File

@ -0,0 +1,22 @@
#pragma once
#include "../../src/mutation.h"
#include "../../src/pass/single_operator.h"
#include <unordered_set>
namespace optimization {
/// @brief Calculates the memory usage of a graph.
/// @param arg0 The graph.
/// @return 1e6 divided by the total memory usage of the graph in bytes, so a smaller footprint scores higher.
inline float memory_usage(Unigraph const &g) {
std::unordered_set<size_t> mark;
size_t memory = 0;
for (const auto &op : g.operators)
for (const auto &t : op.outputs)
if (mark.insert(reinterpret_cast<uintptr_t>(t.get())).second)
memory += t->size();
return 1e6f / static_cast<float>(memory);
}
} // namespace optimization

78
optimization/src/data.h Normal file
View File

@ -0,0 +1,78 @@
#pragma once
#include <cstdint>
#include <cstring>
#include <utility>
#include <vector>
namespace optimization {
/// @brief Stores tensor data.
class Data {
/// @brief `cpu_data` points to a heap buffer in host memory
/// that is owned and released by this `Data` object.
uint8_t *cpu_data;
// #ifdef USE_CUDA
// void *gpu_data;
// #endif
// #ifdef USE_BANG
// void *mlu_data;
// #endif
Data(uint8_t *ptr) : cpu_data(ptr) {}
public:
Data() : cpu_data(nullptr) {}
Data(size_t size) : cpu_data(new uint8_t[size]) {}
template <class t> Data(t begin, t end) : cpu_data(nullptr) {
size_t c = sizeof(decltype(*begin)) * static_cast<size_t>(end - begin);
cpu_data = new uint8_t[c];
std::copy(begin, end, cpu_data);
}
Data(Data const &) = delete;
Data(Data &&others) noexcept
: cpu_data(std::exchange(others.cpu_data, nullptr)) {}
~Data() noexcept { delete[] cpu_data; }
Data &operator=(Data const &) = delete;
Data &operator=(Data &&others) noexcept {
if (this != &others)
delete[] std::exchange(cpu_data,
std::exchange(others.cpu_data, nullptr));
return *this;
}
/// @brief Builds `Data` from a `vector` of any type `t`.
/// @tparam t Data type.
/// @param data Data `vector`.
/// @return `Data` object.
template <class t> static Data cpu(std::vector<t> const &data) {
auto const size = sizeof(t) * data.size();
Data ans(size);
memcpy(ans.cpu_data, data.data(), size);
return ans;
}
/// @brief Gets data ptr.
/// @tparam t Data type.
/// @return Data ptr.
template <class t> t *as_ptr() const {
return reinterpret_cast<t *>(cpu_data);
}
/// @brief Copies data to a `Vec`.
/// @tparam t Data type.
/// @param len Count of data.
/// @return The data `Vec`.
template <class t> std::vector<t> to_vec(size_t len) const {
std::vector<t> ans(len);
memcpy(ans.data(), cpu_data, sizeof(t) * len);
return ans;
}
};
} // namespace optimization

View File

@ -0,0 +1,34 @@
#include "data_type.h"
using namespace optimization;
size_t DataType::size() const {
switch (id) {
case DataTypeId::FLOAT:
return sizeof(float);
case DataTypeId::UINT8:
return sizeof(uint8_t);
case DataTypeId::INT8:
return sizeof(int8_t);
case DataTypeId::UINT16:
return sizeof(uint16_t);
case DataTypeId::INT16:
return sizeof(int16_t);
case DataTypeId::INT32:
return sizeof(int32_t);
case DataTypeId::INT64:
return sizeof(int64_t);
case DataTypeId::BOOL:
return sizeof(bool);
case DataTypeId::FLOAT16:
return 2;
case DataTypeId::DOUBLE:
return sizeof(double);
case DataTypeId::UINT32:
return sizeof(uint32_t);
case DataTypeId::UINT64:
return sizeof(uint64_t);
default:
throw "unsupported data type.";
}
}

View File

@ -0,0 +1,47 @@
#pragma once
#include <cstddef>
#include <cstdint>
namespace optimization {
enum class DataTypeId : uint8_t {
UNDEFINED,
FLOAT,
UINT8,
INT8,
UINT16,
INT16,
INT32,
INT64,
STRING,
BOOL,
FLOAT16,
DOUBLE,
UINT32,
UINT64,
// COMPLEX64,
// COMPLEX128,
// BFLOAT16,
};
struct DataType {
DataTypeId id;
size_t size() const;
};
template <class t> DataType ty();
template <> inline DataType ty<float>() { return {DataTypeId::FLOAT}; }
template <> inline DataType ty<uint8_t>() { return {DataTypeId::UINT8}; }
template <> inline DataType ty<int8_t>() { return {DataTypeId::INT8}; }
template <> inline DataType ty<uint16_t>() { return {DataTypeId::UINT16}; }
template <> inline DataType ty<int16_t>() { return {DataTypeId::INT16}; }
template <> inline DataType ty<int32_t>() { return {DataTypeId::INT32}; }
template <> inline DataType ty<int64_t>() { return {DataTypeId::INT64}; }
template <> inline DataType ty<bool>() { return {DataTypeId::BOOL}; }
template <> inline DataType ty<double>() { return {DataTypeId::DOUBLE}; }
template <> inline DataType ty<uint32_t>() { return {DataTypeId::UINT32}; }
template <> inline DataType ty<uint64_t>() { return {DataTypeId::UINT64}; }
} // namespace optimization

68
optimization/src/graph.cc Normal file
View File

@ -0,0 +1,68 @@
#include "graph.h"
using namespace optimization;
static size_t GRAPH_ID = 1;
Unigraph::Unigraph() : id(GRAPH_ID++) {}
Unigraph::Unigraph(Unigraph &&others)
: id(std::exchange(others.id, 0)), operators(std::move(others.operators)) {}
Unigraph::~Unigraph() {
for (auto &op : operators) {
for (auto &i : op.inputs)
i->target.erase(i->target.find(this->id));
for (auto &o : op.outputs)
o->source.erase(o->source.find(this->id));
}
}
Unigraph &Unigraph::operator=(Unigraph &&others) {
if (this == &others)
return *this;
for (auto &op : operators) {
for (auto &i : op.inputs)
i->target.erase(i->target.find(this->id));
for (auto &o : op.outputs)
o->source.erase(o->source.find(this->id));
}
this->id = std::exchange(others.id, 0);
this->operators = std::move(others.operators);
return *this;
}
OpRef Unigraph::push_operator( // fmt: new line
OpType op_type, //
Vec<Arc<Tensor>> inputs, //
Vec<Arc<Tensor>> outputs //
) {
auto ans = OpRef{this->id, operators.size()};
size_t i = 0;
for (auto &input : inputs) {
auto it = input->target.find(ans.graph);
if (it == input->target.end())
input->target[ans.graph] = {{ans.op, i++}};
else
it->second.push_back({ans.op, i++});
}
i = 0;
for (auto &output : outputs) {
auto it = output->source.find(ans.graph);
if (it == output->source.end())
output->source[ans.graph] = {ans.op, i++};
else
throw "tensor source exist";
}
this->operators.push_back({
op_type, // fmt: new line
std::move(inputs), //
std::move(outputs), //
});
return ans;
}

55
optimization/src/graph.h Normal file
View File

@ -0,0 +1,55 @@
#pragma once
#include "op_type.h"
#include "tensor.h"
namespace optimization {
/// @brief A struct to represent an operator in the computation graph.
/// The ownership of an `Operator` belongs to one `Unigraph`.
struct Operator {
/// @brief Type of the operator.
OpType op_type;
/// @brief Input and output tensors.
/// Notice: ownership of the tensors are shared between
/// operators that generate and use the same tensor.
Vec<Arc<Tensor>> inputs, outputs;
};
/// @brief A reference of an `Operator` in a `Unigraph`.
struct OpRef {
/// @brief `graph` for unique identifier of `Unigraph`.
/// `op` for `Operator` index in `Unigraph`.
size_t graph, op;
};
/// @brief An unpartitioned graph or an unpartitionable minimum graph.
struct Unigraph {
/// @brief Unique identifier.
size_t id;
/// @brief List of operators in the graph with topological order.
Vec<Operator> operators;
Unigraph();
Unigraph(Unigraph const &) = delete;
Unigraph(Unigraph &&others);
~Unigraph();
Unigraph &operator=(Unigraph const &) = delete;
Unigraph &operator=(Unigraph &&);
/// @brief Pushes an `Operator` into the graph.
/// Every `Operator` must be pushed in topological order.
/// @param op_type Operator type.
/// @param inputs Input tensors.
/// @param outputs Output tensors.
/// @return An `OpRef`.
OpRef push_operator( // fmt: new line
OpType op_type, //
Vec<Arc<Tensor>> inputs, //
Vec<Arc<Tensor>> outputs //
);
};
} // namespace optimization

164
optimization/src/mutation.h Normal file
View File

@ -0,0 +1,164 @@
#pragma once
#include "graph.h"
#include <functional>
namespace optimization {
/// @brief A candidate subgraph mutant.
struct Mutant {
/// @brief The mutated subgraph.
Unigraph graph;
/// @brief A score representing the quality of the mutant.
float score;
Mutant(Unigraph &&g) : graph(std::move(g)), score(1.0f) {}
Mutant(Mutant const &) = delete;
Mutant(Mutant &&others)
: graph(std::move(others.graph)),
score(std::exchange(others.score, 1.0f)) {}
Mutant &operator=(Mutant const &) = delete;
Mutant &operator=(Mutant &&others) {
if (this != &others) {
this->graph = std::move(others.graph);
this->score = std::exchange(others.score, 1.0f);
}
return *this;
}
};
/// @brief A subgraph partition with `PartitionType`, will be mutated into
/// multiple `Mutant`s.
/// @tparam PartitionType The partition information attached to this subgraph.
template <class PartitionType> struct SubGraph {
Vec<Mutant> mutants;
PartitionType type;
};
template <class t> Vec<size_t> list_size(Vec<Vec<t>> const &);
template <class PartitionType> class Mutation;
template <class PartitionType> class Rating;
/// @brief Partitioned subgraphs.
template <class PartitionType> struct Partition {
/// @brief 2D vector of `Mutant` instances for each partitioned subgraph.
Vec<SubGraph<PartitionType>> parts;
friend Mutation<PartitionType>;
public:
/// @brief A functional object that takes an unpartitioned graph as input
/// and returns a vector of partitioned subgraphs.
using Func =
std::function<Vec<std::pair<Unigraph, PartitionType>>(Unigraph &&)>;
/// @brief Constructs a partitioned graph from an unpartitioned graph
/// using a partitioning function.
/// @param g An unpartitioned graph.
/// @param f A function that takes an unpartitioned graph as input
/// and returns a vector of partitioned subgraphs.
Partition(Unigraph &&g, Func const &f) {
for (auto &[g_, t] : f(std::move(g))) {
auto &sub = this->parts.emplace_back();
sub.mutants.emplace_back(std::move(g_));
sub.type = std::move(t);
}
}
/// @brief Returns mutant vector size.
/// @return 2D vector size.
Vec<size_t> size() const { return list_size(parts); }
};
/// @brief Generates mutants for every subgraph.
template <class PartitionType> class Mutation {
/// @brief 2D vector of `Mutant` instances for each partitioned subgraph.
Vec<SubGraph<PartitionType>> parts;
friend Rating<PartitionType>;
public:
/// @brief A functional object that takes a subgraph as input
/// and returns a vector of mutated graphs.
using Func =
std::function<Vec<Unigraph>(Unigraph const &, PartitionType const &)>;
/// @brief Mutates every subgraph in a partitioned graph.
/// @param p The partitioned graph to be mutated.
/// @param f A function that takes a subgraph as input
/// and returns a vector of mutated graphs.
Mutation(Partition<PartitionType> &&p, Func const &f)
: parts(std::move(p.parts)) {
for (auto &sub : parts)
for (auto &m : f(sub.mutants.front().graph, sub.type))
sub.mutants.emplace_back(std::move(m));
}
/// @brief Returns mutant vector size.
/// @return 2D vector size.
Vec<size_t> size() const { return list_size(parts); }
};
/// @brief Rates each subgraph mutant.
template <class PartitionType> class Rating {
/// @brief 2D vector of `Mutant` instances for each partitioned subgraph.
Vec<SubGraph<PartitionType>> parts;
public:
/// @brief A functional object that takes a mutated subgraph as input
/// and returns its score.
using Func = std::function<float(Unigraph const &)>;
/// @brief Rates every mutated subgraph with a `Rating::Func`.
/// @param m The mutated subgraphs to be rated.
/// @param f A function that takes a mutated subgraph as input
/// and returns its score.
Rating(Mutation<PartitionType> &&m, Func const &f)
: parts(std::move(m.parts)) {
for (auto &sub : parts)
if (sub.mutants.size() > 1) {
auto sum = 0.0f;
for (auto &c : sub.mutants)
sum += (c.score = f(c.graph));
sum = std::abs(sum);
for (auto &c : sub.mutants)
c.score /= sum;
std::sort(sub.mutants.begin(), sub.mutants.end(),
[](auto const &a, auto const &b) {
return a.score > b.score;
});
}
}
/// @brief Returns mutant vector size.
/// @return 2D vector size.
Vec<size_t> size() const { return list_size(parts); }
/// @brief Builds `Unigraph` from the subgraphs
/// with specified indices.
/// @param indices Subgraph indices.
/// @return Merged `Unigraph`.
Unigraph build(Vec<size_t> const &indices) const {
const auto size = indices.size();
if (size != parts.size())
throw "indices size wrong";
Unigraph ans;
for (size_t i = 0; i < size; ++i)
for (const auto &op :
parts.at(i).mutants.at(indices[i]).graph.operators)
ans.push_operator(op.op_type, op.inputs, op.outputs);
return ans;
}
};
template <class t> Vec<size_t> list_size(Vec<SubGraph<t>> const &list) {
Vec<size_t> ans(list.size());
std::transform(list.begin(), list.end(), ans.begin(),
[](const auto &e) { return e.mutants.size(); });
return ans;
}
} // namespace optimization

196
optimization/src/op_type.h Normal file
View File

@ -0,0 +1,196 @@
#pragma once
#include <cstdint>
namespace optimization {
enum class OpType : uint16_t {
Abs,
Acos,
Acosh,
Add,
And,
ArgMax,
Asin,
Asinh,
Atan,
Atanh,
AveragePool,
BatchNormalization,
Bernoulli,
BitShift,
BitwiseAnd,
BitwiseNot,
BitwiseOr,
BitwiseXor,
BlackmanWindow,
Cast,
CastLike,
Ceil,
Celu,
CenterCropPad,
Clip,
Col2Im,
Compress,
Concat,
ConcatFromSequence,
// Constant, // -> Input
ConstantOfShape,
Conv,
ConvInteger,
ConvTranspose,
Cos,
Cosh,
CumSum,
DFT,
DeformConv,
DepthToSpace,
DequantizeLinear,
Det,
Div,
Dropout,
DynamicQuantizeLinear,
Einsum,
Elu,
Equal,
Erf,
Exp,
Expand,
EyeLike,
Flatten,
Floor,
GRU,
Gather,
GatherElements,
GatherND,
Gemm,
GlobalAveragePool,
GlobalLpPool,
GlobalMaxPool,
Greater,
GreaterOrEqual,
GridSample,
GroupNormalization,
HammingWindow,
HannWindow,
HardSigmoid,
HardSwish,
Hardmax,
Identity,
If,
InstanceNormalization,
IsInf,
IsNaN,
LRN,
LSTM,
LayerNormalization,
LeakyRelu,
Less,
LessOrEqual,
Log,
LogSoftmax,
Loop,
LpNormalization,
LpPool,
MatMul,
MatMulInteger,
Max,
MaxPool,
MaxRoiPool,
MaxUnpool,
Mean,
MeanVarianceNormalization,
MelWeightMatrix,
Min,
Mish,
Mod,
Mul,
Multinomial,
Neg,
NegativeLogLikelihoodLoss,
NonMaxSuppression,
NonZero,
Not,
OneHot,
Optional,
OptionalGetElement,
OptionalHasElement,
Or,
PRelu,
Pad,
Pow,
QLinearConv,
QLinearMatMul,
QuantizeLinear,
RNN,
RandomNormal,
RandomNormalLike,
RandomUniform,
RandomUniformLike,
Range,
Reciprocal,
ReduceL1,
ReduceL2,
ReduceLogSum,
ReduceLogSumExp,
ReduceMax,
ReduceMean,
ReduceMin,
ReduceProd,
ReduceSum,
ReduceSumSquare,
Relu,
Reshape,
Resize,
ReverseSequence,
RoiAlign,
Round,
STFT,
Scan,
Scatter,
ScatterElements,
ScatterND,
Selu,
SequenceAt,
SequenceConstruct,
SequenceEmpty,
SequenceErase,
SequenceInsert,
SequenceLength,
SequenceMap,
Shape,
Shrink,
Sigmoid,
Sign,
Sin,
Sinh,
Size,
Slice,
Softmax,
SoftmaxCrossEntropyLoss,
Softplus,
Softsign,
SpaceToDepth,
Split,
SplitToSequence,
Sqrt,
Squeeze,
StringNormalizer,
Sub,
Sum,
Tan,
Tanh,
TfIdfVectorizer,
ThresholdedRelu,
Tile,
TopK,
Transpose,
Trilu,
Unique,
Unsqueeze,
Upsample,
Where,
Xor,
};
} // namespace optimization

View File

@ -0,0 +1,22 @@
#pragma once
#include "../graph.h"
namespace optimization {
class Conv {
Operator const &op;
public:
explicit Conv(Operator &op) : op(op) {}
explicit Conv(Operator const &op) : op(op) {}
Arc<Tensor> const &input() const { return op.inputs.at(0); }
Arc<Tensor> const &kernel() const { return op.inputs.at(1); }
Arc<Tensor> const &dilations() const { return op.inputs.at(2); }
Arc<Tensor> const &pads() const { return op.inputs.at(3); }
Arc<Tensor> const &strides() const { return op.inputs.at(4); }
Arc<Tensor> const &output() const { return op.outputs.at(0); }
};
} // namespace optimization

View File

@ -0,0 +1,197 @@
#include "single_operator.h"
#include "../operator/conv.h"
#include <iterator>
#include <map>
#include <numeric>
using namespace optimization;
using namespace pass;
Vec<std::pair<Unigraph, SingleOperator>>
optimization::pass::partition(Unigraph &&g) {
Vec<std::pair<Unigraph, SingleOperator>> ans;
for (auto &op : g.operators) {
auto &[sub, t] = ans.emplace_back();
sub.push_operator(op.op_type, op.inputs, op.outputs);
}
return ans;
}
// 1st: new shape
// 2nd: permutation
static std::pair<Vec<size_t>, Vec<size_t>> // fmt: new line
transpose( //
Vec<size_t> const &shape, //
char const *src, // source tensor layout
char const *tgt // target tensor layout
) {
// assert( shape.size() == str_len(src) == str_len(tgt) )
std::map<char, size_t> indices;
for (size_t i = 0; i < shape.size(); ++i)
indices[src[i]] = i;
auto ans = std::make_pair( // fmt: new line
Vec<size_t>(shape.size()), // shape
Vec<size_t>(shape.size()) // permutation
);
for (size_t i = 0; i < shape.size(); ++i)
ans.first[i] = shape[ans.second[i] = indices[tgt[i]]];
return ans;
}
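// Example (illustrative values, not from the source):
//   transpose({1, 64, 56, 56}, "nchw", "nhwc")
//   returns new shape {1, 56, 56, 64} and permutation {0, 2, 3, 1}.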
Vec<Unigraph> optimization::pass::mutate( // fmt: new line
Unigraph const &g, //
SingleOperator const & //
) {
Vec<Unigraph> ans;
switch (g.operators.front().op_type) {
case OpType::Conv: {
auto const conv = Conv(g.operators.front());
auto const &i_shape = conv.input()->shape;
auto const &k_shape = conv.kernel()->shape;
auto const &dilations = conv.dilations()->to_vec<int64_t>();
auto const &strides = conv.strides()->to_vec<int64_t>();
// assert(conv.input()->data_type == conv.kernel()->data_type);
auto const dt = conv.input()->data_type;
if (k_shape.rbegin()[0] == 1 // fmt: new line
&& k_shape.rbegin()[1] == 1 //
&& i_shape[1] == k_shape[1] // group = 1
&& std::all_of(strides.begin(), strides.end(),
[](auto x) { return x == 1; })) {
// 1x1 conv
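// Rewrite sketch (illustrative shapes, not from the source): with input
// {n,c,h,w} = {1,64,56,56} and kernel {f,c,1,1} = {128,64,1,1}, the chain is
//   input  -> transpose nchw->nhwc -> reshape -> t0 {n*h*w, c} = {3136, 64}
//   kernel -> transpose fcrs->cfrs -> reshape -> t1 {c, f}     = {64, 128}
//   t0 x t1 -> matmul {3136, 128} -> reshape {n,h,w,f}
//           -> transpose nhwf->nfhw -> output {1, 128, 56, 56}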
auto &mutant = ans.emplace_back();
// (input, "nchw"->"nhwc") -|transpose|-> tranposed -|reshape|-> t0
Arc<Tensor> t0;
{
auto [shape_, permute_] = transpose(i_shape, "nchw", "nhwc");
auto tranposed = Tensor::share(std::move(shape_), dt, {});
auto permutation = Tensor::share_vec(std::move(permute_));
mutant.push_operator(OpType::Transpose,
{conv.input(), std::move(permutation)},
{tranposed});
mutant.push_operator(
OpType::Reshape, {std::move(tranposed)},
{t0 = Tensor::share(
{shape_[0] * shape_[1] * shape_[2], shape_[3]}, dt,
{})});
}
// (kernel,"fcrs"->"cfrs") -|transpose|-> tranposed -|reshape|-> t1
Arc<Tensor> t1;
{
auto [shape_, permute_] = transpose(k_shape, "fcrs", "cfrs");
auto tranposed = Tensor::share(std::move(shape_), dt, {});
auto permutation = Tensor::share_vec(std::move(permute_));
mutant.push_operator(OpType::Transpose,
{conv.kernel(), std::move(permutation)},
{tranposed});
mutant.push_operator(
OpType::Reshape, {std::move(tranposed)},
{t1 = Tensor::share(
{shape_[0], shape_[1] /* * shape_[2] * shape_[3] */},
dt, {})});
}
// (t0,t1) -|matmul|-> x -|reshape|-> t2
auto x = Tensor::share({t0->shape[0], t1->shape[1]}, dt, {});
mutant.push_operator(OpType::MatMul, {std::move(t0), std::move(t1)},
{x});
auto t2 = Tensor::share(
{i_shape[0], i_shape[2], i_shape[3], k_shape[0]}, dt, {});
mutant.push_operator(OpType::Reshape, {std::move(x)}, {t2});
// (t2,"nhwf"->"nfhw") -|transpose|-> output
{
auto [shape_, permute_] = transpose(t2->shape, "nhwf", "nfhw");
// auto tranposed = Tensor::share(std::move(shape_), dt, {});
auto permutation = Tensor::share_vec(std::move(permute_));
mutant.push_operator(OpType::Transpose,
{std::move(t2), std::move(permutation)},
{conv.output()});
}
} else if (
// group = 1
i_shape[1] == k_shape[1]
// stride[*] = 1
&& std::all_of(strides.begin(), strides.end(),
[](auto x) { return x == 1; })
// dilation[*] > 1
&& std::any_of(dilations.begin(), dilations.end(),
[](auto x) { return x > 1; })) {
// dilated conv
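// Rewrite sketch: split each spatial dim of the input by its dilation factor
// (reshape), move the dilation phases into the batch dim (transpose + reshape),
// run the equivalent dilation-1 convolution, then reshape/transpose back to
// the original output layout.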
auto &mutant = ans.emplace_back();
auto t0 = Tensor::share(
{
i_shape[0],
i_shape[1],
i_shape[2] / dilations[0],
static_cast<size_t>(dilations[0]),
i_shape[3] / dilations[1],
static_cast<size_t>(dilations[1]),
},
dt, {});
mutant.push_operator(OpType::Reshape, {conv.input()}, {t0});
auto [shape_, permute_] = transpose(t0->shape, "nc1234", "n24c13");
auto transposed = Tensor::share(shape_, dt, {});
auto permutation = Tensor::share_vec(std::move(permute_));
mutant.push_operator(OpType::Transpose,
{std::move(t0), std::move(permutation)},
{transposed});
auto t1 = Tensor::share(
{
shape_[0] * shape_[1] * shape_[2],
shape_[3],
shape_[4],
shape_[5],
},
dt, {});
mutant.push_operator(OpType::Reshape, {std::move(transposed)},
{t1});
Vec<size_t> shape__{
shape_[0] * shape_[1] * shape_[2],
k_shape[1],
conv.output()->shape[2] / shape_[1],
conv.output()->shape[3] / shape_[2],
};
auto t2 = Tensor::share(shape__, dt, {});
mutant.push_operator(OpType::Conv,
{
std::move(t1),
conv.kernel(),
Tensor::share_vec<size_t>({1, 1}),
conv.pads(),
conv.strides(),
},
{t2});
auto t3 = Tensor::share({shape_[0], shape_[1], shape_[2],
shape__[1], shape__[2], shape__[3]},
dt, {});
mutant.push_operator(OpType::Reshape, {std::move(t2)}, {t3});
auto [shape___, permute__] =
transpose(t3->shape, "n12chw", "nc1h2w");
auto transposed_ = Tensor::share(shape___, dt, {});
auto permutation_ = Tensor::share_vec(std::move(permute__));
mutant.push_operator(OpType::Transpose,
{std::move(t3), std::move(permutation_)},
{transposed_});
mutant.push_operator(OpType::Reshape, {std::move(transposed_)},
{conv.output()});
}
} break;
default:
break;
}
return ans;
}

View File

@ -0,0 +1,22 @@
#pragma once
#include "../mutation.h"
namespace optimization::pass {
/// @brief Partition every operator as a `Unigraph`.
struct SingleOperator {};
/// @brief Splits a graph into subgraphs, where each subgraph contains
/// only a single operator.
/// @param arg0 An unpartitioned graph.
/// @return A vector of individual subgraphs.
Vec<std::pair<Unigraph, SingleOperator>> partition(Unigraph &&);
/// @brief Mutates the single operator graph.
/// @param g The subgraph.
/// @param arg1 Never used.
/// @return Mutants.
Vec<Unigraph> mutate(Unigraph const &g, SingleOperator const &);
} // namespace optimization::pass

View File

@ -0,0 +1,28 @@
#include "tensor.h"
#include <numeric>
using namespace optimization;
Arc<Tensor> Tensor::share(Vec<size_t> shape, DataType data_type, Data data) {
return Arc<Tensor>(
new Tensor(std::move(shape), std::move(data_type), std::move(data)));
}
size_t Tensor::count() const {
return shape.empty() // fmt: new line
? 0
: std::accumulate(shape.begin(), shape.end(), (size_t)1,
[](auto acc, auto it) { return acc * it; });
}
size_t Tensor::size() const {
return shape.empty() // fmt: new line
? 0
: std::accumulate(shape.begin(), shape.end(), data_type.size(),
[](auto acc, auto it) { return acc * it; });
}
Tensor::Tensor(Vec<size_t> &&shape, DataType &&data_type, Data &&data)
: shape(std::move(shape)), // fmt: new line
data_type(std::move(data_type)), //
data(std::move(data)) {}

87
optimization/src/tensor.h Normal file
View File

@ -0,0 +1,87 @@
#pragma once
#include "data.h"
#include "data_type.h"
#include <memory>
#include <unordered_map>
#include <vector>
namespace optimization {
/// @brief Defines a template alias for `std::vector`.
template <class t> using Vec = std::vector<t>;
/// @brief Defines a template alias for std::shared_ptr
template <class t> using Arc = std::shared_ptr<t>;
/// @brief A tensor represented by its position in `Unigraph`.
struct TensorPos {
/// @brief `op` for `Operator` index in `Unigraph`.
/// `idx` for index in `Operator` inputs or outputs.
size_t op, idx;
};
/// @brief A struct to represent a tensor in the computation graph.
/// The ownership of a `Tensor` is shared by all the operators
/// that generate it or take it as input.
struct Tensor {
/// @brief Tensor shape.
Vec<size_t> shape;
/// @brief Element data type.
DataType data_type;
/// @brief Data of tensor.
Data data;
/// @brief Operators in different `Unigraph` that generate this tensor.
std::unordered_map<size_t, TensorPos> source;
/// @brief Operators in different `Unigraph` that take this tensor as input.
std::unordered_map<size_t, Vec<TensorPos>> target;
/// @brief A static factory method to create a `shared_ptr<Tensor>`.
/// @param shape Tensor shape.
/// @param data_type Element data type.
/// @param data Data.
/// @return A `shared_ptr<Tensor>`.
static Arc<Tensor> share(Vec<size_t> shape, DataType data_type, Data data);
/// @brief A static factory method to create a `shared_ptr<Tensor>` with
/// single data.
/// @tparam t Data type.
/// @param val Data value.
/// @return A `shared_ptr<Tensor>`.
template <class t> static Arc<Tensor> share_single(t val) {
return Tensor::share({1}, ty<t>(), Data::cpu<t>({val}));
}
/// @brief A static factory method to create a `shared_ptr<Tensor>` with
/// 1D data.
/// @tparam t Data type.
/// @param val Data value.
/// @return A `shared_ptr<Tensor>`.
template <class t> static Arc<Tensor> share_vec(Vec<t> val) {
return Tensor::share({val.size()}, ty<t>(),
Data::cpu<t>(std::move(val)));
}
/// @brief Calculates count of data in this tensor.
/// @return Data count.
size_t count() const;
/// @brief Calculates the size of the tensor in bytes.
/// @return Memory usage in bytes.
size_t size() const;
/// @brief Copies tensor data to a `Vec`.
/// @tparam t Data type.
/// @return The data `Vec`.
template <class t> Vec<t> to_vec() const { return data.to_vec<t>(count()); }
private:
/// @brief Constructor is private and only accessible by the factory method.
Tensor(Vec<size_t> &&, DataType &&, Data &&);
};
} // namespace optimization

29
optimization/src0/data.h Normal file
View File

@ -0,0 +1,29 @@
#pragma once
#include <cstdint>
#include <cstring>
#include <vector>
/// @brief Stores tensor data.
struct Data {
/// @brief `cpu_data` is stored in the memory space,
/// which allows it to be managed using `std::vector<uint8_t>`.
std::vector<uint8_t> cpu_data;
// #ifdef USE_CUDA
// void *gpu_data;
// #endif
// #ifdef USE_BANG
// void *mlu_data;
// #endif
/// @brief Builds `Data` from a `vector` of any type `t`.
/// @tparam t Data type.
/// @param data Data `vector`.
/// @return `Data` object.
template <class t> static Data cpu(std::vector<t> data) {
Data ans{std::vector<uint8_t>(sizeof(t) * data.size())};
memcpy(ans.cpu_data.data(), data.data(), ans.cpu_data.size());
return ans;
}
};

View File

@ -0,0 +1,32 @@
#include "data_type.h"
size_t DataType::size() const {
switch (id) {
case DataTypeId::FLOAT:
return sizeof(float);
case DataTypeId::UINT8:
return sizeof(uint8_t);
case DataTypeId::INT8:
return sizeof(int8_t);
case DataTypeId::UINT16:
return sizeof(uint16_t);
case DataTypeId::INT16:
return sizeof(int16_t);
case DataTypeId::INT32:
return sizeof(int32_t);
case DataTypeId::INT64:
return sizeof(int64_t);
case DataTypeId::BOOL:
return sizeof(bool);
case DataTypeId::FLOAT16:
return 2;
case DataTypeId::DOUBLE:
return sizeof(double);
case DataTypeId::UINT32:
return sizeof(uint32_t);
case DataTypeId::UINT64:
return sizeof(uint64_t);
default:
throw "unsupported data type.";
}
}

View File

@ -0,0 +1,43 @@
#pragma once
#include <cstddef>
#include <cstdint>
enum class DataTypeId : uint8_t {
UNDEFINED,
FLOAT,
UINT8,
INT8,
UINT16,
INT16,
INT32,
INT64,
STRING,
BOOL,
FLOAT16,
DOUBLE,
UINT32,
UINT64,
// COMPLEX64,
// COMPLEX128,
// BFLOAT16,
};
struct DataType {
DataTypeId id;
size_t size() const;
};
template <class t> DataType ty();
template <> inline DataType ty<float>() { return {DataTypeId::FLOAT}; }
template <> inline DataType ty<uint8_t>() { return {DataTypeId::UINT8}; }
template <> inline DataType ty<int8_t>() { return {DataTypeId::INT8}; }
template <> inline DataType ty<uint16_t>() { return {DataTypeId::UINT16}; }
template <> inline DataType ty<int16_t>() { return {DataTypeId::INT16}; }
template <> inline DataType ty<int32_t>() { return {DataTypeId::INT32}; }
template <> inline DataType ty<int64_t>() { return {DataTypeId::INT64}; }
template <> inline DataType ty<bool>() { return {DataTypeId::BOOL}; }
template <> inline DataType ty<double>() { return {DataTypeId::DOUBLE}; }
template <> inline DataType ty<uint32_t>() { return {DataTypeId::UINT32}; }
template <> inline DataType ty<uint64_t>() { return {DataTypeId::UINT64}; }

196
optimization/src0/graph.h Normal file
View File

@ -0,0 +1,196 @@
#pragma once
#include <numeric>
#include <optional>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "data.h"
#include "data_type.h"
#include "op_type.h"
/// @brief A tensor represented by which `node` it is passed to
/// and at which `slot` in inputs of that `node`.
struct InletPos {
size_t node, slot;
};
/// @brief A tensor represented by which `node` it is generated from
/// and at which `slot` in outputs of that `node`.
struct OutletPos {
size_t node, slot;
};
/// @brief Calculates the hash of `OutletPos`.
struct OutletPosHash {
size_t operator()(OutletPos const &o) const {
return o.node ^ (o.slot << 1);
}
};
/// @brief The data structure that represents an `Outlet` of an operator,
/// which generates a tensor; it is part of the `Node`.
/// @tparam Tensor The tensor descriptor type.
template <class Tensor> struct Outlet {
Tensor info;
std::vector<InletPos> targets;
explicit Outlet(Tensor info) : info(info), targets({}) {}
};
/// @brief The specific tensor information excludes all unknowns.
/// This struct can be used as a tensor descriptor type in templates.
struct TensorInfo {
std::vector<size_t> shape;
DataType data_type;
/// @brief Tensor memory usage.
/// @return Memory bytes.
size_t size() const {
return shape.empty() // fmt: new line
? 0
: std::accumulate(
shape.begin(), shape.end(), data_type.size(),
[](auto acc, auto it) { return acc * it; });
}
};
/// @brief Operator `Node` of the dataflow `Graph`.
/// @tparam Tensor The tensor descriptor type.
template <class Tensor> struct Node {
OpType op_type;
std::vector<OutletPos> inputs;
std::vector<Outlet<Tensor>> outputs;
};
/// @brief A reference of an operator `Node` in a dataflow `Graph`.
struct OpRef {
/// @brief Index of operator `Node` in the corresponding `Graph`.
size_t node_idx;
/// @brief Builds `OutletPos` from `OpRef`.
/// @param slot Index of output for operator `Node`.
/// @return An `OutletPos`.
OutletPos operator[](size_t slot) const { return {node_idx, slot}; }
};
/// @brief The dataflow `Graph`.
/// @tparam Tensor The tensor descriptor type.
///
/// **NOTICE** Methods of a template class must be defined in the same file
/// as the class.
template <class Tensor> class Graph {
/// @brief `operators` must be topo sorted.
std::vector<Node<Tensor>> _operators;
/// @brief A map to store data, where the key is the index of input node,
/// and the value is data.
std::unordered_map<size_t, Data> _data;
/// @brief A map from input/output node index to its IO id.
std::unordered_map<size_t, size_t> _io_id;
static size_t IO_ID;
public:
/// @brief Pushes a new operator `Node` into `Graph`.
/// @param op_type Operator type.
/// @param inputs Tensors passed to operator.
/// @param outputs Tensors generated by operator.
/// @return A reference to the `Node` in `Graph`.
OpRef push_operator( // fmt: new line
OpType op_type, //
std::vector<OutletPos> inputs, //
std::vector<Outlet<Tensor>> outputs //
) {
if (op_type == OpType::Input)
throw "use `push_input` instead";
else if (op_type == OpType::Output)
throw "use `push_output` instead";
auto index = _operators.size();
for (const auto &input : inputs)
if (input.node >= index)
throw "input node not exist";
size_t i = 0;
for (const auto &input : inputs)
_operators[input.node] // fmt: new line
.outputs[input.slot] //
.targets //
.push_back({index, i++});
_operators.push_back({op_type, std::move(inputs), std::move(outputs)});
return {index};
}
/// @brief Pushes a new `Input` `Node` into `Graph`.
/// @param output Tensor from `Input`.
/// @param id IO id of `Input`.
/// @return A reference to the `Node` in `Graph`.
OpRef push_input(Outlet<Tensor> output, std::optional<size_t> id) {
auto index = _operators.size();
_io_id[index] = id ? *id : IO_ID++;
_operators.push_back({OpType::Input, {}, {output}});
return {index};
}
/// @brief Pushes a new `Output` `Node` into `Graph`.
/// @param input Tensor to `Output`.
/// @param id IO id of `Output`.
/// @return A reference to the `Node` in `Graph`.
OpRef push_output(OutletPos input, std::optional<size_t> id) {
auto index = _operators.size();
_io_id[index] = id ? *id : IO_ID++;
_operators.push_back({OpType::Output, {input}, {}});
return {index};
}
/// @brief Pushes data of an `Input` `Node` into `Graph`.
/// @param input A reference to the `Input` `Node`.
/// @param data Data to store.
void push_data(OpRef const &input, Data data) {
if (input.node_idx >= _operators.size())
throw "input node not exist";
const auto &op = _operators.at(input.node_idx);
if (op.op_type != OpType::Input)
throw "only input node can have data";
if (!data.cpu_data.empty() &&
data.cpu_data.size() != op.outputs.front().info.size())
throw "wrong data size";
_data[input.node_idx] = std::move(data);
}
/// @brief Gets operators in the `Graph`.
/// @return Operators in the `Graph`.
std::vector<Node<Tensor>> const &operators() const { return _operators; }
/// @brief `Graph` inputs.
/// @return Indices of input `Node`s in `Graph`.
std::vector<size_t> inputs() const {
std::vector<size_t> ans;
size_t i = 0;
for (const auto &node : _operators) {
if (node.op_type == OpType::Input && _data.find(i) != _data.end())
ans.push_back(i);
++i;
}
return ans;
}
/// @brief `Graph` outputs.
/// @return Indices of output `Node`s in `Graph`.
std::vector<size_t> outputs() const {
std::vector<size_t> ans;
size_t i = 0;
for (const auto &node : _operators) {
if (node.op_type == OpType::Output)
ans.push_back(i);
++i;
}
return ans;
}
};
template <class Tensor> size_t Graph<Tensor>::IO_ID = 0;

193
optimization/src0/op_type.h Normal file
View File

@ -0,0 +1,193 @@
#pragma once
#include <cstdint>
enum class OpType : uint16_t {
Input,
Output,
Abs,
Acos,
Acosh,
Add,
And,
ArgMax,
Asin,
Asinh,
Atan,
Atanh,
AveragePool,
BatchNormalization,
Bernoulli,
BitShift,
BitwiseAnd,
BitwiseNot,
BitwiseOr,
BitwiseXor,
BlackmanWindow,
Cast,
CastLike,
Ceil,
Celu,
CenterCropPad,
Clip,
Col2Im,
Compress,
Concat,
ConcatFromSequence,
// Constant, // -> Input
ConstantOfShape,
Conv,
ConvInteger,
ConvTranspose,
Cos,
Cosh,
CumSum,
DFT,
DeformConv,
DepthToSpace,
DequantizeLinear,
Det,
Div,
Dropout,
DynamicQuantizeLinear,
Einsum,
Elu,
Equal,
Erf,
Exp,
Expand,
EyeLike,
Flatten,
Floor,
GRU,
Gather,
GatherElements,
GatherND,
Gemm,
GlobalAveragePool,
GlobalLpPool,
GlobalMaxPool,
Greater,
GreaterOrEqual,
GridSample,
GroupNormalization,
HammingWindow,
HannWindow,
HardSigmoid,
HardSwish,
Hardmax,
Identity,
If,
InstanceNormalization,
IsInf,
IsNaN,
LRN,
LSTM,
LayerNormalization,
LeakyRelu,
Less,
LessOrEqual,
Log,
LogSoftmax,
Loop,
LpNormalization,
LpPool,
MatMul,
MatMulInteger,
Max,
MaxPool,
MaxRoiPool,
MaxUnpool,
Mean,
MeanVarianceNormalization,
MelWeightMatrix,
Min,
Mish,
Mod,
Mul,
Multinomial,
Neg,
NegativeLogLikelihoodLoss,
NonMaxSuppression,
NonZero,
Not,
OneHot,
Optional,
OptionalGetElement,
OptionalHasElement,
Or,
PRelu,
Pad,
Pow,
QLinearConv,
QLinearMatMul,
QuantizeLinear,
RNN,
RandomNormal,
RandomNormalLike,
RandomUniform,
RandomUniformLike,
Range,
Reciprocal,
ReduceL1,
ReduceL2,
ReduceLogSum,
ReduceLogSumExp,
ReduceMax,
ReduceMean,
ReduceMin,
ReduceProd,
ReduceSum,
ReduceSumSquare,
Relu,
Reshape,
Resize,
ReverseSequence,
RoiAlign,
Round,
STFT,
Scan,
Scatter,
ScatterElements,
ScatterND,
Selu,
SequenceAt,
SequenceConstruct,
SequenceEmpty,
SequenceErase,
SequenceInsert,
SequenceLength,
SequenceMap,
Shape,
Shrink,
Sigmoid,
Sign,
Sin,
Sinh,
Size,
Slice,
Softmax,
SoftmaxCrossEntropyLoss,
Softplus,
Softsign,
SpaceToDepth,
Split,
SplitToSequence,
Sqrt,
Squeeze,
StringNormalizer,
Sub,
Sum,
Tan,
Tanh,
TfIdfVectorizer,
ThresholdedRelu,
Tile,
TopK,
Transpose,
Trilu,
Unique,
Unsqueeze,
Upsample,
Where,
Xor,
};

View File

@ -0,0 +1,47 @@
#include "../src/graph.h"
#include <iostream>
int main() {
try {
Graph<TensorInfo> g;
auto a = g.push_input( // fmt: new line
Outlet(TensorInfo{{1, 1, 2, 3}, ty<float>()}), // output
std::nullopt // id
);
g.push_data(a, Data::cpu<float>({1, 2, 3, 4, 5, 6}));
auto b = g.push_input( // fmt: new line
Outlet(TensorInfo{{1, 1, 3, 1}, ty<float>()}), // output
std::nullopt // id
);
g.push_data(b, Data::cpu<float>({1, 2, 3}));
auto matmul = g.push_operator( // fmt: new line
OpType::MatMul, // op_type
{a[0], b[0]}, // inputs
{Outlet(TensorInfo{{1, 1, 2, 1}, ty<float>()})} // outputs
);
g.push_output( // fmt: new line
matmul[0], // input
std::nullopt // id
);
std::cout << "inputs: ";
for (auto it : g.inputs()) {
std::cout << it << " ";
}
std::cout << std::endl;
std::cout << "outputs: ";
for (auto it : g.outputs()) {
std::cout << it << " ";
}
std::cout << std::endl;
return 0;
} catch (const char *e) {
std::cerr << "[ERROR] " << e << std::endl;
return 1;
}
}

View File

@ -0,0 +1,43 @@
#include "../include/optimization/common.h"
#include <iostream>
#include <unordered_set>
using namespace optimization;
int main() {
try {
Unigraph g;
auto a = Tensor::share( // fmt: new line
{1, 1, 2, 3}, //
ty<float>(), //
Data::cpu<float>({1, 2, 3, 4, 5, 6}));
auto b = Tensor::share( // fmt: new line
{1, 1, 3, 1}, //
ty<float>(), //
Data::cpu<float>({1, 2, 3}));
auto c = Tensor::share( // fmt: new line
{1, 1, 2, 1}, //
ty<float>(), //
{});
auto matmul = g.push_operator( // fmt: new line
OpType::MatMul, // op_type
{a, b}, // inputs
{c} // outputs
);
auto p = Partition<pass::SingleOperator>(std::move(g), pass::partition);
auto m = Mutation<pass::SingleOperator>(
std::move(p),
[](const auto &g, const auto &t) { return Vec<Unigraph>{}; });
auto r = Rating<pass::SingleOperator>(std::move(m), memory_usage);
auto ans = r.build(Vec<size_t>(r.size().size(), 0));
return 0;
} catch (const char *e) {
std::cerr << "[ERROR] " << e << std::endl;
return 1;
}
}

View File

@ -1,4 +1,13 @@
#include "core/graph.h"
#include "operators/concat.h"
#include "operators/conv.h"
#include "operators/gather.h"
#include "operators/matmul.h"
#include "operators/pad.h"
#include "operators/pooling.h"
#include "operators/reduce_mean.h"
#include "operators/unary.h"
#include "optimization/common.h"
#include <algorithm>
#include <queue>
@ -114,13 +123,394 @@ bool GraphObj::topo_sort() {
return this->sorted = true;
}
void GraphObj::optimize() {
for (auto &op : ops) {
optimization::DataType cast(DataType ty) {
#define IT(A, B) \
if (ty == DataType::A) \
return {optimization::DataTypeId::B};
IT(Float32, FLOAT) //
else IT(UInt32, UINT32) //
else IT(UInt8, UINT8) //
else IT(Int8, INT8) //
else IT(UInt16, UINT16) //
else IT(Int16, INT16) //
else IT(Int32, INT32) //
else IT(Int64, INT64) //
else IT_ASSERT(false, "unsupported data type");
#undef IT
}
DataType cast(optimization::DataType ty) {
#define IT(A, B) \
if (optimization::DataTypeId::A == ty.id) \
return {DataType::B};
IT(FLOAT, Float32) //
else IT(UINT32, UInt32) //
else IT(UINT8, UInt8) //
else IT(INT8, Int8) //
else IT(UINT16, UInt16) //
else IT(INT16, Int16) //
else IT(INT32, Int32) //
else IT(INT64, Int64) //
else IT_ASSERT(false, "unsupported data type");
#undef IT
}
optimization::Unigraph cast(GraphObj &g) {
namespace opt = optimization;
g.topo_sort();
#define I(PTR) reinterpret_cast<uintptr_t>((PTR).get())
unordered_map<uintptr_t, opt::Arc<opt::Tensor>> tensors;
for (const auto &t : g.getTensors()) {
const auto dims = t->getDims();
opt::Vec<size_t> shape(dims.size());
std::transform(dims.begin(), dims.end(), shape.begin(),
[](auto x) { return static_cast<size_t>(x); });
opt::Data data;
if (t->hasData()) {
auto ptr = t->getDataBlob()->getPtr<uint8_t *>();
data = opt::Data(ptr, ptr + t->getBytes());
}
tensors[I(t)] =
opt::Tensor::share(shape, cast(t->getDType()), std::move(data));
}
opt::Unigraph ans;
for (const auto &op : g.getOperators()) {
const auto inputs = op->getInputs(), outputs = op->getOutputs();
opt::Vec<opt::Arc<opt::Tensor>> in(inputs.size()), out(outputs.size());
std::transform(inputs.begin(), inputs.end(), in.begin(),
[&](auto x) { return tensors[I(x)]; });
std::transform(outputs.begin(), outputs.end(), out.begin(),
[&](auto x) { return tensors[I(x)]; });
switch (op->getOpType()) {
case OpType::Abs:
ans.push_operator(opt::OpType::Abs, std::move(in), std::move(out));
break;
case OpType::ACos:
ans.push_operator(opt::OpType::Acos, std::move(in), std::move(out));
break;
case OpType::ACosH:
ans.push_operator(opt::OpType::Acosh, std::move(in),
std::move(out));
break;
case OpType::Add:
ans.push_operator(opt::OpType::Add, std::move(in), std::move(out));
break;
case OpType::And:
ans.push_operator(opt::OpType::And, std::move(in), std::move(out));
break;
case OpType::ASin:
ans.push_operator(opt::OpType::Asin, std::move(in), std::move(out));
break;
case OpType::ASinH:
ans.push_operator(opt::OpType::Asinh, std::move(in),
std::move(out));
break;
case OpType::ATan:
ans.push_operator(opt::OpType::Atan, std::move(in), std::move(out));
break;
case OpType::ATanH:
ans.push_operator(opt::OpType::Atanh, std::move(in),
std::move(out));
break;
case OpType::AvgPool: {
auto obj = as<AvgPoolObj>(op);
in.push_back(
opt::Tensor::share_vec<int>({obj->getDh(), obj->getDw()}));
in.push_back(
opt::Tensor::share_vec<int>({obj->getKh(), obj->getKw()}));
in.push_back(
opt::Tensor::share_vec<int>({obj->getPh(), obj->getPw()}));
in.push_back(
opt::Tensor::share_vec<int>({obj->getSh(), obj->getSw()}));
ans.push_operator(opt::OpType::AveragePool, std::move(in),
std::move(out));
} break;
case OpType::BatchNorm:
ans.push_operator(opt::OpType::BatchNormalization, std::move(in),
std::move(out));
break;
case OpType::BitLeftShift:
in.push_back(opt::Tensor::share_single<uint8_t>(0));
ans.push_operator(opt::OpType::BitShift, std::move(in),
std::move(out));
break;
case OpType::BitRightShift:
in.push_back(opt::Tensor::share_single<uint8_t>(1));
ans.push_operator(opt::OpType::BitShift, std::move(in),
std::move(out));
break;
case OpType::BitAnd:
ans.push_operator(opt::OpType::BitwiseAnd, std::move(in),
std::move(out));
break;
case OpType::BitNot:
ans.push_operator(opt::OpType::BitwiseNot, std::move(in),
std::move(out));
break;
case OpType::BitOr:
ans.push_operator(opt::OpType::BitwiseOr, std::move(in),
std::move(out));
break;
case OpType::BitXor:
ans.push_operator(opt::OpType::BitwiseXor, std::move(in),
std::move(out));
break;
case OpType::Cast:
ans.push_operator(opt::OpType::Cast, std::move(in), std::move(out));
break;
case OpType::Ceil:
ans.push_operator(opt::OpType::Ceil, std::move(in), std::move(out));
break;
case OpType::Clip: {
auto obj = as<ClipObj>(op);
auto min = obj->getMin();
auto max = obj->getMax();
in.push_back(
opt::Tensor::share_single<float>(min ? *min : -INFINITY));
in.push_back(
opt::Tensor::share_single<float>(max ? *max : INFINITY));
ans.push_operator(opt::OpType::Clip, std::move(in), std::move(out));
} break;
case OpType::Concat:
in.push_back(
opt::Tensor::share_single<int>(as<ConcatObj>(op)->getDim()));
ans.push_operator(opt::OpType::Concat, std::move(in),
std::move(out));
break;
case OpType::Conv: {
auto obj = as<ConvObj>(op);
in.push_back(opt::Tensor::share_vec<size_t>(
{(size_t)obj->getDh(), (size_t)obj->getDw()}));
in.push_back(opt::Tensor::share_vec<size_t>(
{(size_t)obj->getPh(), (size_t)obj->getPw()}));
in.push_back(opt::Tensor::share_vec<size_t>(
{(size_t)obj->getSh(), (size_t)obj->getSw()}));
ans.push_operator(opt::OpType::Conv, std::move(in), std::move(out));
} break;
case OpType::Cos:
ans.push_operator(opt::OpType::Cos, std::move(in), std::move(out));
break;
case OpType::CosH:
ans.push_operator(opt::OpType::Cosh, std::move(in), std::move(out));
break;
case OpType::Div:
ans.push_operator(opt::OpType::Div, std::move(in), std::move(out));
break;
case OpType::Dropout:
ans.push_operator(opt::OpType::Dropout, std::move(in),
std::move(out));
break;
case OpType::Exp:
ans.push_operator(opt::OpType::Exp, std::move(in), std::move(out));
break;
case OpType::Flatten:
ans.push_operator(opt::OpType::Flatten, std::move(in),
std::move(out));
break;
case OpType::Floor:
ans.push_operator(opt::OpType::Floor, std::move(in),
std::move(out));
break;
case OpType::Gather:
in.push_back(
opt::Tensor::share_single<int>(as<GatherObj>(op)->getAxis()));
ans.push_operator(opt::OpType::Gather, std::move(in),
std::move(out));
break;
case OpType::GreaterThan:
ans.push_operator(opt::OpType::Greater, std::move(in),
std::move(out));
break;
case OpType::GreaterEqual:
ans.push_operator(opt::OpType::GreaterOrEqual, std::move(in),
std::move(out));
break;
case OpType::Identity:
ans.push_operator(opt::OpType::Identity, std::move(in),
std::move(out));
break;
case OpType::Log:
ans.push_operator(opt::OpType::Log, std::move(in), std::move(out));
break;
case OpType::Matmul: {
auto obj = as<MatmulObj>(op);
IT_ASSERT(obj->getAct() == ActType::None);
in.push_back(opt::Tensor::share_single<float>(1.0f));
in.push_back(opt::Tensor::share_single<float>(1.0f));
in.push_back(
opt::Tensor::share_single<int>(obj->getTransA() ? 1 : 0));
in.push_back(
opt::Tensor::share_single<int>(obj->getTransB() ? 1 : 0));
ans.push_operator(opt::OpType::Gemm, std::move(in), std::move(out));
} break;
case OpType::Maximum:
ans.push_operator(opt::OpType::Max, std::move(in), std::move(out));
break;
case OpType::MaxPool: {
auto obj = as<MaxPoolObj>(op);
in.push_back(
opt::Tensor::share_vec<int>({obj->getDh(), obj->getDw()}));
in.push_back(
opt::Tensor::share_vec<int>({obj->getKh(), obj->getKw()}));
in.push_back(
opt::Tensor::share_vec<int>({obj->getPh(), obj->getPw()}));
in.push_back(
opt::Tensor::share_vec<int>({obj->getSh(), obj->getSw()}));
ans.push_operator(opt::OpType::MaxPool, std::move(in),
std::move(out));
} break;
case OpType::Minimum:
ans.push_operator(opt::OpType::Min, std::move(in), std::move(out));
break;
case OpType::Mul:
ans.push_operator(opt::OpType::Mul, std::move(in), std::move(out));
break;
case OpType::Neg:
ans.push_operator(opt::OpType::Neg, std::move(in), std::move(out));
break;
case OpType::Not:
ans.push_operator(opt::OpType::Not, std::move(in), std::move(out));
break;
case OpType::Or:
ans.push_operator(opt::OpType::Or, std::move(in), std::move(out));
break;
case OpType::Pad:
in.push_back(
opt::Tensor::share_vec<int>(as<PadObj>(op)->getPads()));
ans.push_operator(opt::OpType::Pad, std::move(in), std::move(out));
break;
case OpType::Reciprocal:
ans.push_operator(opt::OpType::Reciprocal, std::move(in),
std::move(out));
break;
case OpType::ReduceMean: {
const auto obj = as<ReduceMeanObj>(op);
const auto axes = obj->getAxes();
in.push_back(
opt::Tensor::share_vec<int>(vector(axes.begin(), axes.end())));
in.push_back(
opt::Tensor::share_single<int>(obj->getKeepDims() ? 1 : 0));
ans.push_operator(opt::OpType::ReduceMean, std::move(in),
std::move(out));
} break;
case OpType::Relu:
ans.push_operator(opt::OpType::Relu, std::move(in), std::move(out));
break;
case OpType::Reshape:
ans.push_operator(opt::OpType::Reshape, std::move(in),
std::move(out));
break;
case OpType::Resize:
ans.push_operator(opt::OpType::Resize, std::move(in),
std::move(out));
break;
case OpType::Round:
ans.push_operator(opt::OpType::Round, std::move(in),
std::move(out));
break;
case OpType::Shape:
ans.push_operator(opt::OpType::Shape, std::move(in),
std::move(out));
break;
case OpType::Sigmoid:
ans.push_operator(opt::OpType::Sigmoid, std::move(in),
std::move(out));
break;
case OpType::Sin:
ans.push_operator(opt::OpType::Sin, std::move(in), std::move(out));
break;
case OpType::SinH:
ans.push_operator(opt::OpType::Sinh, std::move(in), std::move(out));
break;
case OpType::Slice:
IT_TODO_HALT();
ans.push_operator(opt::OpType::Slice, std::move(in),
std::move(out));
break;
case OpType::Softmax:
ans.push_operator(opt::OpType::Softmax, std::move(in),
std::move(out));
break;
case OpType::Split:
ans.push_operator(opt::OpType::Split, std::move(in),
std::move(out));
break;
case OpType::Sqrt:
ans.push_operator(opt::OpType::Sqrt, std::move(in), std::move(out));
break;
case OpType::Sub:
ans.push_operator(opt::OpType::Sub, std::move(in), std::move(out));
break;
case OpType::Tan:
ans.push_operator(opt::OpType::Tan, std::move(in), std::move(out));
break;
case OpType::TanH:
ans.push_operator(opt::OpType::Tanh, std::move(in), std::move(out));
break;
case OpType::Transpose:
        ans.push_operator(opt::OpType::Transpose, std::move(in),
                          std::move(out));
break;
case OpType::Xor:
ans.push_operator(opt::OpType::Xor, std::move(in), std::move(out));
break;
default:
break;
}
}
#undef I
return ans;
}
Graph cast(optimization::Unigraph const &g, Runtime rt) {
namespace opt = optimization;
unordered_map<uintptr_t, Tensor> tensors;
#define I(PTR) reinterpret_cast<uintptr_t>((PTR).get())
auto ans = make_ref<GraphObj>(std::move(rt));
for (auto const &op : g.operators) {
for (auto const &t : op.inputs) {
auto const &shape_ = t->shape;
opt::Vec<int> shape(shape_.size());
std::transform(shape_.begin(), shape_.end(), shape.begin(),
[](auto x) { return static_cast<int>(x); });
tensors[I(t)] =
ans->addTensor(std::move(shape), cast(t->data_type));
}
for (auto const &t : op.outputs) {
auto const &shape_ = t->shape;
opt::Vec<int> shape(shape_.size());
std::transform(shape_.begin(), shape_.end(), shape.begin(),
[](auto x) { return static_cast<int>(x); });
tensors[I(t)] =
ans->addTensor(std::move(shape), cast(t->data_type));
}
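        // TODO: translate the operators back from the optimization IR; at the
        // moment only the tensors are imported and every op type falls
        // through to `default`.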
switch (op.op_type) {
default:
break;
}
}
#undef I
return ans;
}
void GraphObj::optimize() {
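    // Placeholder round trip: export this graph to the optimization IR and
    // import it back. The re-imported graph is not yet written back into
    // *this.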
auto graph = cast(*this);
auto ans = cast(graph, this->runtime);
}
void GraphObj::dataMalloc() {
@@ -191,7 +581,8 @@ void GraphObj::replaceConnection(Tensor oldTensor, Tensor newTensor,
// tensor's "source" and "target" must be in "ops".
// tensor has no "source" and no "target" must not exist.
// "inputs" or "outputs" of operators must be in "tensors"
// "predecessors" and "successors" of an operator of "ops" must be in "ops".
// "predecessors" and "successors" of an operator of "ops" must be in
// "ops".
bool GraphObj::checkValid() const {
for (auto tensor : tensors) {
IT_ASSERT(!(tensor->getTargets().size() == 0 &&


@@ -29,65 +29,97 @@ void SearchEngine::printMetaGraph(Ref<SearchEngine::MetaGraph> metaGraph) {
}
Graph SearchEngine::run(const Graph graph) {
IT_ASSERT(runtimeExec == graph->getRuntime());
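    // Beam-search over partitions: for every subgraph produced by
    // partitionGraph, pair each rewritten candidate from search() with every
    // graph kept so far, then retain only the GRAPH_SIZE fastest
    // combinations as the next candidate set.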
std::cout << "[INFO] original graph: " << std::endl;
std::cout << graph->toString();
std::cout << "[INFO] perf: " << runtimeExec->getPerfTime(graph)
<< std::endl;
std::vector<Graph> partitions = partitionGraph(graph);
std::cout << "[INFO] Partition num: " << partitions.size() << std::endl;
std::vector<Graph> bestGraphs = {nullptr};
for (size_t pid = 0; pid < partitions.size(); pid++) {
auto &subGraph = partitions[pid];
std::cout << "[INFO] Partition: " << pid << std::endl;
std::vector<Graph> candidates = search(subGraph);
std::cout << "[INFO] size: " << candidates.size() << std::endl;
IT_ASSERT(candidates.size() > 0);
std::cout << subGraph->toString() << std::endl;
vector<Graph> bestGraphs{nullptr};
for (auto &subGraph : partitionGraph(graph)) {
std::vector<Graph> nextGraphs;
for (auto lastGraph : bestGraphs) {
for (auto thisGraph : candidates) {
for (auto lastGraph : bestGraphs)
for (auto thisGraph : search(subGraph)) {
std::vector<Operator> ops;
if (lastGraph != nullptr) {
for (auto op : lastGraph->getOperators()) {
if (lastGraph != nullptr)
for (auto op : lastGraph->getOperators())
ops.emplace_back(op);
}
}
if (thisGraph != nullptr) {
for (auto op : thisGraph->getOperators()) {
if (thisGraph != nullptr)
for (auto op : thisGraph->getOperators())
ops.emplace_back(op);
}
}
auto tmp = make_ref<GraphObj>(runtimeExec, ops);
tmp->dataMalloc();
nextGraphs.emplace_back(tmp);
}
}
std::sort(nextGraphs.begin(), nextGraphs.end(), [&](Graph x, Graph y) {
return runtimeExec->getPerfTime(x) < runtimeExec->getPerfTime(y);
});
if (nextGraphs.size() > GRAPH_SIZE) {
if (nextGraphs.size() > GRAPH_SIZE)
nextGraphs.resize(GRAPH_SIZE);
}
bestGraphs.clear();
for (size_t i = 0; i < nextGraphs.size(); i++) {
bestGraphs.emplace_back(nextGraphs[i]);
}
}
std::cout << "[INFO] unfused graph: " << std::endl;
for (size_t i = 0; i < bestGraphs.size(); i++) {
std::cout << "bestGraph " << i << ":" << std::endl;
std::cout << bestGraphs[i]->toString();
std::cout << "[INFO] perf: " << runtimeExec->getPerfTime(bestGraphs[i])
<< std::endl;
bestGraphs = nextGraphs;
}
return bestGraphs[0];
}
// Graph SearchEngine::run(const Graph graph) {
// IT_ASSERT(runtimeExec == graph->getRuntime());
// std::cout << "[INFO] original graph: " << std::endl;
// std::cout << graph->toString();
// std::cout << "[INFO] perf: " << runtimeExec->getPerfTime(graph)
// << std::endl;
// std::vector<Graph> partitions = partitionGraph(graph);
// std::cout << "[INFO] Partition num: " << partitions.size() << std::endl;
// std::vector<Graph> bestGraphs = {nullptr};
// for (size_t pid = 0; pid < partitions.size(); pid++) {
// auto &subGraph = partitions[pid];
// std::cout << "[INFO] Partition: " << pid << std::endl;
// std::vector<Graph> candidates = search(subGraph);
// std::cout << "[INFO] size: " << candidates.size() << std::endl;
// IT_ASSERT(candidates.size() > 0);
// std::cout << subGraph->toString() << std::endl;
// std::vector<Graph> nextGraphs;
// for (auto lastGraph : bestGraphs) {
// for (auto thisGraph : candidates) {
// std::vector<Operator> ops;
// if (lastGraph != nullptr) {
// for (auto op : lastGraph->getOperators()) {
// ops.emplace_back(op);
// }
// }
// if (thisGraph != nullptr) {
// for (auto op : thisGraph->getOperators()) {
// ops.emplace_back(op);
// }
// }
// auto tmp = make_ref<GraphObj>(runtimeExec, ops);
// tmp->dataMalloc();
// nextGraphs.emplace_back(tmp);
// }
// }
// std::sort(nextGraphs.begin(), nextGraphs.end(), [&](Graph x, Graph y)
// {
// return runtimeExec->getPerfTime(x) < runtimeExec->getPerfTime(y);
// });
// if (nextGraphs.size() > GRAPH_SIZE) {
// nextGraphs.resize(GRAPH_SIZE);
// }
// bestGraphs.clear();
// for (size_t i = 0; i < nextGraphs.size(); i++) {
// bestGraphs.emplace_back(nextGraphs[i]);
// }
// }
// std::cout << "[INFO] unfused graph: " << std::endl;
// for (size_t i = 0; i < bestGraphs.size(); i++) {
// std::cout << "bestGraph " << i << ":" << std::endl;
// std::cout << bestGraphs[i]->toString();
// std::cout << "[INFO] perf: " <<
// runtimeExec->getPerfTime(bestGraphs[i])
// << std::endl;
// }
// return bestGraphs[0];
// }
std::vector<Graph> SearchEngine::search(const Graph &graph) {
auto metaGraph = buildMetaGraphWithGraph(graph);
auto mergedGraphs = searchMerge(metaGraph);


@@ -0,0 +1,38 @@
#include "optimizations/partitions/partition.h"
#include <algorithm>
namespace infini {
Partition::CandidateQueue
Partition::rankCandidates(const GraphObj &subgraph, const Transformation &tr,
const Rating &rating) const {
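    // Run the transformation, score every substitute it produces with the
    // given Rating, and hand them back in a priority queue so callers can
    // pop candidates in rank order.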
auto substitutes = tr.run(subgraph);
CandidateQueue ans;
while (!substitutes.empty()) {
auto g = std::move(substitutes.back());
auto cost = rating.run(*g);
ans.push({std::move(g), cost});
substitutes.pop_back();
}
return ans;
}
} // namespace infini
namespace x {
struct Operator;
/// @brief A complete graph that has not been partitioned, or a minimal
/// subgraph that cannot be split further.
using UniGraph = std::vector<Operator>;
struct Candidate {
    /// @brief The candidate subgraph.
UniGraph graph;
    /// @brief The subgraph's score.
float score;
};
/// @brief A set of parallel graphs connected to the same tensors.
using Candidates = std::priority_queue<Candidate>;
/// @brief A complete graph assembled from multiple subgraphs connected
/// through tensors.
using Graph = std::vector<Candidates>;
} // namespace x
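To make the intended use of these aliases concrete, here is a minimal, self-contained sketch; the empty `Operator` body, the `operator<` on `Candidate` (which `std::priority_queue` needs before the queue can actually be used), and the `assembleBest` helper are illustrative assumptions rather than repository code.
#include <queue>
#include <vector>
namespace x_demo {
// Stand-in for the real operator description.
struct Operator {};
using UniGraph = std::vector<Operator>;
struct Candidate {
    UniGraph graph;
    float score;
};
// The priority queue needs an ordering; assume a higher score is better.
inline bool operator<(Candidate const &a, Candidate const &b) {
    return a.score < b.score;
}
using Candidates = std::priority_queue<Candidate>;
using Graph = std::vector<Candidates>;
// Assemble a full graph by taking the best-scored candidate of every
// parallel slot and concatenating their operators.
inline UniGraph assembleBest(Graph slots) {
    UniGraph ans;
    for (auto &candidates : slots) {
        if (candidates.empty())
            continue;
        auto const &best = candidates.top();
        ans.insert(ans.end(), best.graph.begin(), best.graph.end());
    }
    return ans;
}
} // namespace x_demo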


@@ -0,0 +1,11 @@
#include "optimizations/partitions/single_operator_partition.h"
namespace infini {
Graph SingleOperatorPartition::run(const GraphObj &graph,
const Transformation &tr,
const Rating &rating) const {
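    // Not implemented yet. The intent suggested by the class name is to treat
    // each operator as its own subgraph, rank its candidates via
    // rankCandidates, and stitch the winners back together; for now this
    // halts and would otherwise return a plain copy of the input graph.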
IT_TODO_HALT();
return make_ref<GraphObj>(graph);
}
} // namespace infini