#include <unordered_set>

namespace Autoscheduler {

using std::unordered_set;

// Several declarations in this header are templated on a PostCreateMutator, a
// callable applied to each newly created LoopNest. They include the free
// function deep_copy_loop_nest and the State member create_feature_root:

template<typename PostCreateMutator>
void deep_copy_loop_nest(LoopNest *new_loop_nest,
                         const LoopNest *new_loop_nest_parent,
                         const IntrusivePtr<const LoopNest> &existing_loop_nest,
                         const PostCreateMutator &post_create_mutator);

template<typename PostCreateMutator>
LoopNest *create_feature_root(const PostCreateMutator &post_create_mutator) const;  // member of struct State
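For illustration, a PostCreateMutator can be any functor with the shape below. NoOpMutator is a hypothetical example, not something defined in this header; the autoscheduler supplies its own mutator, whose members are listed further down.

// Hypothetical example of the PostCreateMutator concept.
struct NoOpMutator {
    // Called once per LoopNest created during the deep copy; a real mutator
    // would restructure the freshly copied node here (e.g. split or add loops).
    void operator()(LoopNest *new_loop_nest) const {
        (void)new_loop_nest;
    }
};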
// A priority queue of States, ordered by increasing cost.
class StateQueue {
    struct CompareStates {
        bool operator()(const IntrusivePtr<State> &a, const IntrusivePtr<State> &b) const {
            return a->cost > b->cost;
        }
    };

    std::vector<IntrusivePtr<State>> storage;
    size_t sz = 0;

public:
    void emplace(IntrusivePtr<State> &&s) {
        if (sz >= storage.size()) {
            storage.resize(std::max(sz * 2, (size_t)64));
        }
        internal_assert(sz < storage.size()) << sz << " " << storage.size() << "\n";
        storage[sz] = std::move(s);
        sz++;
        std::push_heap(storage.begin(), storage.begin() + sz, CompareStates{});
    }

    IntrusivePtr<State> pop() {
        internal_assert(sz <= storage.size()) << sz << " " << storage.size() << "\n";
        std::pop_heap(storage.begin(), storage.begin() + sz, CompareStates{});
        sz--;
        return std::move(storage[sz]);
    }

    void swap(StateQueue &other) {
        storage.swap(other.storage);
        std::swap(sz, other.sz);
    }

    // Re-establish the heap invariant after costs have been recomputed.
    void resort() {
        std::make_heap(storage.begin(), storage.begin() + sz, CompareStates{});
    }

    void clear() {
        for (size_t i = 0; i < sz; i++) {
            storage[i] = IntrusivePtr<State>{};
        }
        sz = 0;
    }
};
StateQueue members:
    void emplace(IntrusivePtr<State> &&s)
    IntrusivePtr<State> pop()
    const IntrusivePtr<State> &top()
    IntrusivePtr<State> operator[](int idx) const
    void swap(StateQueue &other)
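As a usage sketch: CompareStates orders the heap by increasing State::cost, so pop() always yields the cheapest remaining state. The helper below and the way candidate states are produced are hypothetical, not part of this header.

// Hypothetical helper: drain a set of candidate states cheapest-first.
void drain_cheapest_first(std::vector<IntrusivePtr<State>> &candidates) {
    StateQueue q;
    for (auto &c : candidates) {
        q.emplace(std::move(c));                 // heap ordered by increasing cost
    }
    for (size_t i = 0; i < candidates.size(); i++) {
        IntrusivePtr<State> cheapest = q.pop();  // lowest-cost state first
        // ... examine or expand `cheapest` here ...
    }
}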
Free functions and constants declared alongside State:
    constexpr int kLocalMemoryLimit
    bool verify_memoized_features()
    bool is_memoize_blocks_enabled()
    bool use_adjusted_tilings()
    bool compute_root_and_inline_only()
    double get_stack_memory_adjustment_factor()
    int64_t get_stack_memory_limit()
    void deep_copy_loop_nest(LoopNest *new_loop_nest, const LoopNest *new_loop_nest_parent,
                             const IntrusivePtr<const LoopNest> &existing_loop_nest,
                             const PostCreateMutator &post_create_mutator)
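These option getters and limits gate behaviour of the search. A minimal sketch of the typical getter pattern is below; the function and environment-variable names in the sketch are hypothetical, chosen only to illustrate the shape, and the real implementations are not shown in this excerpt.

#include <cstdlib>

// Hypothetical illustration only: an option getter that reads an environment
// variable once and caches the result.
inline bool example_option_enabled() {
    static const bool enabled = []() {
        const char *s = std::getenv("EXAMPLE_AUTOSCHEDULER_OPTION");  // hypothetical name
        return s != nullptr && std::atoi(s) != 0;
    }();
    return enabled;
}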
Members of the loop-nest mutator (a PostCreateMutator used when building the root for featurization):
    const Anderson2021Params &params
    void operator()(LoopNest *new_loop_nest) const
    void split_compute_root_loops(LoopNest *loop_nest) const
    void add_outer_thread_loops(LoopNest *loop_nest) const
struct State members:

  Data members:
    IntrusivePtr<const State> parent
    IntrusivePtr<const LoopNest> root
    NodeMap<bool> always_consider_inline
    std::vector<double> cost_per_stage

  Deleted copy/assignment:
    State(const State &) = delete
    void operator=(const State &) = delete
    void operator=(State &&) = delete

  Member functions:
    uint64_t structural_hash(int depth) const
    IntrusivePtr<State> make_child() const
    bool calculate_cost(const FunctionDAG &dag, const Anderson2021Params &params, const Target &target, CostModel *cost_model, Statistics &stats, bool verbose=false)
    bool compute_featurization(const FunctionDAG &dag, const Anderson2021Params &params, const Target &target, StageMap<ScheduleFeatures> *features, Statistics &stats, bool verbose=false) const
    void save_featurization(const FunctionDAG &dag, const Anderson2021Params &params, const Target &target, std::ostream &out) const
    IntrusivePtr<const LoopNest> get_root_for_features(const Anderson2021Params &params, const Target &target) const
    LoopNest *create_feature_root(const PostCreateMutator &post_create_mutator) const
    bool exceeds_serial_extents_limit(const Target &target) const
    bool exceeds_shared_memory_limit(const Anderson2021Params &params, const Target &target) const
    bool exceeds_local_memory_limit(const Anderson2021Params &params, const Target &target) const
    int64_t get_shared_mem_alloc_size(const LoopNest *block, const LoopNest *loop) const
    bool has_compute_root_loops_without_blocks() const
    bool has_loop_nest_without_thread_loops() const
    bool has_dynamic_allocation_inside_thread() const
    bool contains_store_at(const set<const FunctionDAG::Node *> &outermost_store_at, const IntrusivePtr<const LoopNest> &parent) const
    bool contains_store_at_further_in_than_outermost() const
    void compute_loop_nest_parents(map<const LoopNest *, pair<const LoopNest *, int>> &p, const LoopNest *here, int depth) const
    const LoopNest *deepest_common_ancestor(const map<const LoopNest *, pair<const LoopNest *, int>> &parent, const LoopNest *a, const LoopNest *b) const
    const LoopNest *deepest_valid_compute_location(const Anderson2021Params &params, const map<const LoopNest *, pair<const LoopNest *, int>> &parent, const FunctionDAG::Node &node, const LoopNest *loop, const LoopNest *root, StageMap<int64_t> &total_shared_mem_alloc_sizes) const
    int64_t total_loop_extents_of_ancestors(const map<const LoopNest *, pair<const LoopNest *, int>> &parent, const LoopNest *loop) const
    void set_gpu_store_site(const map<const LoopNest *, pair<const LoopNest *, int>> &parent, const LoopNest *loop, LoopNest::Sites &site) const
    void apply_schedule(const FunctionDAG &dag, const Anderson2021Params &params, const Target &target)
    bool can_fuse_gpu(const vector<int64_t> &parallel_extents) const
    void fuse_gpu_blocks(LoopNest::StageScheduleState *state, Stage &stage, const vector<VarOrRVar> &parallel_vars, const vector<int64_t> &parallel_extents, const vector<int> &constant_extents) const
    void mark_gpu_blocks(LoopNest::StageScheduleState *state, Stage &stage, const vector<VarOrRVar> &parallel_vars, const vector<int64_t> &parallel_extents) const
    bool mark_gpu_threads(LoopNest::StageScheduleState *state, Stage &stage, std::unordered_set<std::string> &new_serial_vars, std::ostringstream &staged_funcs_schedule_source) const
    bool should_always_consider_inline(const FunctionDAG::Node *node) const
    void add_to_always_consider_inline_options(const FunctionDAG::Node *node)
    void update_always_consider_inline_options(const FunctionDAG::Node *node)
    void print_compute_locations() const
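Taken together, these members cover the lifecycle of a candidate schedule: costing it, dumping its featurization, and applying it. The sketch below strings the listed calls together from the caller's side; the surrounding driver function is hypothetical and only illustrates the call shapes.

// Hypothetical caller-side sketch using only the member signatures listed above.
void finalize_best_state(IntrusivePtr<State> best,
                         const FunctionDAG &dag,
                         const Anderson2021Params &params,
                         const Target &target,
                         CostModel *cost_model,
                         Statistics &stats,
                         std::ostream &featurization_out) {
    // Recompute the cost (fills cost_per_stage); bail out if costing fails.
    if (!best->calculate_cost(dag, params, target, cost_model, stats)) {
        return;
    }
    best->save_featurization(dag, params, target, featurization_out);  // e.g. for cost-model training
    best->apply_schedule(dag, params, target);  // emit the actual Halide schedule
}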