Halide 16.0.0
Halide compiler and libraries
Loading...
Searching...
No Matches
IROperator.h
Go to the documentation of this file.
1#ifndef HALIDE_IR_OPERATOR_H
2#define HALIDE_IR_OPERATOR_H
3
4/** \file
5 *
6 * Defines various operator overloads and utility functions that make
7 * it more pleasant to work with Halide expressions.
8 */
9
10#include <cmath>
11
12#include "Expr.h"
13#include "Tuple.h"
14
15namespace Halide {
16
17namespace Internal {
18/** Is the expression either an IntImm, a FloatImm, a StringImm, or a
19 * Cast of the same, or a Ramp or Broadcast of the same. Doesn't do
20 * any constant folding. */
21bool is_const(const Expr &e);
22
23/** Is the expression an IntImm, FloatImm of a particular value, or a
24 * Cast, or Broadcast of the same. */
25bool is_const(const Expr &e, int64_t v);
26
27/** If an expression is an IntImm or a Broadcast of an IntImm, return
28 * a pointer to its value. Otherwise returns nullptr. */
29const int64_t *as_const_int(const Expr &e);
30
31/** If an expression is a UIntImm or a Broadcast of a UIntImm, return
32 * a pointer to its value. Otherwise returns nullptr. */
33const uint64_t *as_const_uint(const Expr &e);
34
35/** If an expression is a FloatImm or a Broadcast of a FloatImm,
36 * return a pointer to its value. Otherwise returns nullptr. */
37const double *as_const_float(const Expr &e);
38
39/** Is the expression a constant integer power of two. Also returns
40 * log base two of the expression if it is. Only returns true for
41 * integer types. */
42bool is_const_power_of_two_integer(const Expr &e, int *bits);
43
44/** Is the expression a const (as defined by is_const), and also
45 * strictly greater than zero (in all lanes, if a vector expression) */
46bool is_positive_const(const Expr &e);
47
48/** Is the expression a const (as defined by is_const), and also
49 * strictly less than zero (in all lanes, if a vector expression) */
50bool is_negative_const(const Expr &e);
51
52/** Is the expression an undef */
53bool is_undef(const Expr &e);
54
55/** Is the expression a const (as defined by is_const), and also equal
56 * to zero (in all lanes, if a vector expression) */
57bool is_const_zero(const Expr &e);
58
59/** Is the expression a const (as defined by is_const), and also equal
60 * to one (in all lanes, if a vector expression) */
61bool is_const_one(const Expr &e);
62
63/** Is the statement a no-op (which we represent as either an
64 * undefined Stmt, or as an Evaluate node of a constant) */
65bool is_no_op(const Stmt &s);
66
67/** Does the expression
68 * 1) Take on the same value no matter where it appears in a Stmt, and
69 * 2) Evaluating it has no side-effects
70 */
71bool is_pure(const Expr &e);
72
73/** Construct an immediate of the given type from any numeric C++ type. */
74// @{
77Expr make_const(Type t, double val);
78inline Expr make_const(Type t, int32_t val) {
79 return make_const(t, (int64_t)val);
80}
81inline Expr make_const(Type t, uint32_t val) {
82 return make_const(t, (uint64_t)val);
83}
84inline Expr make_const(Type t, int16_t val) {
85 return make_const(t, (int64_t)val);
86}
87inline Expr make_const(Type t, uint16_t val) {
88 return make_const(t, (uint64_t)val);
89}
90inline Expr make_const(Type t, int8_t val) {
91 return make_const(t, (int64_t)val);
92}
93inline Expr make_const(Type t, uint8_t val) {
94 return make_const(t, (uint64_t)val);
95}
96inline Expr make_const(Type t, bool val) {
97 return make_const(t, (uint64_t)val);
98}
99inline Expr make_const(Type t, float val) {
100 return make_const(t, (double)val);
101}
103 return make_const(t, (double)val);
104}
105// @}
106
107/** Construct a unique signed_integer_overflow Expr */
109
110/** Check if an expression is a signed_integer_overflow */
112
113/** Check if a constant value can be correctly represented as the given type. */
115
116/** Construct a boolean constant from a C++ boolean value.
117 * May also be a vector if width is given.
118 * It is not possible to coerce a C++ boolean to Expr because
119 * if we provide such a path then char objects can ambiguously
120 * be converted to Halide Expr or to std::string. The problem
121 * is that C++ does not have a real bool type - it is in fact
122 * close enough to char that C++ does not know how to distinguish them.
123 * make_bool is the explicit coercion. */
124Expr make_bool(bool val, int lanes = 1);
125
126/** Construct the representation of zero in the given type */
128
129/** Construct the representation of one in the given type */
131
132/** Construct the representation of two in the given type */
134
135/** Construct the constant boolean true. May also be a vector of
136 * trues, if a lanes argument is given. */
137Expr const_true(int lanes = 1);
138
139/** Construct the constant boolean false. May also be a vector of
140 * falses, if a lanes argument is given. */
141Expr const_false(int lanes = 1);
142
143/** Attempt to cast an expression to a smaller type while provably not
144 * losing information. If it can't be done, return an undefined
145 * Expr. */
147
148/** Attempt to negate x without introducing new IR and without overflow.
149 * If it can't be done, return an undefined Expr. */
151
152/** Coerce the two expressions to have the same type, using C-style
153 * casting rules. For the purposes of casting, a boolean type is
154 * UInt(1). We use the following procedure:
155 *
156 * If the types already match, do nothing.
157 *
158 * Then, if one type is a vector and the other is a scalar, the scalar
159 * is broadcast to match the vector width, and we continue.
160 *
161 * Then, if one type is floating-point and the other is not, the
162 * non-float is cast to the floating-point type, and we're done.
163 *
164 * Then, if both types are unsigned ints, the one with fewer bits is
165 * cast to match the one with more bits and we're done.
166 *
167 * Then, if both types are signed ints, the one with fewer bits is
168 * cast to match the one with more bits and we're done.
169 *
170 * Finally, if one type is an unsigned int and the other type is a signed
171 * int, both are cast to a signed int with the greater of the two
172 * bit-widths. For example, matching an Int(8) with a UInt(16) results
173 * in an Int(16).
174 *
175 */
176void match_types(Expr &a, Expr &b);
177
178/** Asserts that both expressions are integer types and are either
179 * both signed or both unsigned. If one argument is scalar and the
180 * other a vector, the scalar is broadcasted to have the same number
181 * of lanes as the vector. If one expression is of narrower type than
182 * the other, it is widened to the bit width of the wider. */
183void match_types_bitwise(Expr &a, Expr &b, const char *op_name);
184
185/** Halide's vectorizable transcendentals. */
186// @{
190// @}
191
192/** Raise an expression to an integer power by repeatedly multiplying
193 * it by itself. */
195
196/** Split a boolean condition into vector of ANDs. If 'cond' is undefined,
197 * return an empty vector. */
198void split_into_ands(const Expr &cond, std::vector<Expr> &result);
199
200/** A builder to help create Exprs representing halide_buffer_t
201 * structs (e.g. foo.buffer) via calls to halide_buffer_init. Fill out
202 * the fields and then call build. The resulting Expr will be a call
203 * to halide_buffer_init with the struct members as arguments. If the
204 * buffer_memory field is undefined, it uses a call to alloca to make
205 * some stack memory for the buffer. If the shape_memory field is
206 * undefined, it similarly uses stack memory for the shape. If the
207 * shape_memory field is null, it uses the dim field already in the
208 * buffer. Other uninitialized fields will take on a value of zero in
209 * the constructed buffer. */
219
220/** If e is a ramp expression with stride, default 1, return the base,
221 * otherwise undefined. */
222Expr strided_ramp_base(const Expr &e, int stride = 1);
223
224/** Implementations of division and mod that are specific to Halide.
225 * Use these implementations; do not use native C division or mod to
226 * simplify Halide expressions. Halide division and modulo satisfy
227 * the Euclidean definition of division for integers a and b:
228 *
229 \code
230 when b != 0, (a/b)*b + a%b = a
231 0 <= a%b < |b|
232 \endcode
233 *
234 * Additionally, mod by zero returns zero, and div by zero returns
235 * zero. This makes mod and div total functions.
236 */
237// @{
238template<typename T>
239inline T mod_imp(T a, T b) {
240 Type t = type_of<T>();
241 if (!t.is_float() && b == 0) {
242 return 0;
243 } else if (t.is_int()) {
244 int64_t ia = a;
245 int64_t ib = b;
246 int64_t a_neg = ia >> 63;
247 int64_t b_neg = ib >> 63;
248 int64_t b_zero = (ib == 0) ? -1 : 0;
249 ia -= a_neg;
250 int64_t r = ia % (ib | b_zero);
251 r += (a_neg & ((ib ^ b_neg) + ~b_neg));
252 r &= ~b_zero;
253 return r;
254 } else {
255 return a % b;
256 }
257}
258
259template<typename T>
260inline T div_imp(T a, T b) {
261 Type t = type_of<T>();
262 if (!t.is_float() && b == 0) {
263 return (T)0;
264 } else if (t.is_int()) {
265 // Do it as 64-bit
266 int64_t ia = a;
267 int64_t ib = b;
268 int64_t a_neg = ia >> 63;
269 int64_t b_neg = ib >> 63;
270 int64_t b_zero = (ib == 0) ? -1 : 0;
271 ib -= b_zero;
272 ia -= a_neg;
273 int64_t q = ia / ib;
274 q += a_neg & (~b_neg - b_neg);
275 q &= ~b_zero;
276 return (T)q;
277 } else {
278 return a / b;
279 }
280}
281// @}
282
283// Special cases for float, double.
284template<>
285inline float mod_imp<float>(float a, float b) {
286 float f = a - b * (floorf(a / b));
287 // The remainder has the same sign as b.
288 return f;
289}
290template<>
291inline double mod_imp<double>(double a, double b) {
292 double f = a - b * (std::floor(a / b));
293 return f;
294}
295
296template<>
297inline float div_imp<float>(float a, float b) {
298 return a / b;
299}
300template<>
301inline double div_imp<double>(double a, double b) {
302 return a / b;
303}
304
305/** Return an Expr that is identical to the input Expr, but with
306 * all calls to likely() and likely_if_innermost() removed. */
308
309/** Return a Stmt that is identical to the input Stmt, but with
310 * all calls to likely() and likely_if_innermost() removed. */
312
313/** Return an Expr that is identical to the input Expr, but with
314 * all calls to promise_clamped() and unsafe_promise_clamped() removed. */
316
317/** Return a Stmt that is identical to the input Stmt, but with
318 * all calls to promise_clamped() and unsafe_promise_clamped() removed. */
320
321/** If the expression is a tag helper call, remove it and return
322 * the tagged expression. If not, returns the expression. */
324
325template<typename T>
327 static constexpr bool value = std::is_convertible<T, const char *>::value ||
328 std::is_convertible<T, Halide::Expr>::value;
329};
330
331template<typename... Args>
332struct all_are_printable_args : meta_and<is_printable_arg<Args>...> {};
333
334// Secondary args to print can be Exprs or const char *
335inline HALIDE_NO_USER_CODE_INLINE void collect_print_args(std::vector<Expr> &args) {
336}
337
338template<typename... Args>
339inline HALIDE_NO_USER_CODE_INLINE void collect_print_args(std::vector<Expr> &args, const char *arg, Args &&...more_args) {
340 args.emplace_back(std::string(arg));
341 collect_print_args(args, std::forward<Args>(more_args)...);
342}
343
344template<typename... Args>
345inline HALIDE_NO_USER_CODE_INLINE void collect_print_args(std::vector<Expr> &args, Expr arg, Args &&...more_args) {
346 args.push_back(std::move(arg));
347 collect_print_args(args, std::forward<Args>(more_args)...);
348}
349
350Expr requirement_failed_error(Expr condition, const std::vector<Expr> &args);
351
352Expr memoize_tag_helper(Expr result, const std::vector<Expr> &cache_key_values);
353
354} // namespace Internal
355
356/** Cast an expression to the halide type corresponding to the C++ type T. */
357template<typename T>
358inline Expr cast(Expr a) {
359 return cast(type_of<T>(), std::move(a));
360}
361
362/** Cast an expression to a new type. */
364
365/** Return the sum of two expressions, doing any necessary type
366 * coercion using \ref Internal::match_types */
368
369/** Add an expression and a constant integer. Coerces the type of the
370 * integer to match the type of the expression. Errors if the integer
371 * cannot be represented in the type of the expression. */
372// @{
374
375/** Add a constant integer and an expression. Coerces the type of the
376 * integer to match the type of the expression. Errors if the integer
377 * cannot be represented in the type of the expression. */
379
380/** Modify the first expression to be the sum of two expressions,
381 * without changing its type. This casts the second argument to match
382 * the type of the first. */
384
385/** Return the difference of two expressions, doing any necessary type
386 * coercion using \ref Internal::match_types */
388
389/** Subtracts a constant integer from an expression. Coerces the type of the
390 * integer to match the type of the expression. Errors if the integer
391 * cannot be represented in the type of the expression. */
393
394/** Subtracts an expression from a constant integer. Coerces the type
395 * of the integer to match the type of the expression. Errors if the
396 * integer cannot be represented in the type of the expression. */
398
399/** Return the negative of the argument. Does no type casting, so more
400 * formally: return that number which when added to the original,
401 * yields zero of the same type. For unsigned integers the negative is
402 * still an unsigned integer. E.g. in UInt(8), the negative of 56 is
403 * 200, because 56 + 200 == 0 */
405
406/** Modify the first expression to be the difference of two expressions,
407 * without changing its type. This casts the second argument to match
408 * the type of the first. */
410
411/** Return the product of two expressions, doing any necessary type
412 * coercion using \ref Internal::match_types */
414
415/** Multiply an expression and a constant integer. Coerces the type of the
416 * integer to match the type of the expression. Errors if the integer
417 * cannot be represented in the type of the expression. */
419
420/** Multiply a constant integer and an expression. Coerces the type of
421 * the integer to match the type of the expression. Errors if the
422 * integer cannot be represented in the type of the expression. */
424
425/** Modify the first expression to be the product of two expressions,
426 * without changing its type. This casts the second argument to match
427 * the type of the first. */
429
430/** Return the ratio of two expressions, doing any necessary type
431 * coercion using \ref Internal::match_types. Note that integer
432 * division in Halide is not the same as integer division in C-like
433 * languages in two ways.
434 *
435 * First, signed integer division in Halide rounds according to the
436 * sign of the denominator. This means towards minus infinity for
437 * positive denominators, and towards positive infinity for negative
438 * denominators. This is unlike C, which rounds towards zero. This
439 * decision ensures that upsampling expressions like f(x/2, y/2) don't
440 * have funny discontinuities when x and y cross zero.
441 *
442 * Second, division by zero returns zero instead of faulting. For
443 * types where overflow is defined behavior, division of the largest
444 * negative signed integer by -1 returns the largest negative signed
445 * integer for the type (i.e. it wraps). This ensures that a division
446 * operation can never have a side-effect, which is helpful in Halide
447 * because scheduling directives can expand the domain of computation
448 * of a Func, potentially introducing new zero-division.
449 */
451
452/** Modify the first expression to be the ratio of two expressions,
453 * without changing its type. This casts the second argument to match
454 * the type of the first. Note that signed integer division in Halide
455 * rounds towards minus infinity, unlike C, which rounds towards
456 * zero. */
458
459/** Divides an expression by a constant integer. Coerces the type
460 * of the integer to match the type of the expression. Errors if the
461 * integer cannot be represented in the type of the expression. */
463
464/** Divides a constant integer by an expression. Coerces the type
465 * of the integer to match the type of the expression. Errors if the
466 * integer cannot be represented in the type of the expression. */
468
469/** Return the first argument reduced modulo the second, doing any
470 * necessary type coercion using \ref Internal::match_types. There are
471 * two key differences between C-like languages and Halide for the
472 * modulo operation, which complement the way division works.
473 *
474 * First, the result is never negative, so x % 2 is always zero or
475 * one, unlike in C-like languages. x % -2 is equivalent, and is also
476 * always zero or one. Second, mod by zero evaluates to zero (unlike
477 * in C, where it faults). This makes modulo, like division, a
478 * side-effect-free operation. */
480
481/** Mods an expression by a constant integer. Coerces the type
482 * of the integer to match the type of the expression. Errors if the
483 * integer cannot be represented in the type of the expression. */
485
486/** Mods a constant integer by an expression. Coerces the type
487 * of the integer to match the type of the expression. Errors if the
488 * integer cannot be represented in the type of the expression. */
490
491/** Return a boolean expression that tests whether the first argument
492 * is greater than the second, after doing any necessary type coercion
493 * using \ref Internal::match_types */
495
496/** Return a boolean expression that tests whether an expression is
497 * greater than a constant integer. Coerces the integer to the type of
498 * the expression. Errors if the integer is not representable in that
499 * type. */
501
502/** Return a boolean expression that tests whether a constant integer is
503 * greater than an expression. Coerces the integer to the type of
504 * the expression. Errors if the integer is not representable in that
505 * type. */
507
508/** Return a boolean expression that tests whether the first argument
509 * is less than the second, after doing any necessary type coercion
510 * using \ref Internal::match_types */
512
513/** Return a boolean expression that tests whether an expression is
514 * less than a constant integer. Coerces the integer to the type of
515 * the expression. Errors if the integer is not representable in that
516 * type. */
518
519/** Return a boolean expression that tests whether a constant integer is
520 * less than an expression. Coerces the integer to the type of
521 * the expression. Errors if the integer is not representable in that
522 * type. */
524
525/** Return a boolean expression that tests whether the first argument
526 * is less than or equal to the second, after doing any necessary type
527 * coercion using \ref Internal::match_types */
529
530/** Return a boolean expression that tests whether an expression is
531 * less than or equal to a constant integer. Coerces the integer to
532 * the type of the expression. Errors if the integer is not
533 * representable in that type. */
535
536/** Return a boolean expression that tests whether a constant integer
537 * is less than or equal to an expression. Coerces the integer to the
538 * type of the expression. Errors if the integer is not representable
539 * in that type. */
541
542/** Return a boolean expression that tests whether the first argument
543 * is greater than or equal to the second, after doing any necessary
544 * type coercion using \ref Internal::match_types */
546
547/** Return a boolean expression that tests whether an expression is
548 * greater than or equal to a constant integer. Coerces the integer to
549 * the type of the expression. Errors if the integer is not
550 * representable in that type. */
551Expr operator>=(const Expr &a, int b);
552
553/** Return a boolean expression that tests whether a constant integer
554 * is greater than or equal to an expression. Coerces the integer to the
555 * type of the expression. Errors if the integer is not representable
556 * in that type. */
557Expr operator>=(int a, const Expr &b);
558
559/** Return a boolean expression that tests whether the first argument
560 * is equal to the second, after doing any necessary type coercion
561 * using \ref Internal::match_types */
563
564/** Return a boolean expression that tests whether an expression is
565 * equal to a constant integer. Coerces the integer to the type of the
566 * expression. Errors if the integer is not representable in that
567 * type. */
569
570/** Return a boolean expression that tests whether a constant integer
571 * is equal to an expression. Coerces the integer to the type of the
572 * expression. Errors if the integer is not representable in that
573 * type. */
575
576/** Return a boolean expression that tests whether the first argument
577 * is not equal to the second, after doing any necessary type coercion
578 * using \ref Internal::match_types */
580
581/** Return a boolean expression that tests whether an expression is
582 * not equal to a constant integer. Coerces the integer to the type of
583 * the expression. Errors if the integer is not representable in that
584 * type. */
586
587/** Return a boolean expression that tests whether a constant integer
588 * is not equal to an expression. Coerces the integer to the type of
589 * the expression. Errors if the integer is not representable in that
590 * type. */
592
593/** Returns the logical and of the two arguments */
595
596/** Logical and of an Expr and a bool. Either returns the Expr or an
597 * Expr representing false, depending on the bool. */
598// @{
601// @}
602
603/** Returns the logical or of the two arguments */
605
606/** Logical or of an Expr and a bool. Either returns the Expr or an
607 * Expr representing true, depending on the bool. */
608// @{
611// @}
612
613/** Returns the logical not of the argument */
615
616/** Returns an expression representing the greater of the two
617 * arguments, after doing any necessary type coercion using
618 * \ref Internal::match_types. Vectorizes cleanly on most platforms
619 * (with the exception of integer types on x86 without SSE4). */
621
622/** Returns an expression representing the greater of an expression
623 * and a constant integer. The integer is coerced to the type of the
624 * expression. Errors if the integer is not representable as that
625 * type. Vectorizes cleanly on most platforms (with the exception of
626 * integer types on x86 without SSE4). */
627Expr max(Expr a, int b);
628
629/** Returns an expression representing the greater of a constant
630 * integer and an expression. The integer is coerced to the type of
631 * the expression. Errors if the integer is not representable as that
632 * type. Vectorizes cleanly on most platforms (with the exception of
633 * integer types on x86 without SSE4). */
634Expr max(int a, Expr b);
635
636inline Expr max(float a, Expr b) {
637 return max(Expr(a), std::move(b));
638}
639inline Expr max(Expr a, float b) {
640 return max(std::move(a), Expr(b));
641}
642
643/** Returns an expression representing the greater of an expressions
644 * vector, after doing any necessary type coersion using
645 * \ref Internal::match_types. Vectorizes cleanly on most platforms
646 * (with the exception of integer types on x86 without SSE4).
647 * The expressions are folded from right ie. max(.., max(.., ..)).
648 * The arguments can be any mix of types but must all be convertible to Expr. */
649template<typename A, typename B, typename C, typename... Rest,
650 typename std::enable_if<Halide::Internal::all_are_convertible<Expr, Rest...>::value>::type * = nullptr>
651inline Expr max(A &&a, B &&b, C &&c, Rest &&...rest) {
652 return max(std::forward<A>(a), max(std::forward<B>(b), std::forward<C>(c), std::forward<Rest>(rest)...));
653}
654
656
657/** Returns an expression representing the lesser of an expression
658 * and a constant integer. The integer is coerced to the type of the
659 * expression. Errors if the integer is not representable as that
660 * type. Vectorizes cleanly on most platforms (with the exception of
661 * integer types on x86 without SSE4). */
662Expr min(Expr a, int b);
663
664/** Returns an expression representing the lesser of a constant
665 * integer and an expression. The integer is coerced to the type of
666 * the expression. Errors if the integer is not representable as that
667 * type. Vectorizes cleanly on most platforms (with the exception of
668 * integer types on x86 without SSE4). */
669Expr min(int a, Expr b);
670
671inline Expr min(float a, Expr b) {
672 return min(Expr(a), std::move(b));
673}
674inline Expr min(Expr a, float b) {
675 return min(std::move(a), Expr(b));
676}
677
678/** Returns an expression representing the lesser of an expressions
679 * vector, after doing any necessary type coersion using
680 * \ref Internal::match_types. Vectorizes cleanly on most platforms
681 * (with the exception of integer types on x86 without SSE4).
682 * The expressions are folded from right ie. min(.., min(.., ..)).
683 * The arguments can be any mix of types but must all be convertible to Expr. */
684template<typename A, typename B, typename C, typename... Rest,
685 typename std::enable_if<Halide::Internal::all_are_convertible<Expr, Rest...>::value>::type * = nullptr>
686inline Expr min(A &&a, B &&b, C &&c, Rest &&...rest) {
687 return min(std::forward<A>(a), min(std::forward<B>(b), std::forward<C>(c), std::forward<Rest>(rest)...));
688}
689
690/** Operators on floats treats those floats as Exprs. Making these
691 * explicit prevents implicit float->int casts that might otherwise
692 * occur. */
693// @{
694inline Expr operator+(Expr a, float b) {
695 return std::move(a) + Expr(b);
696}
697inline Expr operator+(float a, Expr b) {
698 return Expr(a) + std::move(b);
699}
700inline Expr operator-(Expr a, float b) {
701 return std::move(a) - Expr(b);
702}
703inline Expr operator-(float a, Expr b) {
704 return Expr(a) - std::move(b);
705}
706inline Expr operator*(Expr a, float b) {
707 return std::move(a) * Expr(b);
708}
709inline Expr operator*(float a, Expr b) {
710 return Expr(a) * std::move(b);
711}
712inline Expr operator/(Expr a, float b) {
713 return std::move(a) / Expr(b);
714}
715inline Expr operator/(float a, Expr b) {
716 return Expr(a) / std::move(b);
717}
718inline Expr operator%(Expr a, float b) {
719 return std::move(a) % Expr(b);
720}
721inline Expr operator%(float a, Expr b) {
722 return Expr(a) % std::move(b);
723}
724inline Expr operator>(Expr a, float b) {
725 return std::move(a) > Expr(b);
726}
727inline Expr operator>(float a, Expr b) {
728 return Expr(a) > std::move(b);
729}
730inline Expr operator<(Expr a, float b) {
731 return std::move(a) < Expr(b);
732}
733inline Expr operator<(float a, Expr b) {
734 return Expr(a) < std::move(b);
735}
736inline Expr operator>=(Expr a, float b) {
737 return std::move(a) >= Expr(b);
738}
739inline Expr operator>=(float a, Expr b) {
740 return Expr(a) >= std::move(b);
741}
742inline Expr operator<=(Expr a, float b) {
743 return std::move(a) <= Expr(b);
744}
745inline Expr operator<=(float a, Expr b) {
746 return Expr(a) <= std::move(b);
747}
748inline Expr operator==(Expr a, float b) {
749 return std::move(a) == Expr(b);
750}
751inline Expr operator==(float a, Expr b) {
752 return Expr(a) == std::move(b);
753}
754inline Expr operator!=(Expr a, float b) {
755 return std::move(a) != Expr(b);
756}
757inline Expr operator!=(float a, Expr b) {
758 return Expr(a) != std::move(b);
759}
760// @}
761
762/** Clamps an expression to lie within the given bounds. The bounds
763 * are type-cast to match the expression. Vectorizes as well as min/max. */
764Expr clamp(Expr a, const Expr &min_val, const Expr &max_val);
765
766/** Returns the absolute value of a signed integer or floating-point
767 * expression. Vectorizes cleanly. Unlike in C, abs of a signed
768 * integer returns an unsigned integer of the same bit width. This
769 * means that abs of the most negative integer doesn't overflow. */
771
772/** Return the absolute difference between two values. Vectorizes
773 * cleanly. Returns an unsigned value of the same bit width. There are
774 * various ways to write this yourself, but they contain numerous
775 * gotchas and don't always compile to good code, so use this
776 * instead. */
778
779/** Returns an expression similar to the ternary operator in C, except
780 * that it always evaluates all arguments. If the first argument is
781 * true, then return the second, else return the third. Typically
782 * vectorizes cleanly, but benefits from SSE41 or newer on x86. */
783Expr select(Expr condition, Expr true_value, Expr false_value);
784
785/** A multi-way variant of select similar to a switch statement in C,
786 * which can accept multiple conditions and values in pairs. Evaluates
787 * to the first value for which the condition is true. Returns the
788 * final value if all conditions are false. */
789template<typename... Args,
790 typename std::enable_if<Halide::Internal::all_are_convertible<Expr, Args...>::value>::type * = nullptr>
791inline Expr select(Expr c0, Expr v0, Expr c1, Expr v1, Args &&...args) {
792 return select(std::move(c0), std::move(v0), select(std::move(c1), std::move(v1), std::forward<Args>(args)...));
793}
794
795/** Equivalent of ternary select(), but taking/returning tuples. If the condition is
796 * a Tuple, it must match the size of the true and false Tuples. */
797// @{
798Tuple tuple_select(const Tuple &condition, const Tuple &true_value, const Tuple &false_value);
799Tuple tuple_select(const Expr &condition, const Tuple &true_value, const Tuple &false_value);
800// @}
801
802/** Equivalent of multiway select(), but taking/returning tuples. If the condition is
803 * a Tuple, it must match the size of the true and false Tuples. */
804// @{
805template<typename... Args>
806inline Tuple tuple_select(const Tuple &c0, const Tuple &v0, const Tuple &c1, const Tuple &v1, Args &&...args) {
807 return tuple_select(c0, v0, tuple_select(c1, v1, std::forward<Args>(args)...));
808}
809
810template<typename... Args>
811inline Tuple tuple_select(const Expr &c0, const Tuple &v0, const Expr &c1, const Tuple &v1, Args &&...args) {
812 return tuple_select(c0, v0, tuple_select(c1, v1, std::forward<Args>(args)...));
813}
814// @}
815
816/** Oftentimes we want to pack a list of expressions with the same type
817 * into a channel dimension, e.g.,
818 * img(x, y, c) = select(c == 0, 100, // Red
819 * c == 1, 50, // Green
820 * 25); // Blue
821 * This is tedious when the list is long. The following function
822 * provides convenient syntax that allows one to write:
823 * img(x, y, c) = mux(c, {100, 50, 25});
824 *
825 * As with the select equivalent, if the first argument (the index) is
826 * out of range, the expression evaluates to the last value.
827 */
828// @{
829Expr mux(const Expr &id, const std::initializer_list<Expr> &values);
830Expr mux(const Expr &id, const std::vector<Expr> &values);
831Expr mux(const Expr &id, const Tuple &values);
832// @}
833
834/** Return the sine of a floating-point expression. If the argument is
835 * not floating-point, it is cast to Float(32). Does not vectorize
836 * well. */
838
839/** Return the arcsine of a floating-point expression. If the argument
840 * is not floating-point, it is cast to Float(32). Does not vectorize
841 * well. */
843
844/** Return the cosine of a floating-point expression. If the argument
845 * is not floating-point, it is cast to Float(32). Does not vectorize
846 * well. */
848
849/** Return the arccosine of a floating-point expression. If the
850 * argument is not floating-point, it is cast to Float(32). Does not
851 * vectorize well. */
853
854/** Return the tangent of a floating-point expression. If the argument
855 * is not floating-point, it is cast to Float(32). Does not vectorize
856 * well. */
858
859/** Return the arctangent of a floating-point expression. If the
860 * argument is not floating-point, it is cast to Float(32). Does not
861 * vectorize well. */
863
864/** Return the angle of a floating-point gradient. If the argument is
865 * not floating-point, it is cast to Float(32). Does not vectorize
866 * well. */
868
869/** Return the hyperbolic sine of a floating-point expression. If the
870 * argument is not floating-point, it is cast to Float(32). Does not
871 * vectorize well. */
873
874/** Return the hyperbolic arcsine of a floating-point expression. If
875 * the argument is not floating-point, it is cast to Float(32). Does
876 * not vectorize well. */
878
879/** Return the hyperbolic cosine of a floating-point expression. If
880 * the argument is not floating-point, it is cast to Float(32). Does
881 * not vectorize well. */
883
884/** Return the hyperbolic arccosine of a floating-point expression.
885 * If the argument is not floating-point, it is cast to
886 * Float(32). Does not vectorize well. */
888
889/** Return the hyperbolic tangent of a floating-point expression. If
890 * the argument is not floating-point, it is cast to Float(32). Does
891 * not vectorize well. */
893
894/** Return the hyperbolic arctangent of a floating-point expression.
895 * If the argument is not floating-point, it is cast to
896 * Float(32). Does not vectorize well. */
898
899/** Return the square root of a floating-point expression. If the
900 * argument is not floating-point, it is cast to Float(32). Typically
901 * vectorizes cleanly. */
903
904/** Return the square root of the sum of the squares of two
905 * floating-point expressions. If either argument is not floating-point,
906 * it is cast to Float(32). Vectorizes cleanly. */
907Expr hypot(const Expr &x, const Expr &y);
908
909/** Return the exponential of a floating-point expression. If the
910 * argument is not floating-point, it is cast to Float(32). For
911 * Float(64) arguments, this calls the system exp function, and does
912 * not vectorize well. For Float(32) arguments, this function is
913 * vectorizable, does the right thing for extremely small or extremely
914 * large inputs, and is accurate up to the last bit of the
915 * mantissa. Vectorizes cleanly. */
917
918/** Return the logarithm of a floating-point expression. If the
919 * argument is not floating-point, it is cast to Float(32). For
920 * Float(64) arguments, this calls the system log function, and does
921 * not vectorize well. For Float(32) arguments, this function is
922 * vectorizable, does the right thing for inputs <= 0 (returns -inf or
923 * nan), and is accurate up to the last bit of the
924 * mantissa. Vectorizes cleanly. */
926
927/** Return one floating point expression raised to the power of
928 * another. The type of the result is given by the type of the first
929 * argument. If the first argument is not a floating-point type, it is
930 * cast to Float(32). For Float(32), cleanly vectorizable, and
931 * accurate up to the last few bits of the mantissa. Gets worse when
932 * approaching overflow. Vectorizes cleanly. */
934
935/** Evaluate the error function erf of x. Only available for
936 * Float(32). Accurate up to the last three bits of the
937 * mantissa. Vectorizes cleanly. */
938Expr erf(const Expr &x);
939
940/** Fast vectorizable approximation to some trigonometric functions for Float(32).
941 * Absolute approximation error is less than 1e-5. */
942// @{
945// @}
946
947/** Fast approximate cleanly vectorizable log for Float(32). Returns
948 * nonsense for x <= 0.0f. Accurate up to the last 5 bits of the
949 * mantissa. Vectorizes cleanly. */
951
952/** Fast approximate cleanly vectorizable exp for Float(32). Returns
953 * nonsense for inputs that would overflow or underflow. Typically
954 * accurate up to the last 5 bits of the mantissa. Gets worse when
955 * approaching overflow. Vectorizes cleanly. */
957
958/** Fast approximate cleanly vectorizable pow for Float(32). Returns
959 * nonsense for x < 0.0f. Accurate up to the last 5 bits of the
960 * mantissa for typical exponents. Gets worse when approaching
961 * overflow. Vectorizes cleanly. */
963
964/** Fast approximate inverse for Float(32). Corresponds to the rcpps
965 * instruction on x86, and the vrecpe instruction on ARM. Vectorizes
966 * cleanly. Note that this can produce slightly different results
967 * across different implementations of the same architecture (e.g. AMD vs Intel),
968 * even when strict_float is enabled. */
970
971/** Fast approximate inverse square root for Float(32). Corresponds to
972 * the rsqrtps instruction on x86, and the vrsqrte instruction on
973 * ARM. Vectorizes cleanly. Note that this can produce slightly different results
974 * across different implementations of the same architecture (e.g. AMD vs Intel),
975 * even when strict_float is enabled. */
977
978/** Return the greatest whole number less than or equal to a
979 * floating-point expression. If the argument is not floating-point,
980 * it is cast to Float(32). The return value is still in floating
981 * point, despite being a whole number. Vectorizes cleanly. */
983
984/** Return the least whole number greater than or equal to a
985 * floating-point expression. If the argument is not floating-point,
986 * it is cast to Float(32). The return value is still in floating
987 * point, despite being a whole number. Vectorizes cleanly. */
989
990/** Return the whole number closest to a floating-point expression. If the
991 * argument is not floating-point, it is cast to Float(32). The return value is
992 * still in floating point, despite being a whole number. On ties, we round
993 * towards the nearest even integer. Note that this is not the same as
994 * std::round in C, which rounds away from zero. On platforms without a native
995 * instruction for this, it is emulated, and may be more expensive than
996 * cast<int>(x + 0.5f) or similar. */
998
999/** Return the integer part of a floating-point expression. If the argument is
1000 * not floating-point, it is cast to Float(32). The return value is still in
1001 * floating point, despite being a whole number. Vectorizes cleanly. */
1003
1004/** Returns true if the argument is a Not a Number (NaN). Requires a
1005 * floating point argument. Vectorizes cleanly.
1006 * Note that the Expr passed in will be evaluated in strict_float mode,
1007 * regardless of whether strict_float mode is enabled in the current Target. */
1009
1010/** Returns true if the argument is Inf or -Inf. Requires a
1011 * floating point argument. Vectorizes cleanly.
1012 * Note that the Expr passed in will be evaluated in strict_float mode,
1013 * regardless of whether strict_float mode is enabled in the current Target. */
1015
1016/** Returns true if the argument is a finite value (ie, neither NaN nor Inf).
1017 * Requires a floating point argument. Vectorizes cleanly.
1018 * Note that the Expr passed in will be evaluated in strict_float mode,
1019 * regardless of whether strict_float mode is enabled in the current Target. */
1021
1022/** Return the fractional part of a floating-point expression x. If the argument
1023 * is not floating-point, it is cast to Float(32). The return value has the
1024 * same sign as the original expression. Vectorizes cleanly. */
1025Expr fract(const Expr &x);
1026
1027/** Reinterpret the bits of one value as another type. */
1029
1030template<typename T>
1032 return reinterpret(type_of<T>(), std::move(e));
1033}
1034
1035/** Return the bitwise and of two expressions (which need not have the
1036 * same type). The result type is the wider of the two expressions.
1037 * Only integral types are allowed and both expressions must be signed
1038 * or both must be unsigned. */
1040
1041/** Return the bitwise and of an expression and an integer. The type
1042 * of the result is the type of the expression argument. */
1043// @{
1046// @}
1047
1048/** Return the bitwise or of two expressions (which need not have the
1049 * same type). The result type is the wider of the two expressions.
1050 * Only integral types are allowed and both expressions must be signed
1051 * or both must be unsigned. */
1053
1054/** Return the bitwise or of an expression and an integer. The type of
1055 * the result is the type of the expression argument. */
1056// @{
1059// @}
1060
1061/** Return the bitwise xor of two expressions (which need not have the
1062 * same type). The result type is the wider of the two expressions.
1063 * Only integral types are allowed and both expressions must be signed
1064 * or both must be unsigned. */
1066
1067/** Return the bitwise xor of an expression and an integer. The type
1068 * of the result is the type of the expression argument. */
1069// @{
1072// @}
1073
1074/** Return the bitwise not of an expression. */
1076
1077/** Shift the bits of an integer value left. This is actually less
1078 * efficient than multiplying by 2^n, because Halide's optimization
1079 * passes understand multiplication, and will compile it to
1080 * shifting. This operator is only for if you really really need bit
1081 * shifting (e.g. because the exponent is a run-time parameter). The
1082 * type of the result is equal to the type of the first argument. Both
1083 * arguments must have integer type. */
1084// @{
1087// @}
1088
1089/** Shift the bits of an integer value right. Does sign extension for
1090 * signed integers. This is less efficient than dividing by a power of
1091 * two. Halide's definition of division (always round to negative
1092 * infinity) means that all divisions by powers of two get compiled to
1093 * bit-shifting, and Halide's optimization routines understand
1094 * division and can work with it. The type of the result is equal to
1095 * the type of the first argument. Both arguments must have integer
1096 * type. */
1097// @{
1100// @}
1101
1102/** Linear interpolate between the two values according to a weight.
1103 * \param zero_val The result when weight is 0
1104 * \param one_val The result when weight is 1
1105 * \param weight The interpolation amount
1106 *
1107 * Both zero_val and one_val must have the same type. All types are
1108 * supported, including bool.
1109 *
1110 * The weight is treated as its own type and must be float or an
1111 * unsigned integer type. It is scaled to the bit-size of the type of
1112 * x and y if they are integer, or converted to float if they are
1113 * float. Integer weights are converted to float via division by the
1114 * full-range value of the weight's type. Floating-point weights used
1115 * to interpolate between integer values must be between 0.0f and
1116 * 1.0f, and an error may be signaled if it is not provably so. (clamp
1117 * operators can be added to provide proof. Currently an error is only
1118 * signalled for constant weights.)
1119 *
1120 * For integer linear interpolation, out of range values cannot be
1121 * represented. In particular, weights that are conceptually less than
1122 * 0 or greater than 1.0 are not representable. As such the result is
1123 * always between x and y (inclusive of course). For lerp with
1124 * floating-point values and floating-point weight, the full range of
1125 * a float is valid, however underflow and overflow can still occur.
1126 *
1127 * Ordering is not required between zero_val and one_val:
1128 * lerp(42, 69, .5f) == lerp(69, 42, .5f) == 56
1129 *
1130 * Results for integer types are for exactly rounded arithmetic. As
1131 * such, there are cases where 16-bit and float differ because 32-bit
1132 * floating-point (float) does not have enough precision to produce
1133 * the exact result. (Likely true for 32-bit integer
1134 * vs. double-precision floating-point as well.)
1135 *
1136 * At present, double precision and 64-bit integers are not supported.
1137 *
1138 * Generally, lerp will vectorize as if it were an operation on a type
1139 * twice the bit size of the inferred type for x and y.
1140 *
1141 * Some examples:
1142 * \code
1143 *
1144 * // Since Halide does not have direct type declarations, casts
1145 * // below are used to indicate the types of the parameters.
1146 * // Such casts are not required or expected in actual code where types
1147 * // are inferred.
1148 *
1149 * lerp(cast<float>(x), cast<float>(y), cast<float>(w)) ->
1150 * x * (1.0f - w) + y * w
1151 *
1152 * lerp(cast<uint8_t>(x), cast<uint8_t>(y), cast<uint8_t>(w)) ->
1153 * cast<uint8_t>(cast<uint8_t>(x) * (1.0f - cast<uint8_t>(w) / 255.0f) +
1154 * cast<uint8_t>(y) * cast<uint8_t>(w) / 255.0f + .5f)
1155 *
1156 * // Note addition in Halide promoted uint8_t + int8_t to int16_t already,
1157 * // the outer cast is added for clarity.
1158 * lerp(cast<uint8_t>(x), cast<int8_t>(y), cast<uint8_t>(w)) ->
1159 * cast<int16_t>(cast<uint8_t>(x) * (1.0f - cast<uint8_t>(w) / 255.0f) +
1160 * cast<int8_t>(y) * cast<uint8_t>(w) / 255.0f + .5f)
1161 *
1162 * lerp(cast<int8_t>(x), cast<int8_t>(y), cast<float>(w)) ->
1163 * cast<int8_t>(cast<int8_t>(x) * (1.0f - cast<float>(w)) +
1164 * cast<int8_t>(y) * cast<float>(w))
1165 *
1166 * \endcode
1167 * */
1169
1170/** Count the number of set bits in an expression. */
1172
1173/** Count the number of leading zero bits in an expression. If the expression is
1174 * zero, the result is the number of bits in the type. */
1176
1177/** Count the number of trailing zero bits in an expression. If the expression is
1178 * zero, the result is the number of bits in the type. */
1180
1181/** Divide two integers, rounding towards zero. This is the typical
1182 * behavior of most hardware architectures, which differs from
1183 * Halide's division operator, which is Euclidean (rounds towards
1184 * -infinity). Will throw a runtime error if y is zero, or if y is -1
1185 * and x is the minimum signed integer. */
1187
1188/** Compute the remainder of dividing two integers, when division is
1189 * rounding toward zero. This is the typical behavior of most hardware
1190 * architectures, which differs from Halide's mod operator, which is
1191 * Euclidean (produces the remainder when division rounds towards
1192 * -infinity). Will throw a runtime error if y is zero. */
1194
1195/** Return a random variable representing a uniformly distributed
1196 * float in the half-open interval [0.0f, 1.0f). For random numbers of
1197 * other types, use lerp with a random float as the last parameter.
1198 *
1199 * Optionally takes a seed.
1200 *
1201 * Note that:
1202 \code
1203 Expr x = random_float();
1204 Expr y = x + x;
1205 \endcode
1206 *
1207 * is very different to
1208 *
1209 \code
1210 Expr y = random_float() + random_float();
1211 \endcode
1212 *
1213 * The first doubles a random variable, and the second adds two
1214 * independent random variables.
1215 *
1216 * A given random variable takes on a unique value that depends
1217 * deterministically on the pure variables of the function they belong
1218 * to, the identity of the function itself, and which definition of
1219 * the function it is used in. They are, however, shared across tuple
1220 * elements.
1221 *
1222 * This function vectorizes cleanly.
1223 */
1225
1226/** Return a random variable representing a uniformly distributed
1227 * unsigned 32-bit integer. See \ref random_float. Vectorizes cleanly. */
1229
1230/** Return a random variable representing a uniformly distributed
1231 * 32-bit integer. See \ref random_float. Vectorizes cleanly. */
1233
1234/** Create an Expr that prints out its value whenever it is
1235 * evaluated. It also prints out everything else in the arguments
1236 * list, separated by spaces. This can include string literals.
 * The vector overload takes the items to print as 'values'. */
1237//@{
1238Expr print(const std::vector<Expr> &values);
1239
1240template<typename... Args>
1242 std::vector<Expr> collected_args = {std::move(a)};
1243 Internal::collect_print_args(collected_args, std::forward<Args>(args)...);
1244 return print(collected_args);
1245}
1246//@}
1247
1248/** Create an Expr that prints whenever it is evaluated, provided that
1249 * the condition is true. */
1250// @{
1251Expr print_when(Expr condition, const std::vector<Expr> &values);
1252
1253template<typename... Args>
1254inline HALIDE_NO_USER_CODE_INLINE Expr print_when(Expr condition, Expr a, Args &&...args) {
 // Start the list with 'a', hand the remaining arguments to
 // Internal::collect_print_args (declared elsewhere; presumably it appends
 // them to the list), then forward to the vector overload above.
1255 std::vector<Expr> collected_args = {std::move(a)};
1256 Internal::collect_print_args(collected_args, std::forward<Args>(args)...);
1257 return print_when(std::move(condition), collected_args);
1258}
1259
1260// @}
1261
1262/** Create an Expr that guarantees a precondition.
1263 * If 'condition' is true, the return value is equal to the first Expr.
1264 * If 'condition' is false, halide_error() is called, and the return value
1265 * is arbitrary. Any additional arguments after the first Expr are stringified
1266 * and passed as a user-facing message to halide_error(), similar to print().
1267 *
1268 * Note that this essentially *always* inserts a runtime check into the
1269 * generated code (except when the condition can be proven at compile time);
1270 * as such, it should be avoided inside inner loops, except for debugging
1271 * or testing purposes. Note also that it does not vectorize cleanly (vector
1272 * values will be scalarized for the check).
1273 *
1274 * However, using this to make assertions about (say) input values
1275 * can be useful, both in terms of correctness and (potentially) in terms
1276 * of code generation, e.g.
1277 \code
1278 Param<int> p;
1279 Expr y = require(p > 0, p);
1280 \endcode
1281 * will allow the optimizer to assume positive, nonzero values for y.
1282 */
1283// @{
1284Expr require(Expr condition, const std::vector<Expr> &values);
1285
1286template<typename... Args>
1287inline HALIDE_NO_USER_CODE_INLINE Expr require(Expr condition, Expr value, Args &&...args) {
 // Start the list with 'value', hand the remaining arguments to
 // Internal::collect_print_args (declared elsewhere; presumably it appends
 // them to the list), then forward to the vector overload above.
1288 std::vector<Expr> collected_args = {std::move(value)};
1289 Internal::collect_print_args(collected_args, std::forward<Args>(args)...);
1290 return require(std::move(condition), collected_args);
1291}
1292// @}
1293
1294/** Return an undef value of the given type. Halide skips stores that
1295 * depend on undef values, so you can use this to mean "do not modify
1296 * this memory location". This is an escape hatch that can be used for
1297 * several things:
1298 *
1299 * You can define a reduction with no pure step, by setting the pure
1300 * step to undef. Do this only if you're confident that the update
1301 * steps are sufficient to correctly fill in the domain.
1302 *
1303 * For a tuple-valued reduction, you can write an update step that
1304 * only updates some tuple elements.
1305 *
1306 * You can define single-stage pipeline that only has update steps,
1307 * and depends on the values already in the output buffer.
1308 *
1309 * Use this feature with great caution, as you can use it to load from
1310 * uninitialized memory.
1311 */
1313
1314template<typename T>
1315inline Expr undef() {
 // Convenience form taking the type as the template parameter T: forwards
 // type_of<T>() to the undef overload that takes a type argument.
1316 return undef(type_of<T>());
1317}
1318
1319namespace Internal {
1320
1321/** Return an expression that should never be evaluated. Expressions
1322 * that depend on unreachable values are also unreachable, and
1323 * statements that execute unreachable expressions are also considered
1324 * unreachable. */
1326
1327template<typename T>
1329 return unreachable(type_of<T>());
1330}
1331
1332} // namespace Internal
1333
1334/** Control the values used in the memoization cache key for memoize.
1335 * Normally parameters and other external dependencies are
1336 * automatically inferred and added to the cache key. The memoize_tag
1337 * operator allows computing one expression and using either the
1338 * computed value, or one or more other expressions in the cache key
1339 * instead of the parameter dependencies of the computation. The
1340 * single argument version is completely safe in that the cache key
1341 * will use the actual computed value -- it is difficult or impossible
1342 * to produce erroneous caching this way. The more-than-one argument
1343 * version allows generating cache keys that do not uniquely identify
1344 * the computation and thus can result in caching errors.
1345 *
1346 * A potential use for the single argument version is to handle a
1347 * floating-point parameter that is quantized to a small
1348 * integer. Multiple values of the float will produce the same integer
1349 * and moving the caching to using the integer for the key is more
1350 * efficient.
1351 *
1352 * The main use for the more-than-one argument version is to provide
1353 * cache key information for Handles and ImageParams, which otherwise
1354 * are not allowed inside compute_cached operations. E.g. when passing
1355 * a group of parameters to an external array function via a Handle,
1356 * memoize_tag can be used to isolate the actual values used by that
1357 * computation. If an ImageParam is a constant image with a persistent
1358 * digest, memoize_tag can be used to key computations using that image
1359 * on the digest. */
1360// @{
1361template<typename... Args>
1363 std::vector<Expr> collected_args{std::forward<Args>(args)...};
1364 return Internal::memoize_tag_helper(std::move(result), collected_args);
1365}
1366// @}
1367
1368/** Expressions tagged with this intrinsic are considered to be part
1369 * of the steady state of some loop with a nasty beginning and end
1370 * (e.g. a boundary condition). When Halide encounters likely
1371 * intrinsics, it splits the containing loop body into three, and
1372 * tries to simplify down all conditions that lead to the likely. For
1373 * example, given the expression: select(x < 1, bar, x > 10, bar,
1374 * likely(foo)), Halide will split the loop over x into portions where
1375 * x < 1, 1 <= x <= 10, and x > 10.
1376 *
1377 * You're unlikely to want to call this directly. You probably want to
1378 * use the boundary condition helpers in the BoundaryConditions
1379 * namespace instead.
1380 */
1382
1383/** Equivalent to likely, but only triggers a loop partitioning if
1384 * found in an innermost loop. */
1386
1387/** Cast an expression to the halide type corresponding to the C++
1388 * type T. As part of the cast, clamp to the minimum and maximum
1389 * values of the result type. */
1390template<typename T>
1392 return saturating_cast(type_of<T>(), std::move(e));
1393}
1394
1395/** Cast an expression to a new type, clamping to the minimum and
1396 * maximum values of the result type. */
1398
1399/** Makes a best effort attempt to preserve IEEE floating-point
1400 * semantics in evaluating an expression. May not be implemented for
1401 * all backends. (E.g. it is difficult to do this for C++ code
1402 * generation as it depends on the compiler flags used to compile the
1403 * generated code.) */
1405
1406/** Create an Expr that promises another Expr is clamped but does
1407 * not generate code to check the assertion or modify the value. No
1408 * attempt is made to prove the bound at compile time. (If it is
1409 * proved false as a result of something else, an error might be
1410 * generated, but it is also possible the compiler will crash.) The
1411 * promised bound is used in bounds inference so it will allow
1412 * satisfying bounds checks as well as possibly aiding optimization.
1413 *
1414 * unsafe_promise_clamped returns its first argument, the Expr 'value'.
1415 *
1416 * This is a very easy way to make Halide generate erroneous code if
1417 * the promised bound is not kept. Use sparingly when there is no
1418 * other way to convey the information to the compiler and it is
1419 * required for a valuable optimization.
1420 *
1421 * Unsafe promises can be checked by turning on
1422 * Target::CheckUnsafePromises. This is intended for debugging only.
1423 */
1424Expr unsafe_promise_clamped(const Expr &value, const Expr &min, const Expr &max);
1425
1426namespace Internal {
1427/**
1428 * FOR INTERNAL USE ONLY.
1429 *
1430 * An entirely unchecked version of unsafe_promise_clamped, used
1431 * inside the compiler as an annotation of the known bounds of an Expr
1432 * when it has proved something is bounded and wants to record that
1433 * fact for later passes (notably bounds inference) to exploit. This
1434 * gets introduced by GuardWithIf tail strategies, because the bounds
1435 * machinery has a hard time exploiting if statement conditions.
1436 *
1437 * Unlike unsafe_promise_clamped, this expression is
1438 * context-dependent, because 'value' might be statically bounded at
1439 * some point in the IR (e.g. due to a containing if statement), but
1440 * not elsewhere.
1441 *
1442 * This intrinsic always evaluates to its first argument. If this value is
1443 * used by a side-effecting operation and it is outside the range specified
1444 * by its second and third arguments, behavior is undefined. The compiler can
1445 * therefore assume that the value is within the range given and optimize
1446 * accordingly. Note that this permits promise_clamped to evaluate to
1447 * something outside of the range, provided that this value is not used.
1448 *
1449 * Note that this produces an intrinsic that is marked as 'pure' and thus is
1450 * allowed to be hoisted, etc.; thus, extra care must be taken with its use.
 *
 * \param value The Expr that the intrinsic evaluates to (the first argument).
 * \param min The second argument; one end of the promised range.
 * \param max The third argument; the other end of the promised range.
1451 **/
1452Expr promise_clamped(const Expr &value, const Expr &min, const Expr &max);
1453} // namespace Internal
1454
1455/** Scatter and gather are used for update definitions which must store
1456 * multiple values to distinct locations at the same time. The
1457 * multiple expressions on the right-hand-side are bundled together
1458 * into a "gather", which must match a "scatter" with the same number
1459 * of arguments on the left-hand-side. For example, to store the
1460 * values 1 and 2 to the locations (x, y, 3) and (x, y, 4),
1461 * respectively:
1462 *
1463\code
1464f(x, y, scatter(3, 4)) = gather(1, 2);
1465\endcode
1466 *
1467 * The result of gather or scatter can be treated as an
1468 * expression. Any containing operations on it can be assumed to
1469 * distribute over the elements. If two gather expressions are
1470 * combined with an arithmetic operator (e.g. added), they combine
1471 * element-wise. The following example stores the values 2 * x, 2 * y,
1472 * and 2 * c to the locations (x + 1, y, c), (x, y + 3, c), and (x, y,
1473 * c + 2) respectively:
1474 *
1475\code
1476f(x + scatter(1, 0, 0), y + scatter(0, 3, 0), c + scatter(0, 0, 2)) = 2 * gather(x, y, c);
1477\endcode
1478*
1479* Repeated values in the scatter cause multiple stores to the same
1480* location. The stores happen in order from left to right, so the
1481* rightmost value wins. The following code is equivalent to f(x) = 5
1482*
1483\code
1484f(scatter(x, x)) = gather(3, 5);
1485\endcode
1486*
1487* Gathers are most useful for algorithms which require in-place
1488* swapping or permutation of multiple elements, or other kinds of
1489* in-place mutations that require loading multiple inputs, doing some
1490* operations to them jointly, then storing them again. The following
1491* update definition swaps the values of f at locations 3 and 5 if an
1492* input parameter p is true:
1493*
1494\code
1495f(scatter(3, 5)) = f(select(p, gather(5, 3), gather(3, 5)));
1496\endcode
1497*
1498* For more examples of the use of scatter and gather, see
1499* test/correctness/multiple_scatter.cpp
1500*
1501* It is not currently possible to use scatter and gather to write an
1502* update definition in which the *number* of values loaded or stored
1503* varies, as the size of the scatter/gather packet must be fixed at
1504* compile-time. A workaround is to make the unwanted extra operations
1505* a redundant copy of the last operation, which will be
1506* dead-code-eliminated by the compiler. For example, the following
1507* update definition swaps the values at locations 3 and 5 when the
1508* parameter p is true, and rotates the values at locations 1, 2, and 3
1509* when it is false. The load from 3 and store to 5 will be redundantly
1510* repeated:
1511*
1512\code
1513f(select(p, scatter(3, 5, 5), scatter(1, 2, 3))) = f(select(p, gather(5, 3, 3), gather(2, 3, 1)));
1514\endcode
1515*
1516* Note that in the p == true case, we redundantly load from 3 and write
1517* to 5 twice.
1518*/
1519//@{
1520Expr scatter(const std::vector<Expr> &args);
1521Expr gather(const std::vector<Expr> &args);
1522
1523template<typename... Args>
1524Expr scatter(const Expr &e, Args &&...args) {
 // Variadic convenience form: pack 'e' and the remaining arguments into a
 // braced list and pass it on (presumably resolving to the
 // std::vector<Expr> overload declared above).
1525 return scatter({e, std::forward<Args>(args)...});
1526}
1527
1528template<typename... Args>
1529Expr gather(const Expr &e, Args &&...args) {
 // Variadic convenience form: pack 'e' and the remaining arguments into a
 // braced list and pass it on (presumably resolving to the
 // std::vector<Expr> overload declared above).
1530 return gather({e, std::forward<Args>(args)...});
1531}
1532// @}
1533
1534/** Extract a contiguous subsequence of the bits of 'e', starting at the bit
1535 * index given by 'lsb', where zero is the least-significant bit, returning a
1536 * value of type 't'. Any out-of-range bits requested are filled with zeros.
1537 *
1538 * extract_bits is especially useful when one wants to load a small vector of a
1539 * wide type, and treat it as a larger vector of a smaller type. For example,
1540 * loading a vector of 32 uint8 values from a uint32 Func can be done as
1541 * follows:
1542\code
1543f8(x) = extract_bits<uint8_t>(f32(x/4), 8*(x%4));
1544f8.align_bounds(x, 4).vectorize(x, 32);
1545\endcode
1546 * Note that the align_bounds call is critical so that the narrow Exprs are
1547 * aligned to the wider Exprs. This makes the x%4 term collapse to a
1548 * constant. If f8 is an output Func, then constraining the min value of x to be
1549 * a known multiple of four would also be sufficient, e.g. via:
1550\code
1551f8.output_buffer().dim(0).set_min(0);
1552\endcode
1553 *
1554 * See test/correctness/extract_concat_bits.cpp for a complete example. */
1555// @{
1556Expr extract_bits(Type t, const Expr &e, const Expr &lsb);
1557
1558template<typename T>
1559Expr extract_bits(const Expr &e, const Expr &lsb) {
 // Convenience form taking the result type as the template parameter T:
 // forwards type_of<T>() as 't' to the overload above.
1560 return extract_bits(type_of<T>(), e, lsb);
1561}
1562// @}
1563
1564/** Given a number of Exprs of the same type, concatenate their bits producing a
1565 * single Expr of the same type code as the inputs but with more bits. The
1566 * number of arguments must be a power of two.
1567 *
1568 * concat_bits is especially useful when one wants to treat a Func containing
1569 * values of a narrow type as a Func containing fewer values of a wider
1570 * type. For example, the following code reinterprets vectors of 32 uint8 values
1571 * as a vector of 8 uint32s:
1572 *
1573\code
1574f32(x) = concat_bits({f8(4*x), f8(4*x + 1), f8(4*x + 2), f8(4*x + 3)});
1575f32.vectorize(x, 8);
1576\endcode
1577 *
1578 * See test/correctness/extract_concat_bits.cpp for a complete example.
1579 */
1580Expr concat_bits(const std::vector<Expr> &e);
1581
1582/** Below is a collection of intrinsics for fixed-point programming. Most of
1583 * them can be expressed via other means, but this is more natural for some, as
1584 * it avoids ghost widened intermediates that don't (or shouldn't) actually show
1585 * up in codegen, and doesn't rely on pattern-matching inside the compiler to
1586 * succeed to get good instruction selection.
1587 *
1588 * The semantics of each call are defined in terms of a non-existent 'widen' and
1589 * 'narrow' operators, which stand in for casts that double or halve the
1590 * bit-width of a type respectively.
1591 */
1592
1593/** Compute a + widen(b). */
1595
1596/** Compute a * widen(b). */
1598
1599/** Compute a - widen(b). */
1601
1602/** Compute widen(a) + widen(b). */
1604
1605/** Compute widen(a) * widen(b). a and b may have different signedness, in which
1606 * case the result is signed. */
1608
1609/** Compute widen(a) - widen(b). The result is always signed. */
1611
1612/** Compute widen(a) << b. */
1613//@{
1616//@}
1617
1618/** Compute widen(a) >> b. */
1619//@{
1622//@}
1623
1624/** Compute saturating_narrow(widening_add(a, (1 >> min(b, 0)) / 2) << b).
1625 * When b is positive indicating a left shift, the rounding term is zero. */
1626//@{
1629//@}
1630
1631/** Compute saturating_narrow(widening_add(a, (1 << max(b, 0)) / 2) >> b).
1632 * When b is negative indicating a left shift, the rounding term is zero. */
1633//@{
1636//@}
1637
1638/** Compute saturating_narrow(widen(a) + widen(b)) */
1640
1641/** Compute saturating_narrow(widen(a) - widen(b)) */
1643
1644/** Compute narrow((widen(a) + widen(b)) / 2) */
1646
1647/** Compute narrow((widen(a) + widen(b) + 1) / 2) */
1649
1650/** Compute narrow((widen(a) - widen(b)) / 2) */
1652
1653/** Compute saturating_narrow(shift_right(widening_mul(a, b), q)) */
1654//@{
1657//@}
1658
1659/** Compute saturating_narrow(rounding_shift_right(widening_mul(a, b), q)) */
1660//@{
1663//@}
1664
1665namespace Internal {
1666
1667template<typename T = void>
1668HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1669Expr widen_right_add(const Expr &a, const Expr &b, T * = nullptr) {
1670 return Halide::widen_right_add(a, b);
1671}
1672template<typename T = void>
1673HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1674Expr widen_right_mul(const Expr &a, const Expr &b, T * = nullptr) {
1675 return Halide::widen_right_mul(a, b);
1676}
1677template<typename T = void>
1678HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1679Expr widen_right_sub(const Expr &a, const Expr &b, T * = nullptr) {
1680 return Halide::widen_right_sub(a, b);
1681}
1682template<typename T = void>
1683HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1684Expr widening_add(const Expr &a, const Expr &b, T * = nullptr) {
1685 return Halide::widening_add(a, b);
1686}
1687template<typename T = void>
1688HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1689Expr widening_mul(const Expr &a, const Expr &b, T * = nullptr) {
1690 return Halide::widening_mul(a, b);
1691}
1692template<typename T = void>
1693HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1694Expr widening_sub(const Expr &a, const Expr &b, T * = nullptr) {
1695 return Halide::widening_sub(a, b);
1696}
1697template<typename T = void>
1698HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1700 return Halide::widening_shift_left(a, b);
1701}
1702template<typename T = void>
1703HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1704Expr widening_shift_left(const Expr &a, int b, T * = nullptr) {
1705 return Halide::widening_shift_left(a, b);
1706}
1707template<typename T = void>
1708HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1710 return Halide::widening_shift_right(a, b);
1711}
1712template<typename T = void>
1713HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1714Expr widening_shift_right(const Expr &a, int b, T * = nullptr) {
1715 return Halide::widening_shift_right(a, b);
1716}
1717template<typename T = void>
1718HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1720 return Halide::widening_shift_left(a, b);
1721}
1722template<typename T = void>
1723HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1724Expr rounding_shift_left(const Expr &a, int b, T * = nullptr) {
1725 return Halide::widening_shift_left(a, b);
1726}
1727template<typename T = void>
1728HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1730 return Halide::rounding_shift_right(a, b);
1731}
1732template<typename T = void>
1733HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1734Expr rounding_shift_right(const Expr &a, int b, T * = nullptr) {
1735 return Halide::rounding_shift_right(a, b);
1736}
1737template<typename T = void>
1738HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1739Expr saturating_add(const Expr &a, const Expr &b, T * = nullptr) {
1740 return Halide::saturating_add(a, b);
1741}
1742template<typename T = void>
1743HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1744Expr saturating_sub(const Expr &a, const Expr &b, T * = nullptr) {
1745 return Halide::saturating_sub(a, b);
1746}
1747template<typename T = void>
1748HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1749Expr halving_add(const Expr &a, const Expr &b, T * = nullptr) {
1750 return Halide::halving_add(a, b);
1751}
1752template<typename T = void>
1753HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1755 return Halide::rounding_halving_add(a, b);
1756}
1757template<typename T = void>
1758HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1759Expr halving_sub(const Expr &a, const Expr &b, T * = nullptr) {
1760 return Halide::halving_sub(a, b);
1761}
1762template<typename T = void>
1763HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1764Expr mul_shift_right(const Expr &a, const Expr &b, const Expr &q, T * = nullptr) {
1765 return Halide::mul_shift_right(a, b, q);
1766}
1767template<typename T = void>
1768HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1769Expr mul_shift_right(const Expr &a, const Expr &b, int q, T * = nullptr) {
1770 return Halide::mul_shift_right(a, b, q);
1771}
1772template<typename T = void>
1773HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1775 return Halide::rounding_mul_shift_right(a, b, q);
1776}
1777template<typename T = void>
1778HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1779Expr rounding_mul_shift_right(const Expr &a, const Expr &b, int q, T * = nullptr) {
1780 return Halide::rounding_mul_shift_right(a, b, q);
1781}
1782} // namespace Internal
1783
1784} // namespace Halide
1785
1786#endif
Base classes for Halide expressions (Halide::Expr) and statements (Halide::Internal::Stmt)
#define HALIDE_ATTRIBUTE_DEPRECATED(x)
Defines Tuple - the front-end handle on small arrays of expressions.
#define HALIDE_NO_USER_CODE_INLINE
Definition Util.h:45
Create a small array of Exprs for defining and calling functions with multiple outputs.
Definition Tuple.h:18
Expr make_one(Type t)
Construct the representation of one in the given type.
T div_imp(T a, T b)
Definition IROperator.h:260
bool is_const_zero(const Expr &e)
Is the expression a const (as defined by is_const), and also equal to zero (in all lanes,...
Expr saturating_add(const Expr &a, const Expr &b, T *=nullptr)
Expr memoize_tag_helper(Expr result, const std::vector< Expr > &cache_key_values)
const double * as_const_float(const Expr &e)
If an expression is a FloatImm or a Broadcast of a FloatImm, return a pointer to its value.
Expr widen_right_sub(const Expr &a, const Expr &b, T *=nullptr)
Expr make_zero(Type t)
Construct the representation of zero in the given type.
bool is_negative_const(const Expr &e)
Is the expression a const (as defined by is_const), and also strictly less than zero (in all lanes,...
bool is_undef(const Expr &e)
Is the expression an undef.
Expr requirement_failed_error(Expr condition, const std::vector< Expr > &args)
Expr make_two(Type t)
Construct the representation of two in the given type.
void check_representable(Type t, int64_t val)
Check if a constant value can be correctly represented as the given type.
Expr halide_erf(const Expr &a)
bool is_const_one(const Expr &e)
Is the expression a const (as defined by is_const), and also equal to one (in all lanes,...
Expr widening_shift_left(const Expr &a, const Expr &b, T *=nullptr)
void match_types(Expr &a, Expr &b)
Coerce the two expressions to have the same type, using C-style casting rules.
double div_imp< double >(double a, double b)
Definition IROperator.h:301
Expr saturating_sub(const Expr &a, const Expr &b, T *=nullptr)
Expr halide_exp(const Expr &a)
Expr halving_sub(const Expr &a, const Expr &b, T *=nullptr)
Expr make_const(Type t, int64_t val)
Construct an immediate of the given type from any numeric C++ type.
Expr widening_shift_right(const Expr &a, const Expr &b, T *=nullptr)
const int64_t * as_const_int(const Expr &e)
If an expression is an IntImm or a Broadcast of an IntImm, return a pointer to its value.
bool is_positive_const(const Expr &e)
Is the expression a const (as defined by is_const), and also strictly greater than zero (in all lanes...
Expr const_true(int lanes=1)
Construct the constant boolean true.
bool is_signed_integer_overflow(const Expr &expr)
Check if an expression is a signed_integer_overflow.
T mod_imp(T a, T b)
Implementations of division and mod that are specific to Halide.
Definition IROperator.h:239
Expr halide_log(const Expr &a)
Halide's vectorizable transcendentals.
bool is_pure(const Expr &e)
Does the expression 1) Take on the same value no matter where it appears in a Stmt,...
void split_into_ands(const Expr &cond, std::vector< Expr > &result)
Split a boolean condition into vector of ANDs.
Expr promise_clamped(const Expr &value, const Expr &min, const Expr &max)
FOR INTERNAL USE ONLY.
bool is_no_op(const Stmt &s)
Is the statement a no-op (which we represent as either an undefined Stmt, or as an Evaluate node of a...
Expr unwrap_tags(const Expr &e)
If the expression is a tag helper call, remove it and return the tagged expression.
float div_imp< float >(float a, float b)
Definition IROperator.h:297
bool is_const_power_of_two_integer(const Expr &e, int *bits)
Is the expression a constant integer power of two.
Expr lossless_negate(const Expr &x)
Attempt to negate x without introducing new IR and without overflow.
const uint64_t * as_const_uint(const Expr &e)
If an expression is a UIntImm or a Broadcast of a UIntImm, return a pointer to its value.
Expr strided_ramp_base(const Expr &e, int stride=1)
If e is a ramp expression with stride, default 1, return the base, otherwise undefined.
Expr remove_promises(const Expr &e)
Return an Expr that is identical to the input Expr, but with all calls to promise_clamped() and unsaf...
Expr widening_sub(const Expr &a, const Expr &b, T *=nullptr)
Expr rounding_mul_shift_right(const Expr &a, const Expr &b, const Expr &q, T *=nullptr)
Expr rounding_halving_add(const Expr &a, const Expr &b, T *=nullptr)
Expr widening_add(const Expr &a, const Expr &b, T *=nullptr)
Expr const_false(int lanes=1)
Construct the constant boolean false.
Expr rounding_shift_left(const Expr &a, const Expr &b, T *=nullptr)
double mod_imp< double >(double a, double b)
Definition IROperator.h:291
Expr widen_right_mul(const Expr &a, const Expr &b, T *=nullptr)
Expr rounding_shift_right(const Expr &a, const Expr &b, T *=nullptr)
Expr make_bool(bool val, int lanes=1)
Construct a boolean constant from a C++ boolean value.
Expr mul_shift_right(const Expr &a, const Expr &b, const Expr &q, T *=nullptr)
HALIDE_NO_USER_CODE_INLINE void collect_print_args(std::vector< Expr > &args)
Definition IROperator.h:335
void match_types_bitwise(Expr &a, Expr &b, const char *op_name)
Asserts that both expressions are integer types and are either both signed or both unsigned.
Expr halving_add(const Expr &a, const Expr &b, T *=nullptr)
float mod_imp< float >(float a, float b)
Definition IROperator.h:285
Expr lossless_cast(Type t, Expr e)
Attempt to cast an expression to a smaller type while provably not losing information.
Expr widening_mul(const Expr &a, const Expr &b, T *=nullptr)
Expr raise_to_integer_power(Expr a, int64_t b)
Raise an expression to an integer power by repeatedly multiplying it by itself.
Expr make_signed_integer_overflow(Type type)
Construct a unique signed_integer_overflow Expr.
bool is_const(const Expr &e)
Is the expression either an IntImm, a FloatImm, a StringImm, or a Cast of the same,...
Expr widen_right_add(const Expr &a, const Expr &b, T *=nullptr)
Expr remove_likelies(const Expr &e)
Return an Expr that is identical to the input Expr, but with all calls to likely() and likely_if_inne...
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
auto operator>=(const Other &a, const GeneratorParam< T > &b) -> decltype(a >=(T) b)
Greater than or equal comparison between GeneratorParam<T> and any type that supports operator>= with...
Definition Generator.h:1101
Expr log(Expr x)
Return the logarithm of a floating-point expression.
Expr operator>>(Expr x, Expr y)
Shift the bits of an integer value right.
Expr ceil(Expr x)
Return the least whole number greater than or equal to a floating-point expression.
Expr widen_right_add(Expr a, Expr b)
Below is a collection of intrinsics for fixed-point programming.
Expr rounding_shift_right(Expr a, Expr b)
Compute saturating_narrow(widening_add(a, (1 << max(b, 0)) / 2) >> b).
HALIDE_NO_USER_CODE_INLINE Expr memoize_tag(Expr result, Args &&...args)
Control the values used in the memoization cache key for memoize.
Expr fast_log(const Expr &x)
Fast approximate cleanly vectorizable log for Float(32).
Expr count_leading_zeros(Expr x)
Count the number of leading zero bits in an expression.
Expr reinterpret(Type t, Expr e)
Reinterpret the bits of one value as another type.
Expr saturating_add(Expr a, Expr b)
Compute saturating_narrow(widen(a) + widen(b))
auto operator==(const Other &a, const GeneratorParam< T > &b) -> decltype(a==(T) b)
Equality comparison between GeneratorParam<T> and any type that supports operator== with T.
Definition Generator.h:1127
Expr fast_cos(const Expr &x)
Expr & operator*=(Expr &a, Expr b)
Modify the first expression to be the product of two expressions, without changing its type.
Expr random_uint(Expr seed=Expr())
Return a random variable representing a uniformly distributed unsigned 32-bit integer.
@ Internal
Not visible externally, similar to 'static' linkage in C.
Expr fract(const Expr &x)
Return the fractional part of a floating-point expression.
Expr halving_add(Expr a, Expr b)
Compute narrow((widen(a) + widen(b)) / 2)
Expr & operator-=(Expr &a, Expr b)
Modify the first expression to be the difference of two expressions, without changing its type.
auto operator<(const Other &a, const GeneratorParam< T > &b) -> decltype(a<(T) b)
Less than comparison between GeneratorParam<T> and any type that supports operator< with T.
Definition Generator.h:1088
Expr widening_shift_right(Expr a, Expr b)
Compute widen(a) >> b.
auto operator*(const Other &a, const GeneratorParam< T > &b) -> decltype(a *(T) b)
Multiplication between GeneratorParam<T> and any type that supports operator* with T.
Definition Generator.h:1036
Expr trunc(Expr x)
Return the integer part of a floating-point expression.
Expr halving_sub(Expr a, Expr b)
Compute narrow((widen(a) - widen(b)) / 2)
auto operator||(const Other &a, const GeneratorParam< T > &b) -> decltype(a||(T) b)
Logical or between GeneratorParam<T> and any type that supports operator|| with T.
Definition Generator.h:1170
Expr acosh(Expr x)
Return the hyperbolic arccosine of a floating-point expression.
Expr fast_inverse(Expr x)
Fast approximate inverse for Float(32).
Expr asin(Expr x)
Return the arcsine of a floating-point expression.
Expr rounding_shift_left(Expr a, Expr b)
Compute saturating_narrow(widening_add(a, (1 >> min(b, 0)) / 2) << b).
auto operator-(const Other &a, const GeneratorParam< T > &b) -> decltype(a -(T) b)
Subtraction between GeneratorParam<T> and any type that supports operator- with T.
Definition Generator.h:1023
Expr clamp(Expr a, const Expr &min_val, const Expr &max_val)
Clamps an expression to lie within the given bounds.
Expr hypot(const Expr &x, const Expr &y)
Return the square root of the sum of the squares of two floating-point expressions.
Expr popcount(Expr x)
Count the number of set bits in an expression.
Expr saturating_sub(Expr a, Expr b)
Compute saturating_narrow(widen(a) - widen(b))
Expr gather(const std::vector< Expr > &args)
Expr print_when(Expr condition, const std::vector< Expr > &values)
Create an Expr that prints whenever it is evaluated, provided that the condition is true.
Expr widening_shift_left(Expr a, Expr b)
Compute widen(a) << b.
Expr pow(Expr x, Expr y)
Return one floating point expression raised to the power of another.
Expr operator&(Expr x, Expr y)
Return the bitwise and of two expressions (which need not have the same type).
Expr undef()
Expr cast(Expr a)
Cast an expression to the halide type corresponding to the C++ type T.
Definition IROperator.h:358
auto operator!(const GeneratorParam< T > &a) -> decltype(!(T) a)
Not operator for GeneratorParam.
Definition Generator.h:1242
Expr lerp(Expr zero_val, Expr one_val, Expr weight)
Linear interpolate between the two values according to a weight.
Expr atan2(Expr y, Expr x)
Return the angle of a floating-point gradient.
Expr saturating_cast(Expr e)
Cast an expression to the halide type corresponding to the C++ type T.
Expr random_float(Expr seed=Expr())
Return a random variable representing a uniformly distributed float in the half-open interval [0....
Expr sin(Expr x)
Return the sine of a floating-point expression.
Expr unsafe_promise_clamped(const Expr &value, const Expr &min, const Expr &max)
Create an Expr that promises another Expr is clamped but do not generate code to check the asser...
Expr rounding_halving_add(Expr a, Expr b)
Compute narrow((widen(a) + widen(b) + 1) / 2)
Expr extract_bits(Type t, const Expr &e, const Expr &lsb)
Extract a contiguous subsequence of the bits of 'e', starting at the bit index given by 'lsb',...
Expr concat_bits(const std::vector< Expr > &e)
Given a number of Exprs of the same type, concatenate their bits producing a single Expr of the same ...
Expr mux(const Expr &id, const std::initializer_list< Expr > &values)
Oftentimes we want to pack a list of expressions with the same type into a channel dimension,...
Expr cosh(Expr x)
Return the hyperbolic cosine of a floating-point expression.
std::ostream & operator<<(std::ostream &stream, const Expr &)
Emit an expression on an output stream (such as std::cout) in human-readable form.
Type Int(int bits, int lanes=1)
Constructing a signed integer type.
Definition Type.h:526
Expr acos(Expr x)
Return the arccosine of a floating-point expression.
Expr fast_exp(const Expr &x)
Fast approximate cleanly vectorizable exp for Float(32).
Expr widening_add(Expr a, Expr b)
Compute widen(a) + widen(b).
Expr cos(Expr x)
Return the cosine of a floating-point expression.
auto operator+(const Other &a, const GeneratorParam< T > &b) -> decltype(a+(T) b)
Addition between GeneratorParam<T> and any type that supports operator+ with T.
Definition Generator.h:1010
Expr min(const FuncRef &a, const FuncRef &b)
Explicit overloads of min and max for FuncRef.
Definition Func.h:584
Expr exp(Expr x)
Return the exponential of a floating-point expression.
Expr widen_right_mul(Expr a, Expr b)
Compute a * widen(b).
Expr absd(Expr a, Expr b)
Return the absolute difference between two values.
auto operator&&(const Other &a, const GeneratorParam< T > &b) -> decltype(a &&(T) b)
Logical and between GeneratorParam<T> and any type that supports operator&& with T.
Definition Generator.h:1153
Tuple tuple_select(const Tuple &condition, const Tuple &true_value, const Tuple &false_value)
Equivalent of ternary select(), but taking/returning tuples.
Expr fast_sin(const Expr &x)
Fast vectorizable approximation to some trigonometric functions for Float(32).
Expr fast_pow(Expr x, Expr y)
Fast approximate cleanly vectorizable pow for Float(32).
auto operator%(const Other &a, const GeneratorParam< T > &b) -> decltype(a %(T) b)
Modulo between GeneratorParam<T> and any type that supports operator% with T.
Definition Generator.h:1062
@ C
No name mangling.
Expr round(Expr x)
Return the whole number closest to a floating-point expression.
Expr select(Expr condition, Expr true_value, Expr false_value)
Returns an expression similar to the ternary operator in C, except that it always evaluates all argum...
Expr count_trailing_zeros(Expr x)
Count the number of trailing zero bits in an expression.
Expr scatter(const std::vector< Expr > &args)
Scatter and gather are used for update definition which must store multiple values to distinct locati...
auto operator<=(const Other &a, const GeneratorParam< T > &b) -> decltype(a<=(T) b)
Less than or equal comparison between GeneratorParam<T> and any type that supports operator<= with T.
Definition Generator.h:1114
Expr rounding_mul_shift_right(Expr a, Expr b, Expr q)
Compute saturating_narrow(rounding_shift_right(widening_mul(a, b), q))
Expr random_int(Expr seed=Expr())
Return a random variable representing a uniformly distributed 32-bit integer.
Expr mod_round_to_zero(Expr x, Expr y)
Compute the remainder of dividing two integers, when division is rounding toward zero.
Expr strict_float(Expr e)
Makes a best effort attempt to preserve IEEE floating-point semantics in evaluating an expression.
Expr & operator/=(Expr &a, Expr b)
Modify the first expression to be the ratio of two expressions, without changing its type.
Expr widening_mul(Expr a, Expr b)
Compute widen(a) * widen(b).
auto operator>(const Other &a, const GeneratorParam< T > &b) -> decltype(a >(T) b)
Greater than comparison between GeneratorParam<T> and any type that supports operator> with T.
Definition Generator.h:1075
Expr is_nan(Expr x)
Returns true if the argument is a Not a Number (NaN).
Expr asinh(Expr x)
Return the hyperbolic arcsine of a floating-point expression.
Expr sqrt(Expr x)
Return the square root of a floating-point expression.
Expr sinh(Expr x)
Return the hyperbolic sine of a floating-point expression.
Expr atan(Expr x)
Return the arctangent of a floating-point expression.
Expr operator|(Expr x, Expr y)
Return the bitwise or of two expressions (which need not have the same type).
auto operator!=(const Other &a, const GeneratorParam< T > &b) -> decltype(a !=(T) b)
Inequality comparison between GeneratorParam<T> and any type that supports operator!...
Definition Generator.h:1140
Expr require(Expr condition, const std::vector< Expr > &values)
Create an Expr that guarantees a precondition.
Expr is_inf(Expr x)
Returns true if the argument is Inf or -Inf.
Expr is_finite(Expr x)
Returns true if the argument is a finite value (ie, neither NaN nor Inf).
Expr tanh(Expr x)
Return the hyperbolic tangent of a floating-point expression.
Expr likely_if_innermost(Expr e)
Equivalent to likely, but only triggers a loop partitioning if found in an innermost loop.
Expr atanh(Expr x)
Return the hyperbolic arctangent of a floating-point expression.
Expr tan(Expr x)
Return the tangent of a floating-point expression.
Expr fast_inverse_sqrt(Expr x)
Fast approximate inverse square root for Float(32).
Expr print(const std::vector< Expr > &values)
Create an Expr that prints out its value whenever it is evaluated.
Expr mul_shift_right(Expr a, Expr b, Expr q)
Compute saturating_narrow(shift_right(widening_mul(a, b), q))
auto operator/(const Other &a, const GeneratorParam< T > &b) -> decltype(a/(T) b)
Division between GeneratorParam<T> and any type that supports operator/ with T.
Definition Generator.h:1049
Expr & operator+=(Expr &a, Expr b)
Modify the first expression to be the sum of two expressions, without changing its type.
Expr abs(Expr a)
Returns the absolute value of a signed integer or floating-point expression.
Expr widen_right_sub(Expr a, Expr b)
Compute a - widen(b).
Expr max(const FuncRef &a, const FuncRef &b)
Definition Func.h:587
Expr floor(Expr x)
Return the greatest whole number less than or equal to a floating-point expression.
Expr div_round_to_zero(Expr x, Expr y)
Divide two integers, rounding towards zero.
Expr widening_sub(Expr a, Expr b)
Compute widen(a) - widen(b).
Expr likely(Expr e)
Expressions tagged with this intrinsic are considered to be part of the steady state of some loop wit...
Expr operator~(Expr x)
Return the bitwise not of an expression.
Expr erf(const Expr &x)
Evaluate the error function erf.
Expr operator^(Expr x, Expr y)
Return the bitwise xor of two expressions (which need not have the same type).
unsigned __INT64_TYPE__ uint64_t
signed __INT64_TYPE__ int64_t
signed __INT32_TYPE__ int32_t
unsigned __INT8_TYPE__ uint8_t
unsigned __INT16_TYPE__ uint16_t
unsigned __INT32_TYPE__ uint32_t
signed __INT16_TYPE__ int16_t
signed __INT8_TYPE__ int8_t
A fragment of Halide syntax.
Definition Expr.h:257
A builder to help create Exprs representing halide_buffer_t structs (e.g.
Definition IROperator.h:210
std::vector< Expr > strides
Definition IROperator.h:215
std::vector< Expr > extents
Definition IROperator.h:215
A reference-counted handle to a statement node.
Definition Expr.h:418
static constexpr bool value
Definition IROperator.h:327
Types in the halide type system.
Definition Type.h:276
HALIDE_ALWAYS_INLINE bool is_int() const
Is this type a signed integer type?
Definition Type.h:424
HALIDE_ALWAYS_INLINE bool is_float() const
Is this type a floating point type (float or double).
Definition Type.h:412
Class that provides a type that implements half precision floating point (IEEE754 2008 binary16) in s...
Definition Float16.h:17