Halide 14.0.0
Halide compiler and libraries
Float16.h
Go to the documentation of this file.
1#ifndef HALIDE_FLOAT16_H
2#define HALIDE_FLOAT16_H
3
5#include <cstdint>
6#include <string>
7
8namespace Halide {
9
10/** Class that provides a type that implements half precision
11 * floating point (IEEE754 2008 binary16) in software.
12 *
13 * This type is enforced to be 16-bits wide and maintains no state
14 * other than the raw IEEE754 binary16 bits so that it can be passed
15 * to code that checks a type's size and used for halide_buffer_t allocation.
16 * */
17struct float16_t {
18
19 static const int mantissa_bits = 10;
20 static const uint16_t sign_mask = 0x8000;
21 static const uint16_t exponent_mask = 0x7c00;
22 static const uint16_t mantissa_mask = 0x03ff;
23
24 /// \name Constructors
25 /// @{
26
27 /** Construct from a float, double, or int using
28 * round-to-nearest-ties-to-even. Out-of-range values become +/-
29 * infinity.
30 */
31 // @{
32 explicit float16_t(float value);
33 explicit float16_t(double value);
34 explicit float16_t(int value);
35 // @}
36
37 /** Construct a float16_t with the bits initialised to 0. This represents
38 * positive zero.*/
39 float16_t() = default;
40
41 /// @}
42
43 // Use explicit to avoid accidentally raising the precision
44 /** Cast to float */
45 explicit operator float() const;
46 /** Cast to double */
47 explicit operator double() const;
48 /** Cast to int */
49 explicit operator int() const;
50
51 /** Get a new float16_t that represents a special value */
52 // @{
58 // @}
59
60 /** Get a new float16_t with the given raw bits
61 *
62 * \param bits The bits conformant to IEEE754 binary16
63 */
65
66 /** Return a new float16_t with a negated sign bit*/
68
69 /** Arithmetic operators. */
70 // @{
76 return (*this = *this + rhs);
77 }
79 return (*this = *this - rhs);
80 }
82 return (*this = *this * rhs);
83 }
85 return (*this = *this / rhs);
86 }
87 // @}
88
89 /** Comparison operators */
90 // @{
91 bool operator==(float16_t rhs) const;
92 bool operator!=(float16_t rhs) const {
93 return !(*this == rhs);
94 }
95 bool operator>(float16_t rhs) const;
96 bool operator<(float16_t rhs) const;
97 bool operator>=(float16_t rhs) const {
98 return (*this > rhs) || (*this == rhs);
99 }
100 bool operator<=(float16_t rhs) const {
101 return (*this < rhs) || (*this == rhs);
102 }
103 // @}
104
105 /** Properties */
106 // @{
107 bool is_nan() const;
108 bool is_infinity() const;
109 bool is_negative() const;
110 bool is_zero() const;
111 // @}
112
113 /** Returns the bits that represent this float16_t.
114 *
115 * An alternative method to access the bits is to cast a pointer
116 * to this instance as a pointer to a uint16_t.
117 **/
119
120private:
121 // The raw bits.
122 uint16_t data = 0;
123};
124
125static_assert(sizeof(float16_t) == 2, "float16_t should occupy two bytes");
126
127} // namespace Halide
128
129template<>
130HALIDE_ALWAYS_INLINE halide_type_t halide_type_of<Halide::float16_t>() {
132}
133
134namespace Halide {
135
136/** Class that provides a type that implements half precision
137 * floating point using the bfloat16 format.
138 *
139 * This type is enforced to be 16-bits wide and maintains no state
140 * other than the raw bits so that it can be passed to code that checks
141 * a type's size and used for halide_buffer_t allocation. */
143
144 static const int mantissa_bits = 7;
145 static const uint16_t sign_mask = 0x8000;
146 static const uint16_t exponent_mask = 0x7f80;
147 static const uint16_t mantissa_mask = 0x007f;
148
150
151 /// \name Constructors
152 /// @{
153
154 /** Construct from a float, double, or int using
155 * round-to-nearest-ties-to-even. Out-of-range values become +/-
156 * infinity.
157 */
158 // @{
159 explicit bfloat16_t(float value);
160 explicit bfloat16_t(double value);
161 explicit bfloat16_t(int value);
162 // @}
163
164 /** Construct a bfloat16_t with the bits initialised to 0. This represents
165 * positive zero.*/
166 bfloat16_t() = default;
167
168 /// @}
169
170 // Use explicit to avoid accidentally raising the precision
171 /** Cast to float */
172 explicit operator float() const;
173 /** Cast to double */
174 explicit operator double() const;
175 /** Cast to int */
176 explicit operator int() const;
177
178 /** Get a new bfloat16_t that represents a special value */
179 // @{
185 // @}
186
187 /** Get a new bfloat16_t with the given raw bits
188 *
189 * \param bits The raw bits, laid out in the bfloat16 format
190 */
192
193 /** Return a new bfloat16_t with a negated sign bit*/
195
196 /** Arithmetic operators. */
197 // @{
203 return (*this = *this + rhs);
204 }
206 return (*this = *this - rhs);
207 }
209 return (*this = *this * rhs);
210 }
212 return (*this = *this / rhs);
213 }
214 // @}
215
216 /** Comparison operators */
217 // @{
218 bool operator==(bfloat16_t rhs) const;
219 bool operator!=(bfloat16_t rhs) const {
220 return !(*this == rhs);
221 }
222 bool operator>(bfloat16_t rhs) const;
223 bool operator<(bfloat16_t rhs) const;
224 bool operator>=(bfloat16_t rhs) const {
225 return (*this > rhs) || (*this == rhs);
226 }
227 bool operator<=(bfloat16_t rhs) const {
228 return (*this < rhs) || (*this == rhs);
229 }
230 // @}
231
232 /** Properties */
233 // @{
234 bool is_nan() const;
235 bool is_infinity() const;
236 bool is_negative() const;
237 bool is_zero() const;
238 // @}
239
240 /** Returns the bits that represent this bfloat16_t.
241 *
242 * An alternative method to access the bits is to cast a pointer
243 * to this instance as a pointer to a uint16_t.
244 **/
246
247private:
248 // The raw bits.
249 uint16_t data = 0;
250};
251
252static_assert(sizeof(bfloat16_t) == 2, "bfloat16_t should occupy two bytes");
253
254} // namespace Halide
255
256template<>
257HALIDE_ALWAYS_INLINE halide_type_t halide_type_of<Halide::bfloat16_t>() {
259}
260
261#endif
This file declares the routines used by Halide internally in its runtime.
@ halide_type_float
IEEE floating point numbers.
@ halide_type_bfloat
floating point numbers in the bfloat format
#define HALIDE_ALWAYS_INLINE
Definition: HalideRuntime.h:38
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
unsigned __INT16_TYPE__ uint16_t
Class that provides a type that implements half precision floating point using the bfloat16 format.
Definition: Float16.h:142
bfloat16_t operator-=(bfloat16_t rhs)
Definition: Float16.h:205
bool operator>(bfloat16_t rhs) const
bool operator<=(bfloat16_t rhs) const
Definition: Float16.h:227
static const bfloat16_t zero
Definition: Float16.h:149
bfloat16_t operator-() const
Return a new bfloat16_t with a negated sign bit.
bool operator<(bfloat16_t rhs) const
static const uint16_t exponent_mask
Definition: Float16.h:146
bfloat16_t operator*(bfloat16_t rhs) const
bool is_infinity() const
static const uint16_t sign_mask
Definition: Float16.h:145
uint16_t to_bits() const
Returns the bits that represent this bfloat16_t.
static const bfloat16_t infinity
Definition: Float16.h:149
static const uint16_t mantissa_mask
Definition: Float16.h:147
bool operator==(bfloat16_t rhs) const
Comparison operators.
static const bfloat16_t nan
Definition: Float16.h:149
bfloat16_t operator+(bfloat16_t rhs) const
Arithmetic operators.
bfloat16_t operator*=(bfloat16_t rhs)
Definition: Float16.h:208
static const bfloat16_t negative_zero
Definition: Float16.h:149
bfloat16_t operator-(bfloat16_t rhs) const
bfloat16_t operator/=(bfloat16_t rhs)
Definition: Float16.h:211
bool is_negative() const
bfloat16_t()=default
Construct a bfloat16_t with the bits initialised to 0.
bfloat16_t operator/(bfloat16_t rhs) const
static bfloat16_t make_zero()
Get a new bfloat16_t that represents a special value.
static bfloat16_t make_negative_zero()
static const int mantissa_bits
Definition: Float16.h:144
static bfloat16_t make_infinity()
bfloat16_t(float value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
static bfloat16_t make_from_bits(uint16_t bits)
Get a new bfloat16_t with the given raw bits.
bool is_zero() const
bfloat16_t(int value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
bool is_nan() const
Properties.
static bfloat16_t make_negative_infinity()
bfloat16_t(double value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
bool operator!=(bfloat16_t rhs) const
Definition: Float16.h:219
static const bfloat16_t negative_infinity
Definition: Float16.h:149
bfloat16_t operator+=(bfloat16_t rhs)
Definition: Float16.h:202
bool operator>=(bfloat16_t rhs) const
Definition: Float16.h:224
static bfloat16_t make_nan()
Class that provides a type that implements half precision floating point (IEEE754 2008 binary16) in software.
Definition: Float16.h:17
static float16_t make_infinity()
float16_t operator/(float16_t rhs) const
bool is_negative() const
float16_t(double value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
static const uint16_t sign_mask
Definition: Float16.h:20
float16_t operator+(float16_t rhs) const
Arithmetic operators.
bool is_zero() const
float16_t(int value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
bool operator>=(float16_t rhs) const
Definition: Float16.h:97
static float16_t make_zero()
Get a new float16_t that represents a special value.
uint16_t to_bits() const
Returns the bits that represent this float16_t.
bool operator<(float16_t rhs) const
static const uint16_t mantissa_mask
Definition: Float16.h:22
bool operator==(float16_t rhs) const
Comparison operators.
static float16_t make_negative_zero()
static float16_t make_from_bits(uint16_t bits)
Get a new float16_t with the given raw bits.
float16_t operator/=(float16_t rhs)
Definition: Float16.h:84
static float16_t make_nan()
float16_t(float value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
float16_t()=default
Construct a float16_t with the bits initialised to 0.
static const uint16_t exponent_mask
Definition: Float16.h:21
float16_t operator-(float16_t rhs) const
bool is_nan() const
Properties.
static float16_t make_negative_infinity()
static const int mantissa_bits
Definition: Float16.h:19
bool is_infinity() const
float16_t operator-=(float16_t rhs)
Definition: Float16.h:78
float16_t operator*(float16_t rhs) const
float16_t operator-() const
Return a new float16_t with a negated sign bit.
bool operator!=(float16_t rhs) const
Definition: Float16.h:92
bool operator<=(float16_t rhs) const
Definition: Float16.h:100
bool operator>(float16_t rhs) const
float16_t operator*=(float16_t rhs)
Definition: Float16.h:81
float16_t operator+=(float16_t rhs)
Definition: Float16.h:75
A runtime tag for a type in the halide type system.