HalideRuntime.h
11.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
#ifndef HALIDE_HALIDERUNTIME_H
#define HALIDE_HALIDERUNTIME_H
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
#ifdef __cplusplus
extern "C" {
#endif
// Note that you should not use "inline" along with HALIDE_ALWAYS_INLINE;
// it is not necessary, and may produce warnings for some build configurations.
#ifdef _MSC_VER
#define HALIDE_ALWAYS_INLINE __forceinline
#define HALIDE_NEVER_INLINE __declspec(noinline)
#else
#define HALIDE_ALWAYS_INLINE __attribute__((always_inline)) inline
#define HALIDE_NEVER_INLINE __attribute__((noinline))
#endif
/** \file
*
* This file declares the routines used by Halide internally in its
* runtime. On platforms that support weak linking, these can be
* replaced with user-defined versions by defining an extern "C"
* function with the same name and signature.
*
* When doing Just In Time (JIT) compilation methods on the Func being
* compiled must be called instead. The corresponding methods are
* documented below.
*
* All of these functions take a "void *user_context" parameter as their
* first argument; if the Halide kernel that calls back to any of these
* functions has been compiled with the UserContext feature set on its Target,
* then the value of that pointer passed from the code that calls the
* Halide kernel is piped through to the function.
*
* Some of these are also useful to call when using the default
* implementation. E.g. halide_shutdown_thread_pool.
*
* Note that even on platforms with weak linking, some linker setups
* may not respect the override you provide. E.g. if the override is
* in a shared library and the halide object files are linked directly
* into the output, the builtin versions of the runtime functions will
* be called. See your linker documentation for more details. On
* Linux, LD_DYNAMIC_WEAK=1 may help.
*
*/
// Forward-declare to suppress warnings if compiling as C.
struct halide_buffer_t;
/** Types in the halide type system. They can be ints, unsigned ints,
* or floats (of various bit-widths), or a handle (which is always 64-bits).
* Note that the int/uint/float values do not imply a specific bit width
* (the bit width is expected to be encoded in a separate value).
*/
typedef enum halide_type_code_t
{
halide_type_int = 0, //!< signed integers
halide_type_uint = 1, //!< unsigned integers
halide_type_float = 2, //!< floating point numbers
halide_type_handle = 3 //!< opaque pointer type (void *)
} halide_type_code_t;
// Note that while __attribute__ can go before or after the declaration,
// __declspec apparently is only allowed before.
#ifndef HALIDE_ATTRIBUTE_ALIGN
#ifdef _MSC_VER
#define HALIDE_ATTRIBUTE_ALIGN(x) __declspec(align(x))
#else
#define HALIDE_ATTRIBUTE_ALIGN(x) __attribute__((aligned(x)))
#endif
#endif
/** A runtime tag for a type in the halide type system. Can be ints,
* unsigned ints, or floats of various bit-widths (the 'bits'
* field). Can also be vectors of the same (by setting the 'lanes'
* field to something larger than one). This struct should be
* exactly 32-bits in size. */
struct halide_type_t {
/** The basic type code: signed integer, unsigned integer, or floating point. */
#if __cplusplus >= 201103L
HALIDE_ATTRIBUTE_ALIGN(1) halide_type_code_t code; // halide_type_code_t
#else
HALIDE_ATTRIBUTE_ALIGN(1) uint8_t code; // halide_type_code_t
#endif
/** The number of bits of precision of a single scalar value of this type. */
HALIDE_ATTRIBUTE_ALIGN(1) uint8_t bits;
/** How many elements in a vector. This is 1 for scalar types. */
HALIDE_ATTRIBUTE_ALIGN(2) uint16_t lanes;
#ifdef __cplusplus
/** Construct a runtime representation of a Halide type from:
* code: The fundamental type from an enum.
* bits: The bit size of one element.
* lanes: The number of vector elements in the type. */
HALIDE_ALWAYS_INLINE halide_type_t(halide_type_code_t code, uint8_t bits, uint16_t lanes = 1)
: code(code), bits(bits), lanes(lanes) {
}
/** Default constructor is required e.g. to declare halide_trace_event
* instances. */
HALIDE_ALWAYS_INLINE halide_type_t() : code((halide_type_code_t)0), bits(0), lanes(0) {}
/** Compare two types for equality. */
HALIDE_ALWAYS_INLINE bool operator==(const halide_type_t &other) const {
return (code == other.code &&
bits == other.bits &&
lanes == other.lanes);
}
HALIDE_ALWAYS_INLINE bool operator!=(const halide_type_t &other) const {
return !(*this == other);
}
/** Size in bytes for a single element, even if width is not 1, of this type. */
HALIDE_ALWAYS_INLINE int bytes() const { return (bits + 7) / 8; }
#endif
};
/** An opaque struct containing per-GPU API implementations of the
* device functions. */
struct halide_device_interface_impl_t;
/** Each GPU API provides a halide_device_interface_t struct pointing
* to the code that manages device allocations. You can access these
* functions directly from the struct member function pointers, or by
* calling the functions declared below. Note that the global
* functions are not available when using Halide as a JIT compiler.
* If you are using raw halide_buffer_t in that context you must use
* the function pointers in the device_interface struct.
*
* The function pointers below are currently the same for every GPU
* API; only the impl field varies. These top-level functions do the
* bookkeeping that is common across all GPU APIs, and then dispatch
* to more API-specific functions via another set of function pointers
* hidden inside the impl field.
*/
struct halide_device_interface_t {
int (*device_malloc)(void *user_context, struct halide_buffer_t *buf,
const struct halide_device_interface_t *device_interface);
int (*device_free)(void *user_context, struct halide_buffer_t *buf);
int (*device_sync)(void *user_context, struct halide_buffer_t *buf);
void (*device_release)(void *user_context,
const struct halide_device_interface_t *device_interface);
int (*copy_to_host)(void *user_context, struct halide_buffer_t *buf);
int (*copy_to_device)(void *user_context, struct halide_buffer_t *buf,
const struct halide_device_interface_t *device_interface);
int (*device_and_host_malloc)(void *user_context, struct halide_buffer_t *buf,
const struct halide_device_interface_t *device_interface);
int (*device_and_host_free)(void *user_context, struct halide_buffer_t *buf);
int (*buffer_copy)(void *user_context, struct halide_buffer_t *src,
const struct halide_device_interface_t *dst_device_interface, struct halide_buffer_t *dst);
int (*device_crop)(void *user_context, const struct halide_buffer_t *src,
struct halide_buffer_t *dst);
int (*device_release_crop)(void *user_context, struct halide_buffer_t *buf);
int (*wrap_native)(void *user_context, struct halide_buffer_t *buf, uint64_t handle,
const struct halide_device_interface_t *device_interface);
int (*detach_native)(void *user_context, struct halide_buffer_t *buf);
const struct halide_device_interface_impl_t *impl;
};
typedef struct halide_dimension_t {
int32_t min, extent, stride;
// Per-dimension flags. None are defined yet (This is reserved for future use).
uint32_t flags;
#ifdef __cplusplus
HALIDE_ALWAYS_INLINE halide_dimension_t() : min(0), extent(0), stride(0), flags(0) {}
HALIDE_ALWAYS_INLINE halide_dimension_t(int32_t m, int32_t e, int32_t s, uint32_t f = 0) :
min(m), extent(e), stride(s), flags(f) {}
HALIDE_ALWAYS_INLINE bool operator==(const halide_dimension_t &other) const {
return (min == other.min) &&
(extent == other.extent) &&
(stride == other.stride) &&
(flags == other.flags);
}
HALIDE_ALWAYS_INLINE bool operator!=(const halide_dimension_t &other) const {
return !(*this == other);
}
#endif
} halide_dimension_t;
#ifdef __cplusplus
} // extern "C"
#endif
typedef enum {halide_buffer_flag_host_dirty = 1,
halide_buffer_flag_device_dirty = 2} halide_buffer_flags;
/**
* The raw representation of an image passed around by generated
* Halide code. It includes some stuff to track whether the image is
* not actually in main memory, but instead on a device (like a
* GPU). For a more convenient C++ wrapper, use Halide::Buffer<T>. */
typedef struct halide_buffer_t {
/** A device-handle for e.g. GPU memory used to back this buffer. */
uint64_t device;
/** The interface used to interpret the above handle. */
const struct halide_device_interface_t *device_interface;
/** A pointer to the start of the data in main memory. In terms of
* the Halide coordinate system, this is the address of the min
* coordinates (defined below). */
uint8_t* host;
/** flags with various meanings. */
uint64_t flags;
/** The type of each buffer element. */
struct halide_type_t type;
/** The dimensionality of the buffer. */
int32_t dimensions;
/** The shape of the buffer. Halide does not own this array - you
* must manage the memory for it yourself. */
halide_dimension_t *dim;
/** Pads the buffer up to a multiple of 8 bytes */
void *padding;
} halide_buffer_t;
#ifdef __cplusplus
namespace {
template<typename T> struct check_is_pointer;
template<typename T> struct check_is_pointer<T *> {};
}
/** Construct the halide equivalent of a C type */
template<typename T>
HALIDE_ALWAYS_INLINE halide_type_t halide_type_of() {
// Create a compile-time error if T is not a pointer (without
// using any includes - this code goes into the runtime).
check_is_pointer<T> check;
(void)check;
return halide_type_t(halide_type_handle, 64);
}
template<>
HALIDE_ALWAYS_INLINE halide_type_t halide_type_of<float>() {
return halide_type_t(halide_type_float, 32);
}
template<>
HALIDE_ALWAYS_INLINE halide_type_t halide_type_of<double>() {
return halide_type_t(halide_type_float, 64);
}
template<>
HALIDE_ALWAYS_INLINE halide_type_t halide_type_of<bool>() {
return halide_type_t(halide_type_uint, 1);
}
template<>
HALIDE_ALWAYS_INLINE halide_type_t halide_type_of<uint8_t>() {
return halide_type_t(halide_type_uint, 8);
}
template<>
HALIDE_ALWAYS_INLINE halide_type_t halide_type_of<uint16_t>() {
return halide_type_t(halide_type_uint, 16);
}
template<>
HALIDE_ALWAYS_INLINE halide_type_t halide_type_of<uint32_t>() {
return halide_type_t(halide_type_uint, 32);
}
template<>
HALIDE_ALWAYS_INLINE halide_type_t halide_type_of<uint64_t>() {
return halide_type_t(halide_type_uint, 64);
}
template<>
HALIDE_ALWAYS_INLINE halide_type_t halide_type_of<int8_t>() {
return halide_type_t(halide_type_int, 8);
}
template<>
HALIDE_ALWAYS_INLINE halide_type_t halide_type_of<int16_t>() {
return halide_type_t(halide_type_int, 16);
}
template<>
HALIDE_ALWAYS_INLINE halide_type_t halide_type_of<int32_t>() {
return halide_type_t(halide_type_int, 32);
}
template<>
HALIDE_ALWAYS_INLINE halide_type_t halide_type_of<int64_t>() {
return halide_type_t(halide_type_int, 64);
}
#endif
#endif // HALIDE_HALIDERUNTIME_H