Halide 14.0.0
Halide compiler and libraries
CodeGen_GPU_Dev.h
Go to the documentation of this file.
1#ifndef HALIDE_CODEGEN_GPU_DEV_H
2#define HALIDE_CODEGEN_GPU_DEV_H
3
4/** \file
5 * Defines the code-generator interface for producing GPU device code
6 */
7#include <string>
8#include <vector>
9
10#include "DeviceArgument.h"
11#include "Expr.h"
12
13namespace Halide {
14namespace Internal {
15
16/** A code generator that emits GPU code from a given Halide stmt. */
17struct CodeGen_GPU_Dev {
19
20 /** Compile a GPU kernel into the module. This may be called many times
21 * with different kernels, which will all be accumulated into a single
22 * source module shared by a given Halide pipeline. */
23 virtual void add_kernel(Stmt stmt,
24 const std::string &name,
25 const std::vector<DeviceArgument> &args) = 0;
26
27 /** (Re)initialize the GPU kernel module. This is separate from compile,
28 * since a GPU device module will often have many kernels compiled into it
29 * for a single pipeline. */
30 virtual void init_module() = 0;
31
32 virtual std::vector<char> compile_to_src() = 0;
33
34 virtual std::string get_current_kernel_name() = 0;
35
36 virtual void dump() = 0;
37
38 /** This routine returns the GPU API name that is combined into
39 * runtime routine names to ensure each GPU API has a unique
40 * name.
41 */
42 virtual std::string api_unique_name() = 0;
43
44 /** Returns the specified name transformed by the variable naming rules
45 * for the GPU language backend. Used to determine the name of a parameter
46 * during host codegen. */
47 virtual std::string print_gpu_name(const std::string &name) = 0;
48
49 /** Allows the GPU device specific code to request halide_type_t
50 * values to be passed to the kernel_run routine rather than just
51 * argument type sizes.
52 */
53 virtual bool kernel_run_takes_types() const {
54 return false;
55 }
56
57 static bool is_gpu_var(const std::string &name);
58 static bool is_gpu_block_var(const std::string &name);
59 static bool is_gpu_thread_var(const std::string &name);
60
61 /** Checks if expr is block uniform, i.e. does not depend on a thread
62 * var. */
63 static bool is_block_uniform(const Expr &expr);
64 /** Checks if the buffer is a candidate for constant storage. Most
65 * GPUs (APIs) support a constant memory storage class that cannot be
66 * written to and performs well for block uniform accesses. A buffer is a
67 * candidate for constant storage if it is never written to, and loads are
68 * uniform within the workgroup. */
69 static bool is_buffer_constant(const Stmt &kernel, const std::string &buffer);
70
71 /** Modifies predicated loads and stores to be non-predicated, since most
72 * GPU backends do not support predication. */
73 static Stmt scalarize_predicated_loads_stores(Stmt &s);
74
75 /** A mask describing which type of memory fence to use for the gpu_thread_barrier()
76 * intrinsic. Not all GPU APIs support all types.
77 */
78 enum MemoryFenceType {
79 None = 0, // No fence required (just a sync)
80 Device = 1, // Device/global memory fence
81 Shared = 2 // Threadgroup/shared memory fence
82 };
83};
84
85} // namespace Internal
86} // namespace Halide
87
88#endif
Defines helpers for passing arguments to separate devices, such as GPUs.
Base classes for Halide expressions (Halide::Expr) and statements (Halide::Internal::Stmt)
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
@ Internal
Not visible externally, similar to 'static' linkage in C.
A fragment of Halide syntax.
Definition: Expr.h:256
A code generator that emits GPU code from a given Halide stmt.
static bool is_gpu_thread_var(const std::string &name)
static bool is_gpu_var(const std::string &name)
virtual void init_module()=0
(Re)initialize the GPU kernel module.
static bool is_gpu_block_var(const std::string &name)
static bool is_block_uniform(const Expr &expr)
Checks if expr is block uniform, i.e. does not depend on a thread var.
MemoryFenceType
A mask describing which type of memory fence to use for the gpu_thread_barrier() intrinsic.
static Stmt scalarize_predicated_loads_stores(Stmt &s)
Modifies predicated loads and stores to be non-predicated, since most GPU backends do not support predication.
virtual std::string get_current_kernel_name()=0
virtual std::string print_gpu_name(const std::string &name)=0
Returns the specified name transformed by the variable naming rules for the GPU language backend.
virtual bool kernel_run_takes_types() const
Allows the GPU device specific code to request halide_type_t values to be passed to the kernel_run routine rather than just argument type sizes.
virtual std::vector< char > compile_to_src()=0
virtual std::string api_unique_name()=0
This routine returns the GPU API name that is combined into runtime routine names to ensure each GPU API has a unique name.
static bool is_buffer_constant(const Stmt &kernel, const std::string &buffer)
Checks if the buffer is a candidate for constant storage.
virtual void add_kernel(Stmt stmt, const std::string &name, const std::vector< DeviceArgument > &args)=0
Compile a GPU kernel into the module.
A reference-counted handle to a statement node.
Definition: Expr.h:417