Halide 16.0.0
Halide compiler and libraries
Loading...
Searching...
No Matches
gpu_context.h
Go to the documentation of this file.
1#if defined(TEST_OPENCL)
2// Implement OpenCL custom context.
3
4#define CL_TARGET_OPENCL_VERSION 120
5#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
6#ifdef __APPLE__
7#include <OpenCL/cl.h>
8#else
9#include <CL/cl.h>
10#endif
11
12// Create the global context. This is just a helper function not called by Halide.
13inline bool create_opencl_context(cl_context &cl_ctx, cl_command_queue &cl_q) {
14 cl_int err = 0;
15
16 const cl_uint maxPlatforms = 4;
17 cl_platform_id platforms[maxPlatforms];
18 cl_uint platformCount = 0;
19
20 err = clGetPlatformIDs(maxPlatforms, platforms, &platformCount);
21 if (err != CL_SUCCESS) {
22 printf("clGetPlatformIDs failed (%d)\n", err);
23 return false;
24 }
25
26 cl_platform_id platform = nullptr;
27
28 if (platformCount > 0) {
29 platform = platforms[0];
30 }
31 if (platform == nullptr) {
32 printf("Failed to get platform\n");
33 return false;
34 }
35
37
38 // Make sure we have a device
39 const cl_uint maxDevices = 4;
40 cl_device_id devices[maxDevices];
41 cl_uint deviceCount = 0;
42 err = clGetDeviceIDs(platform, device_type, maxDevices, devices, &deviceCount);
43 if (err != CL_SUCCESS) {
44 printf("clGetDeviceIDs failed (%d)\n", err);
45 return false;
46 }
47 if (deviceCount == 0) {
48 printf("Failed to get device\n");
49 return false;
50 }
51
52 cl_device_id dev = devices[deviceCount - 1];
53
54 // Create context and command queue.
56 0};
57 cl_ctx = clCreateContext(properties, 1, &dev, nullptr, nullptr, &err);
58 if (err != CL_SUCCESS) {
59 printf("clCreateContext failed (%d)\n", err);
60 return false;
61 }
62
63 cl_q = clCreateCommandQueue(cl_ctx, dev, 0, &err);
64 if (err != CL_SUCCESS) {
65 printf("clCreateCommandQueue failed (%d)\n", err);
66 return false;
67 }
68 return true;
69}
70
71inline void destroy_opencl_context(cl_context cl_ctx, cl_command_queue cl_q) {
72 clReleaseCommandQueue(cl_q);
73 clReleaseContext(cl_ctx);
74}
75
76#elif defined(TEST_CUDA)
77// Implement CUDA custom context.
78#include <cuda.h>
79
80inline bool create_cuda_context(CUcontext &cuda_ctx) {
81 // Initialize CUDA
82 CUresult err = cuInit(0);
83 if (err != CUDA_SUCCESS) {
84 printf("cuInit failed (%d)\n", err);
85 return false;
86 }
87
88 // Make sure we have a device
89 int deviceCount = 0;
90 err = cuDeviceGetCount(&deviceCount);
91 if (err != CUDA_SUCCESS) {
92 printf("cuGetDeviceCount failed (%d)\n", err);
93 return false;
94 }
95 if (deviceCount <= 0) {
96 printf("No CUDA devices available\n");
97 return false;
98 }
99
100 CUdevice dev;
101 // Get device
102 CUresult status;
103 // Try to get a device >0 first, since 0 should be our display device
104 // For now, don't try devices > 2 to maintain compatibility with previous behavior.
105 if (deviceCount > 2) deviceCount = 2;
106 for (int id = deviceCount - 1; id >= 0; id--) {
107 status = cuDeviceGet(&dev, id);
108 if (status == CUDA_SUCCESS) break;
109 }
110
111 if (status != CUDA_SUCCESS) {
112 printf("Failed to get CUDA device\n");
113 return status;
114 }
115
116 // Create context
117 err = cuCtxCreate(&cuda_ctx, 0, dev);
118 if (err != CUDA_SUCCESS) {
119 printf("cuCtxCreate failed (%d)\n", err);
120 return false;
121 }
122
123 return true;
124}
125
126inline void destroy_cuda_context(CUcontext cuda_ctx) {
127 cuCtxDestroy(cuda_ctx);
128}
129
130#elif defined(TEST_METAL) && defined(__OBJC__)
131#include <Metal/MTLCommandQueue.h>
132#include <Metal/MTLDevice.h>
133
// Find a Metal device and create a command queue on it.
//
// The system default device is preferred; if there is none, the first entry
// of MTLCopyAllDevices() is used instead.
//
// device: receives the device. The caller owns one reference to it.
// queue:  receives the command queue. The caller owns one reference to it.
//
// Both references are meant to be dropped with destroy_metal_context(). This
// header manages them with explicit retain/release (no ARC).
//
// Returns true on success. On failure a diagnostic is printed, false is
// returned, and no device reference is left with the caller.
inline bool create_metal_context(id<MTLDevice> &device, id<MTLCommandQueue> &queue) {
    device = MTLCreateSystemDefaultDevice();
    if (device == nil) {
        NSArray<id<MTLDevice>> *devices = MTLCopyAllDevices();
        // firstObject is nil for a nil or empty array, whereas devices[0]
        // would throw on an empty one. Retain the chosen device so that it
        // outlives the array and balances the release done on teardown.
        device = [[devices firstObject] retain];
        [devices release];
    }
    if (device == nil) {
        printf("Failed to find Metal device.\n");
        return false;
    }
    queue = [device newCommandQueue];
    if (queue == nil) {
        printf("Failed to create Metal command queue.\n");
        // Do not leak the device reference acquired above.
        [device release];
        device = nil;
        return false;
    }
    return true;
}
153
// Drop the references handed out by create_metal_context().
//
// device: Metal device to release.
// queue:  command queue to release; it is released before the device.
inline void destroy_metal_context(id<MTLDevice> device, id<MTLCommandQueue> queue) {
    [queue release];
    [device release];
}
158
159#elif defined(TEST_WEBGPU)
160
161#include "mini_webgpu.h"
162
extern "C" {
// TODO: Remove all of this when wgpuInstanceProcessEvents() is supported.
// See https://github.com/halide/Halide/issues/7248
#ifdef WITH_DAWN_NATIVE
// From <unistd.h>. Called between polls while create_webgpu_context() waits
// for device initialization to finish.
int usleep(uint32_t);
#else
// Defined by Emscripten. Sleeping yields execution to asynchronous Javascript
// work in combination with Emscripten's "Asyncify" mechanism.
void emscripten_sleep(unsigned int ms);
#endif
}
175
176inline bool create_webgpu_context(WGPUInstance *instance_out, WGPUAdapter *adapter_out, WGPUDevice *device_out, WGPUBuffer *staging_buffer_out) {
177 struct Results {
178 WGPUInstance instance = nullptr;
179 WGPUAdapter adapter = nullptr;
180 WGPUDevice device = nullptr;
181 WGPUBuffer staging_buffer = nullptr;
182 bool success = true;
183 } results;
184
185 // TODO: Unify this when Emscripten implements wgpuCreateInstance().
186 // See https://github.com/halide/Halide/issues/7248
187#ifdef WITH_DAWN_NATIVE
189 desc.nextInChain = nullptr;
190 results.instance = wgpuCreateInstance(&desc);
191#else
192 results.instance = nullptr;
193#endif
194
195 auto request_adapter_callback = [](WGPURequestAdapterStatus status, WGPUAdapter adapter, char const *message, void *userdata) {
196 auto *results = (Results *)userdata;
197
198 if (status != WGPURequestAdapterStatus_Success) {
199 results->success = false;
200 return;
201 }
202 results->adapter = adapter;
203
204 // Use the defaults for most limits.
205 WGPURequiredLimits requestedLimits{};
206 requestedLimits.nextInChain = nullptr;
207 memset(&requestedLimits.limits, 0xFF, sizeof(WGPULimits));
208
209 // TODO: Enable for Emscripten when wgpuAdapterGetLimits is supported.
210 // See https://github.com/halide/Halide/issues/7248
211#ifdef WITH_DAWN_NATIVE
212 WGPUSupportedLimits supportedLimits{};
213 supportedLimits.nextInChain = nullptr;
214 if (!wgpuAdapterGetLimits(adapter, &supportedLimits)) {
215 results->success = false;
216 return;
217 } else {
218 // Raise the limits on buffer size and workgroup storage size.
219 requestedLimits.limits.maxBufferSize = supportedLimits.limits.maxBufferSize;
220 requestedLimits.limits.maxStorageBufferBindingSize = supportedLimits.limits.maxStorageBufferBindingSize;
221 requestedLimits.limits.maxComputeWorkgroupStorageSize = supportedLimits.limits.maxComputeWorkgroupStorageSize;
222 }
223#endif
224
226 desc.nextInChain = nullptr;
227 desc.label = nullptr;
228 desc.requiredFeaturesCount = 0;
229 desc.requiredFeatures = nullptr;
230 desc.requiredLimits = &requestedLimits;
231
232 auto request_device_callback = [](WGPURequestDeviceStatus status,
233 WGPUDevice device,
234 char const *message,
235 void *userdata) {
236 auto *results = (Results *)userdata;
237 if (status != WGPURequestDeviceStatus_Success) {
238 results->success = false;
239 return;
240 }
241 results->device = device;
242
243 auto device_lost_callback = [](WGPUDeviceLostReason reason,
244 char const *message,
245 void *userdata) {
246 fprintf(stderr, "WGPU Device Lost: %d %s", (int)reason, message);
247 abort();
248 };
249 wgpuDeviceSetDeviceLostCallback(device, device_lost_callback, userdata);
250
251 // Create a staging buffer for transfers.
252 constexpr int kStagingBufferSize = 4 * 1024 * 1024;
254 desc.nextInChain = nullptr;
255 desc.label = nullptr;
257 desc.size = kStagingBufferSize;
258 desc.mappedAtCreation = false;
259 results->staging_buffer = wgpuDeviceCreateBuffer(device, &desc);
260 if (results->staging_buffer == nullptr) {
261 results->success = false;
262 return;
263 }
264 };
265
266 wgpuAdapterRequestDevice(adapter, &desc, request_device_callback, userdata);
267 };
268
269 wgpuInstanceRequestAdapter(results.instance, nullptr, request_adapter_callback, &results);
270
271 // Wait for device initialization to complete.
272 while (!results.device && results.success) {
273 // TODO: Use wgpuInstanceProcessEvents() when it is supported.
274 // See https://github.com/halide/Halide/issues/7248
275#ifndef WITH_DAWN_NATIVE
276 emscripten_sleep(10);
277#else
278 usleep(1000);
279#endif
280 }
281
282 *instance_out = results.instance;
283 *adapter_out = results.adapter;
284 *device_out = results.device;
285 *staging_buffer_out = results.staging_buffer;
286 return results.success;
287}
288
289inline void destroy_webgpu_context(WGPUInstance instance, WGPUAdapter adapter, WGPUDevice device, WGPUBuffer staging_buffer) {
290 wgpuDeviceSetDeviceLostCallback(device, nullptr, nullptr);
291 wgpuBufferRelease(staging_buffer);
292 wgpuDeviceRelease(device);
293 wgpuAdapterRelease(adapter);
294
295 // TODO: Unify this when Emscripten supports wgpuInstanceRelease().
296 // See https://github.com/halide/Halide/issues/7248
297#ifdef WITH_DAWN_NATIVE
298 wgpuInstanceRelease(instance);
299#endif
300}
301
302#endif
struct _cl_platform_id * cl_platform_id
Definition mini_cl.h:55
intptr_t cl_context_properties
Definition mini_cl.h:78
int32_t cl_int
Definition mini_cl.h:44
cl_bitfield cl_device_type
Definition mini_cl.h:67
uint32_t cl_uint
Definition mini_cl.h:45
#define CL_DEVICE_TYPE_ALL
Definition mini_cl.h:218
#define CL_CONTEXT_PLATFORM
Definition mini_cl.h:330
#define CL_SUCCESS
Definition mini_cl.h:133
struct _cl_context * cl_context
Definition mini_cl.h:57
struct _cl_device_id * cl_device_id
Definition mini_cl.h:56
struct _cl_command_queue * cl_command_queue
Definition mini_cl.h:58
WGPURequestAdapterStatus
@ WGPURequestAdapterStatus_Success
WGPU_EXPORT void wgpuDeviceRelease(WGPUDevice device)
WGPUDeviceLostReason
@ WGPUBufferUsage_MapRead
@ WGPUBufferUsage_CopyDst
WGPURequestDeviceStatus
@ WGPURequestDeviceStatus_Success
struct WGPUInstanceImpl * WGPUInstance
Definition mini_webgpu.h:74
WGPU_EXPORT void wgpuAdapterRequestDevice(WGPUAdapter adapter, WGPUDeviceDescriptor const *descriptor, WGPURequestDeviceCallback callback, void *userdata)
WGPU_EXPORT void wgpuAdapterRelease(WGPUAdapter adapter)
struct WGPUDeviceImpl * WGPUDevice
Definition mini_webgpu.h:72
WGPU_EXPORT void wgpuInstanceRelease(WGPUInstance instance)
WGPU_EXPORT void wgpuDeviceSetDeviceLostCallback(WGPUDevice device, WGPUDeviceLostCallback callback, void *userdata)
WGPU_EXPORT WGPUBuffer wgpuDeviceCreateBuffer(WGPUDevice device, WGPUBufferDescriptor const *descriptor)
WGPU_EXPORT bool wgpuAdapterGetLimits(WGPUAdapter adapter, WGPUSupportedLimits *limits)
WGPU_EXPORT WGPUInstance wgpuCreateInstance(WGPUInstanceDescriptor const *descriptor)
WGPU_EXPORT void wgpuInstanceRequestAdapter(WGPUInstance instance, WGPURequestAdapterOptions const *options, WGPURequestAdapterCallback callback, void *userdata)
WGPU_EXPORT void wgpuBufferRelease(WGPUBuffer buffer)
struct WGPUAdapterImpl * WGPUAdapter
Definition mini_webgpu.h:64
struct WGPUBufferImpl * WGPUBuffer
Definition mini_webgpu.h:67
void * memset(void *s, int val, size_t n)
unsigned __INT32_TYPE__ uint32_t
void abort()
WGPUChainedStruct const * nextInChain
WGPUChainedStruct const * nextInChain
WGPUChainedStruct const * nextInChain
WGPUChainedStruct const * nextInChain
WGPUChainedStructOut * nextInChain