forked from OSchip/llvm-project
551 lines
21 KiB
C++
551 lines
21 KiB
C++
//===--- opencl_acxxel.cpp - OpenCL implementation of the Acxxel API ------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
///
|
|
/// This file defines the standard OpenCL implementation of the Acxxel API.
|
|
///
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "acxxel.h"
|
|
|
|
#include "CL/cl.h"
|
|
|
|
#include <mutex>
|
|
#include <sstream>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
namespace acxxel {
|
|
|
|
namespace {
|
|
|
|
/// An ID containing the platform ID and the device ID within the platform.
|
|
struct FullDeviceID {
|
|
cl_platform_id PlatformID;
|
|
cl_device_id DeviceID;
|
|
|
|
FullDeviceID(cl_platform_id PlatformID, cl_device_id DeviceID)
|
|
: PlatformID(PlatformID), DeviceID(DeviceID) {}
|
|
};
|
|
|
|
static std::string getOpenCLErrorMessage(cl_int Result) {
|
|
if (!Result)
|
|
return "success";
|
|
std::ostringstream OutStream;
|
|
OutStream << "OpenCL error: code = " << Result;
|
|
return OutStream.str();
|
|
}
|
|
|
|
static Status getOpenCLError(cl_int Result, const std::string &Message) {
|
|
if (!Result)
|
|
return Status();
|
|
std::ostringstream OutStream;
|
|
OutStream << getOpenCLErrorMessage(Result) << ", message = " << Message;
|
|
return Status(OutStream.str());
|
|
}
|
|
|
|
static void logOpenCLWarning(cl_int Result, const std::string &Message) {
|
|
if (Result) {
|
|
std::ostringstream OutStream;
|
|
OutStream << Message << ": " << getOpenCLErrorMessage(Result);
|
|
logWarning(OutStream.str());
|
|
}
|
|
}
|
|
|
|
class OpenCLPlatform : public Platform {
|
|
public:
|
|
~OpenCLPlatform() override = default;
|
|
|
|
static Expected<OpenCLPlatform> create();
|
|
|
|
Expected<int> getDeviceCount() override;
|
|
|
|
Expected<Stream> createStream(int DeviceIndex) override;
|
|
|
|
Expected<Event> createEvent(int DeviceIndex) override;
|
|
|
|
Expected<Program> createProgramFromSource(Span<const char> Source,
|
|
int DeviceIndex) override;
|
|
|
|
protected:
|
|
Status streamSync(void *Stream) override;
|
|
|
|
Status streamWaitOnEvent(void *Stream, void *Event) override;
|
|
|
|
Expected<void *> rawMallocD(ptrdiff_t ByteCount, int DeviceIndex) override;
|
|
HandleDestructor getDeviceMemoryHandleDestructor() override;
|
|
void *getDeviceMemorySpanHandle(void *BaseHandle, size_t ByteSize,
|
|
size_t ByteOffset) override;
|
|
void rawDestroyDeviceMemorySpanHandle(void *Handle) override;
|
|
|
|
Expected<void *> rawGetDeviceSymbolAddress(const void *Symbol,
|
|
int DeviceIndex) override;
|
|
Expected<ptrdiff_t> rawGetDeviceSymbolSize(const void *Symbol,
|
|
int DeviceIndex) override;
|
|
|
|
Status rawRegisterHostMem(const void *Memory, ptrdiff_t ByteCount) override;
|
|
HandleDestructor getUnregisterHostMemoryHandleDestructor() override;
|
|
|
|
Expected<void *> rawMallocRegisteredH(ptrdiff_t ByteCount) override;
|
|
HandleDestructor getFreeHostMemoryHandleDestructor() override;
|
|
|
|
Status asyncCopyDToD(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
|
|
void *DeviceDst, ptrdiff_t DeviceDstByteOffset,
|
|
ptrdiff_t ByteCount, void *Stream) override;
|
|
Status asyncCopyDToH(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
|
|
void *HostDst, ptrdiff_t ByteCount,
|
|
void *Stream) override;
|
|
Status asyncCopyHToD(const void *HostSrc, void *DeviceDst,
|
|
ptrdiff_t DeviceDstByteOffset, ptrdiff_t ByteCount,
|
|
void *Stream) override;
|
|
|
|
Status asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
|
|
ptrdiff_t ByteCount, char ByteValue,
|
|
void *Stream) override;
|
|
|
|
Status addStreamCallback(Stream &Stream, StreamCallback Callback) override;
|
|
|
|
Status enqueueEvent(void *Event, void *Stream) override;
|
|
bool eventIsDone(void *Event) override;
|
|
Status eventSync(void *Event) override;
|
|
Expected<float> getSecondsBetweenEvents(void *StartEvent,
|
|
void *EndEvent) override;
|
|
|
|
Expected<void *> rawCreateKernel(void *Program,
|
|
const std::string &Name) override;
|
|
HandleDestructor getKernelHandleDestructor() override;
|
|
|
|
Status rawEnqueueKernelLaunch(void *Stream, void *Kernel,
|
|
KernelLaunchDimensions LaunchDimensions,
|
|
Span<void *> Arguments,
|
|
Span<size_t> ArgumentSizes,
|
|
size_t SharedMemoryBytes) override;
|
|
|
|
private:
|
|
OpenCLPlatform(std::vector<FullDeviceID> &&FullDeviceIDs,
|
|
std::vector<cl_context> &&Contexts,
|
|
std::vector<cl_command_queue> &&CommandQueues)
|
|
: FullDeviceIDs(std::move(FullDeviceIDs)), Contexts(std::move(Contexts)),
|
|
CommandQueues(std::move(CommandQueues)) {}
|
|
|
|
std::vector<FullDeviceID> FullDeviceIDs;
|
|
std::vector<cl_context> Contexts;
|
|
std::vector<cl_command_queue> CommandQueues;
|
|
};
|
|
|
|
Expected<OpenCLPlatform> OpenCLPlatform::create() {
|
|
constexpr cl_uint MaxNumEntries = 100;
|
|
cl_platform_id Platforms[MaxNumEntries];
|
|
cl_uint NumPlatforms;
|
|
if (cl_int Result = clGetPlatformIDs(MaxNumEntries, Platforms, &NumPlatforms))
|
|
return getOpenCLError(Result, "clGetPlatformIDs");
|
|
|
|
std::vector<FullDeviceID> FullDeviceIDs;
|
|
for (cl_uint PlatformIndex = 0; PlatformIndex < NumPlatforms;
|
|
++PlatformIndex) {
|
|
cl_uint NumDevices;
|
|
cl_device_id Devices[MaxNumEntries];
|
|
if (cl_int Result =
|
|
clGetDeviceIDs(Platforms[PlatformIndex], CL_DEVICE_TYPE_ALL,
|
|
MaxNumEntries, Devices, &NumDevices))
|
|
return getOpenCLError(Result, "clGetDeviceIDs");
|
|
for (cl_uint DeviceIndex = 0; DeviceIndex < NumDevices; ++DeviceIndex)
|
|
FullDeviceIDs.emplace_back(Platforms[PlatformIndex],
|
|
Devices[DeviceIndex]);
|
|
}
|
|
|
|
if (FullDeviceIDs.empty())
|
|
return Status("No OpenCL device available on this system.");
|
|
|
|
std::vector<cl_context> Contexts(FullDeviceIDs.size());
|
|
std::vector<cl_command_queue> CommandQueues(FullDeviceIDs.size());
|
|
for (size_t I = 0; I < FullDeviceIDs.size(); ++I) {
|
|
cl_int CreateContextResult;
|
|
Contexts[I] = clCreateContext(nullptr, 1, &FullDeviceIDs[I].DeviceID,
|
|
nullptr, nullptr, &CreateContextResult);
|
|
if (CreateContextResult)
|
|
return getOpenCLError(CreateContextResult, "clCreateContext");
|
|
|
|
cl_int CreateCommandQueueResult;
|
|
CommandQueues[I] = clCreateCommandQueue(
|
|
Contexts[I], FullDeviceIDs[I].DeviceID, CL_QUEUE_PROFILING_ENABLE,
|
|
&CreateCommandQueueResult);
|
|
if (CreateCommandQueueResult)
|
|
return getOpenCLError(CreateCommandQueueResult, "clCreateCommandQueue");
|
|
}
|
|
|
|
return OpenCLPlatform(std::move(FullDeviceIDs), std::move(Contexts),
|
|
std::move(CommandQueues));
|
|
}
|
|
|
|
Expected<int> OpenCLPlatform::getDeviceCount() { return FullDeviceIDs.size(); }
|
|
|
|
static void openCLDestroyStream(void *H) {
|
|
logOpenCLWarning(clReleaseCommandQueue(static_cast<cl_command_queue>(H)),
|
|
"clReleaseCommandQueue");
|
|
}
|
|
|
|
Expected<Stream> OpenCLPlatform::createStream(int DeviceIndex) {
|
|
cl_int Result;
|
|
cl_command_queue Queue = clCreateCommandQueue(
|
|
Contexts[DeviceIndex], FullDeviceIDs[DeviceIndex].DeviceID,
|
|
CL_QUEUE_PROFILING_ENABLE, &Result);
|
|
if (Result)
|
|
return getOpenCLError(Result, "clCreateCommandQueue");
|
|
return constructStream(this, DeviceIndex, Queue, openCLDestroyStream);
|
|
}
|
|
|
|
static void openCLEventDestroy(void *H) {
|
|
cl_event *CLEvent = static_cast<cl_event *>(H);
|
|
logOpenCLWarning(clReleaseEvent(*CLEvent), "clReleaseEvent");
|
|
delete CLEvent;
|
|
}
|
|
|
|
Status OpenCLPlatform::streamSync(void *Stream) {
|
|
return getOpenCLError(clFinish(static_cast<cl_command_queue>(Stream)),
|
|
"clFinish");
|
|
}
|
|
|
|
Status OpenCLPlatform::streamWaitOnEvent(void *Stream, void *Event) {
|
|
cl_event *CLEvent = static_cast<cl_event *>(Event);
|
|
return getOpenCLError(
|
|
clEnqueueBarrierWithWaitList(static_cast<cl_command_queue>(Stream), 1,
|
|
CLEvent, nullptr),
|
|
"clEnqueueMarkerWithWaitList");
|
|
}
|
|
|
|
Expected<Event> OpenCLPlatform::createEvent(int DeviceIndex) {
|
|
cl_int Result;
|
|
cl_event Event = clCreateUserEvent(Contexts[DeviceIndex], &Result);
|
|
if (Result)
|
|
return getOpenCLError(Result, "clCreateUserEvent");
|
|
if (cl_int Result = clSetUserEventStatus(Event, CL_COMPLETE))
|
|
return getOpenCLError(Result, "clSetUserEventStatus");
|
|
return constructEvent(this, DeviceIndex, new cl_event(Event),
|
|
openCLEventDestroy);
|
|
}
|
|
|
|
static void openCLDestroyProgram(void *H) {
|
|
logOpenCLWarning(clReleaseProgram(static_cast<cl_program>(H)),
|
|
"clReleaseProgram");
|
|
}
|
|
|
|
Expected<Program>
|
|
OpenCLPlatform::createProgramFromSource(Span<const char> Source,
|
|
int DeviceIndex) {
|
|
cl_int Error;
|
|
const char *CSource = Source.data();
|
|
size_t SourceSize = Source.size();
|
|
cl_program Program = clCreateProgramWithSource(Contexts[DeviceIndex], 1,
|
|
&CSource, &SourceSize, &Error);
|
|
if (Error)
|
|
return getOpenCLError(Error, "clCreateProgramWithSource");
|
|
cl_device_id DeviceID = FullDeviceIDs[DeviceIndex].DeviceID;
|
|
if (cl_int Error =
|
|
clBuildProgram(Program, 1, &DeviceID, nullptr, nullptr, nullptr))
|
|
return getOpenCLError(Error, "clBuildProgram");
|
|
return constructProgram(this, Program, openCLDestroyProgram);
|
|
}
|
|
|
|
Expected<void *> OpenCLPlatform::rawMallocD(ptrdiff_t ByteCount,
|
|
int DeviceIndex) {
|
|
cl_int Result;
|
|
cl_mem Memory = clCreateBuffer(Contexts[DeviceIndex], CL_MEM_READ_WRITE,
|
|
ByteCount, nullptr, &Result);
|
|
if (Result)
|
|
return getOpenCLError(Result, "clCreateBuffer");
|
|
return reinterpret_cast<void *>(Memory);
|
|
}
|
|
|
|
static void openCLDestroyDeviceMemory(void *H) {
|
|
logOpenCLWarning(clReleaseMemObject(static_cast<cl_mem>(H)),
|
|
"clReleaseMemObject");
|
|
}
|
|
|
|
HandleDestructor OpenCLPlatform::getDeviceMemoryHandleDestructor() {
|
|
return openCLDestroyDeviceMemory;
|
|
}
|
|
|
|
void *OpenCLPlatform::getDeviceMemorySpanHandle(void *BaseHandle,
|
|
size_t ByteSize,
|
|
size_t ByteOffset) {
|
|
cl_int Error;
|
|
cl_buffer_region Region;
|
|
Region.origin = ByteOffset;
|
|
Region.size = ByteSize;
|
|
cl_mem SubBuffer =
|
|
clCreateSubBuffer(static_cast<cl_mem>(BaseHandle), 0,
|
|
CL_BUFFER_CREATE_TYPE_REGION, &Region, &Error);
|
|
logOpenCLWarning(Error, "clCreateSubBuffer");
|
|
if (Error)
|
|
return nullptr;
|
|
return SubBuffer;
|
|
}
|
|
|
|
void OpenCLPlatform::rawDestroyDeviceMemorySpanHandle(void *Handle) {
|
|
openCLDestroyDeviceMemory(Handle);
|
|
}
|
|
|
|
Expected<void *>
|
|
OpenCLPlatform::rawGetDeviceSymbolAddress(const void * /*Symbol*/,
|
|
int /*DeviceIndex*/) {
|
|
// This doesn't seem to have any equivalent in OpenCL.
|
|
return Status("not implemented");
|
|
}
|
|
|
|
Expected<ptrdiff_t>
|
|
OpenCLPlatform::rawGetDeviceSymbolSize(const void * /*Symbol*/,
|
|
int /*DeviceIndex*/) {
|
|
// This doesn't seem to have any equivalent in OpenCL.
|
|
return Status("not implemented");
|
|
}
|
|
|
|
static void noOpHandleDestructor(void *) {}
|
|
|
|
Status OpenCLPlatform::rawRegisterHostMem(const void * /*Memory*/,
|
|
ptrdiff_t /*ByteCount*/) {
|
|
// TODO(jhen): Do we want to do something to pin the memory here?
|
|
return Status();
|
|
}
|
|
|
|
HandleDestructor OpenCLPlatform::getUnregisterHostMemoryHandleDestructor() {
|
|
// TODO(jhen): Do we want to unpin the memory here?
|
|
return noOpHandleDestructor;
|
|
}
|
|
|
|
Expected<void *> OpenCLPlatform::rawMallocRegisteredH(ptrdiff_t ByteCount) {
|
|
// TODO(jhen): Do we want to do something to pin the memory here?
|
|
return std::malloc(ByteCount);
|
|
}
|
|
|
|
static void freeMemoryHandleDestructor(void *Memory) {
|
|
// TODO(jhen): Do we want to unpin the memory here?
|
|
std::free(Memory);
|
|
}
|
|
|
|
HandleDestructor OpenCLPlatform::getFreeHostMemoryHandleDestructor() {
|
|
return freeMemoryHandleDestructor;
|
|
}
|
|
|
|
Status OpenCLPlatform::asyncCopyDToD(const void *DeviceSrc,
|
|
ptrdiff_t DeviceSrcByteOffset,
|
|
void *DeviceDst,
|
|
ptrdiff_t DeviceDstByteOffset,
|
|
ptrdiff_t ByteCount, void *Stream) {
|
|
return getOpenCLError(
|
|
clEnqueueCopyBuffer(static_cast<cl_command_queue>(Stream),
|
|
static_cast<cl_mem>(const_cast<void *>(DeviceSrc)),
|
|
static_cast<cl_mem>(DeviceDst), DeviceSrcByteOffset,
|
|
DeviceDstByteOffset, ByteCount, 0, nullptr, nullptr),
|
|
"clEnqueueCopyBuffer");
|
|
}
|
|
|
|
Status OpenCLPlatform::asyncCopyDToH(const void *DeviceSrc,
|
|
ptrdiff_t DeviceSrcByteOffset,
|
|
void *HostDst, ptrdiff_t ByteCount,
|
|
void *Stream) {
|
|
return getOpenCLError(
|
|
clEnqueueReadBuffer(static_cast<cl_command_queue>(Stream),
|
|
static_cast<cl_mem>(const_cast<void *>(DeviceSrc)),
|
|
CL_TRUE, DeviceSrcByteOffset, ByteCount, HostDst, 0,
|
|
nullptr, nullptr),
|
|
"clEnqueueReadBuffer");
|
|
}
|
|
|
|
Status OpenCLPlatform::asyncCopyHToD(const void *HostSrc, void *DeviceDst,
|
|
ptrdiff_t DeviceDstByteOffset,
|
|
ptrdiff_t ByteCount, void *Stream) {
|
|
return getOpenCLError(
|
|
clEnqueueWriteBuffer(static_cast<cl_command_queue>(Stream),
|
|
static_cast<cl_mem>(DeviceDst), CL_TRUE,
|
|
DeviceDstByteOffset, ByteCount, HostSrc, 0, nullptr,
|
|
nullptr),
|
|
"clEnqueueWriteBuffer");
|
|
}
|
|
|
|
Status OpenCLPlatform::asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
|
|
ptrdiff_t ByteCount, char ByteValue,
|
|
void *Stream) {
|
|
return getOpenCLError(
|
|
clEnqueueFillBuffer(static_cast<cl_command_queue>(Stream),
|
|
static_cast<cl_mem>(DeviceDst), &ByteValue, 1,
|
|
ByteOffset, ByteCount, 0, nullptr, nullptr),
|
|
"clEnqueueFillBuffer");
|
|
}
|
|
|
|
struct StreamCallbackUserData {
|
|
StreamCallbackUserData(Stream &TheStream, StreamCallback Function,
|
|
cl_event EndEvent)
|
|
: TheStream(TheStream), TheFunction(std::move(Function)),
|
|
EndEvent(EndEvent) {}
|
|
|
|
Stream &TheStream;
|
|
StreamCallback TheFunction;
|
|
cl_event EndEvent;
|
|
};
|
|
|
|
// A function with the right signature to pass to clSetEventCallback.
|
|
void CL_CALLBACK openCLStreamCallbackShim(cl_event,
|
|
cl_int EventCommandExecStatus,
|
|
void *UserData) {
|
|
std::unique_ptr<StreamCallbackUserData> Data(
|
|
static_cast<StreamCallbackUserData *>(UserData));
|
|
Data->TheFunction(
|
|
Data->TheStream,
|
|
getOpenCLError(EventCommandExecStatus, "stream callback error state"));
|
|
if (cl_int Result = clSetUserEventStatus(Data->EndEvent, CL_COMPLETE))
|
|
logOpenCLWarning(Result, "clSetUserEventStatus");
|
|
if (cl_int Result = clReleaseEvent(Data->EndEvent))
|
|
logOpenCLWarning(Result, "clReleaseEvent");
|
|
}
|
|
|
|
Status OpenCLPlatform::addStreamCallback(Stream &TheStream,
|
|
StreamCallback Callback) {
|
|
cl_int Result;
|
|
cl_event StartEvent =
|
|
clCreateUserEvent(Contexts[TheStream.getDeviceIndex()], &Result);
|
|
if (Result)
|
|
return getOpenCLError(Result, "clCreateUserEvent");
|
|
cl_event EndEvent =
|
|
clCreateUserEvent(Contexts[TheStream.getDeviceIndex()], &Result);
|
|
if (Result)
|
|
return getOpenCLError(Result, "clCreateUserEvent");
|
|
cl_event StartBarrierEvent;
|
|
if (cl_int Result = clEnqueueBarrierWithWaitList(
|
|
static_cast<cl_command_queue>(getStreamHandle(TheStream)), 1,
|
|
&StartEvent, &StartBarrierEvent))
|
|
return getOpenCLError(Result, "clEnqueueBarrierWithWaitList");
|
|
|
|
if (cl_int Result = clEnqueueBarrierWithWaitList(
|
|
static_cast<cl_command_queue>(getStreamHandle(TheStream)), 1,
|
|
&EndEvent, nullptr))
|
|
return getOpenCLError(Result, "clEnqueueBarrierWithWaitList");
|
|
|
|
std::unique_ptr<StreamCallbackUserData> UserData(
|
|
new StreamCallbackUserData(TheStream, std::move(Callback), EndEvent));
|
|
if (cl_int Result =
|
|
clSetEventCallback(StartBarrierEvent, CL_RUNNING,
|
|
openCLStreamCallbackShim, UserData.release()))
|
|
return getOpenCLError(Result, "clSetEventCallback");
|
|
|
|
if (cl_int Result = clSetUserEventStatus(StartEvent, CL_COMPLETE))
|
|
return getOpenCLError(Result, "clSetUserEventStatus");
|
|
|
|
if (cl_int Result = clReleaseEvent(StartBarrierEvent))
|
|
return getOpenCLError(Result, "clReleaseEvent");
|
|
|
|
return getOpenCLError(clReleaseEvent(StartEvent), "clReleaseEvent");
|
|
}
|
|
|
|
Status OpenCLPlatform::enqueueEvent(void *Event, void *Stream) {
|
|
cl_event *CLEvent = static_cast<cl_event *>(Event);
|
|
cl_event OldEvent = *CLEvent;
|
|
cl_event NewEvent;
|
|
if (cl_int Result = clEnqueueMarkerWithWaitList(
|
|
static_cast<cl_command_queue>(Stream), 0, nullptr, &NewEvent))
|
|
return getOpenCLError(Result, "clEnqueueMarkerWithWaitList");
|
|
*CLEvent = NewEvent;
|
|
return getOpenCLError(clReleaseEvent(OldEvent), "clReleaseEvent");
|
|
}
|
|
|
|
bool OpenCLPlatform::eventIsDone(void *Event) {
|
|
cl_event *CLEvent = static_cast<cl_event *>(Event);
|
|
cl_int EventStatus;
|
|
logOpenCLWarning(clGetEventInfo(*CLEvent, CL_EVENT_COMMAND_EXECUTION_STATUS,
|
|
sizeof(EventStatus), &EventStatus, nullptr),
|
|
"clGetEventInfo");
|
|
return EventStatus == CL_COMPLETE || EventStatus < 0;
|
|
}
|
|
|
|
Status OpenCLPlatform::eventSync(void *Event) {
|
|
cl_event *CLEvent = static_cast<cl_event *>(Event);
|
|
return getOpenCLError(clWaitForEvents(1, CLEvent), "clWaitForEvents");
|
|
}
|
|
|
|
Expected<float> OpenCLPlatform::getSecondsBetweenEvents(void *StartEvent,
|
|
void *EndEvent) {
|
|
cl_event *CLStartEvent = static_cast<cl_event *>(StartEvent);
|
|
cl_event *CLEndEvent = static_cast<cl_event *>(EndEvent);
|
|
|
|
cl_profiling_info ParamName = CL_PROFILING_COMMAND_END;
|
|
cl_ulong StartNanoseconds;
|
|
cl_ulong EndNanoseconds;
|
|
if (cl_int Result =
|
|
clGetEventProfilingInfo(*CLStartEvent, ParamName, sizeof(cl_ulong),
|
|
&StartNanoseconds, nullptr))
|
|
return getOpenCLError(Result, "clGetEventProfilingInfo");
|
|
if (cl_int Result = clGetEventProfilingInfo(
|
|
*CLEndEvent, ParamName, sizeof(cl_ulong), &EndNanoseconds, nullptr))
|
|
return getOpenCLError(Result, "clGetEventProfilingInfo");
|
|
return (EndNanoseconds - StartNanoseconds) * 1e-12;
|
|
}
|
|
|
|
Expected<void *> OpenCLPlatform::rawCreateKernel(void *Program,
|
|
const std::string &Name) {
|
|
|
|
cl_int Error;
|
|
cl_kernel Kernel =
|
|
clCreateKernel(static_cast<cl_program>(Program), Name.c_str(), &Error);
|
|
if (Error)
|
|
return getOpenCLError(Error, "clCreateKernel");
|
|
return Kernel;
|
|
}
|
|
|
|
static void openCLDestroyKernel(void *H) {
|
|
logOpenCLWarning(clReleaseKernel(static_cast<cl_kernel>(H)),
|
|
"clReleaseKernel");
|
|
}
|
|
|
|
HandleDestructor OpenCLPlatform::getKernelHandleDestructor() {
|
|
return openCLDestroyKernel;
|
|
}
|
|
|
|
Status OpenCLPlatform::rawEnqueueKernelLaunch(
|
|
void *Stream, void *Kernel, KernelLaunchDimensions LaunchDimensions,
|
|
Span<void *> Arguments, Span<size_t> ArgumentSizes,
|
|
size_t SharedMemoryBytes) {
|
|
if (SharedMemoryBytes != 0)
|
|
return Status("OpenCL kernel launches only accept zero for the shared "
|
|
"memory byte size");
|
|
cl_kernel TheKernel = static_cast<cl_kernel>(Kernel);
|
|
for (int I = 0; I < Arguments.size(); ++I)
|
|
if (cl_int Error =
|
|
clSetKernelArg(TheKernel, I, ArgumentSizes[I], Arguments[I]))
|
|
return getOpenCLError(Error, "clSetKernelArg");
|
|
size_t LocalWorkSize[] = {LaunchDimensions.BlockX, LaunchDimensions.BlockY,
|
|
LaunchDimensions.BlockZ};
|
|
size_t GlobalWorkSize[] = {LaunchDimensions.BlockX * LaunchDimensions.GridX,
|
|
LaunchDimensions.BlockY * LaunchDimensions.GridY,
|
|
LaunchDimensions.BlockZ * LaunchDimensions.GridZ};
|
|
return getOpenCLError(
|
|
clEnqueueNDRangeKernel(static_cast<cl_command_queue>(Stream), TheKernel,
|
|
3, nullptr, GlobalWorkSize, LocalWorkSize, 0,
|
|
nullptr, nullptr),
|
|
"clEnqueueNDRangeKernel");
|
|
}
|
|
|
|
} // namespace
|
|
|
|
namespace opencl {
|
|
|
|
/// Gets an OpenCLPlatform instance and returns it as an unowned pointer to a
|
|
/// Platform.
|
|
Expected<Platform *> getPlatform() {
|
|
static auto MaybePlatform = []() -> Expected<OpenCLPlatform *> {
|
|
Expected<OpenCLPlatform> CreationResult = OpenCLPlatform::create();
|
|
if (CreationResult.isError())
|
|
return CreationResult.getError();
|
|
else
|
|
return new OpenCLPlatform(CreationResult.takeValue());
|
|
}();
|
|
return MaybePlatform;
|
|
}
|
|
|
|
} // namespace opencl
|
|
|
|
} // namespace acxxel
|