From 06308bd8f959e51d0109983ccf8da18e06c25d83 Mon Sep 17 00:00:00 2001
From: MaYuhang <2902139028@qq.com>
Date: Wed, 17 Dec 2025 16:59:43 +0800
Subject: [PATCH] =?UTF-8?q?issue/800:=20=E6=B7=BB=E5=8A=A0OpenCL=E8=BF=90?=
 =?UTF-8?q?=E8=A1=8C=E6=97=B6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/infinicore.h                   |   1 +
 src/infinirt-test/main.cc              |   7 +
 src/infinirt/infinirt.cc               |   7 +
 src/infinirt/opencl/infinirt_opencl.cc | 435 +++++++++++++++++++++++++
 src/infinirt/opencl/infinirt_opencl.h  |  26 ++
 xmake.lua                              |  15 +
 xmake/opencl.lua                       |  26 ++
 7 files changed, 517 insertions(+)
 create mode 100644 src/infinirt/opencl/infinirt_opencl.cc
 create mode 100644 src/infinirt/opencl/infinirt_opencl.h
 create mode 100644 xmake/opencl.lua

diff --git a/include/infinicore.h b/include/infinicore.h
index 49654937e..3d2b82cb8 100644
--- a/include/infinicore.h
+++ b/include/infinicore.h
@@ -47,6 +47,7 @@ typedef enum {
     INFINI_DEVICE_KUNLUN = 7,
     INFINI_DEVICE_HYGON = 8,
     INFINI_DEVICE_QY = 9,
+    INFINI_DEVICE_OPENCL = 10,
     INFINI_DEVICE_TYPE_COUNT
 } infiniDevice_t;
 
diff --git a/src/infinirt-test/main.cc b/src/infinirt-test/main.cc
index e6613a262..91269fd61 100644
--- a/src/infinirt-test/main.cc
+++ b/src/infinirt-test/main.cc
@@ -23,6 +23,7 @@ void printUsage() {
               << " qy" << std::endl
               << " kunlun" << std::endl
               << " hygon" << std::endl
+              << " opencl" << std::endl
               << std::endl;
     exit(EXIT_FAILURE);
 }
@@ -55,6 +56,7 @@ ParsedArgs parseArgs(int argc, char *argv[]) {
     else PARSE_DEVICE("--qy", INFINI_DEVICE_QY)
     else PARSE_DEVICE("--kunlun", INFINI_DEVICE_KUNLUN)
     else PARSE_DEVICE("--hygon", INFINI_DEVICE_HYGON)
+    else PARSE_DEVICE("--opencl", INFINI_DEVICE_OPENCL)
     else {
         printUsage();
     }
@@ -73,6 +75,11 @@ int main(int argc, char *argv[]) {
     std::cout << "Testing Device: " << args.device_type << std::endl;
     infiniDevice_t device = args.device_type;
 
+    // Initialize the runtime before any other infinirt call
+    if (infinirtInit() != INFINI_STATUS_SUCCESS) {
+        std::cerr << "Failed to init device" << std::endl;
+    }
+
     // 获取设备总数
     std::vector<int> deviceCounts(INFINI_DEVICE_TYPE_COUNT, 0);
     if (infinirtGetAllDeviceCount(deviceCounts.data()) != INFINI_STATUS_SUCCESS) {
diff --git a/src/infinirt/infinirt.cc b/src/infinirt/infinirt.cc
index 3d37f4f4c..ed7e21c7e 100644
--- a/src/infinirt/infinirt.cc
+++ b/src/infinirt/infinirt.cc
@@ -7,13 +7,17 @@
 #include "kunlun/infinirt_kunlun.h"
 #include "metax/infinirt_metax.h"
 #include "moore/infinirt_moore.h"
+#include "opencl/infinirt_opencl.h"
 
 thread_local infiniDevice_t CURRENT_DEVICE_TYPE = INFINI_DEVICE_CPU;
 thread_local int CURRENT_DEVICE_ID = 0;
 
 __C infiniStatus_t infinirtInit() {
 #ifdef ENABLE_ASCEND_API
     CHECK_STATUS(infinirt::ascend::init());
 #endif
+#ifdef ENABLE_OPENCL_API
+    CHECK_STATUS(infinirt::opencl::init());
+#endif
     return INFINI_STATUS_SUCCESS;
 }
@@ -79,6 +83,9 @@ __C infiniStatus_t infinirtGetDevice(infiniDevice_t *device_ptr, int *device_id_
         case INFINI_DEVICE_HYGON: \
             _status = infinirt::hygon::API PARAMS; \
             break; \
+        case INFINI_DEVICE_OPENCL: \
+            _status = infinirt::opencl::API PARAMS; \
+            break; \
         default: \
             _status = INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; \
         } \
diff --git a/src/infinirt/opencl/infinirt_opencl.cc b/src/infinirt/opencl/infinirt_opencl.cc
new file mode 100644
index 000000000..61837f71d
--- /dev/null
+++ b/src/infinirt/opencl/infinirt_opencl.cc
@@ -0,0 +1,435 @@
+#include "infinirt_opencl.h"
+#include "../../utils.h"
+#include <CL/cl.h>
+#include <algorithm>
+#include <mutex>
+#include <vector>
+
+#define CHECK_CLRT(RT_API) CHECK_INTERNAL(RT_API, CL_SUCCESS)
+
+// Process-wide OpenCL state, created once by init() and guarded by init_mutex.
+static std::mutex init_mutex;
+static cl_platform_id platform = nullptr;
+static cl_context context = nullptr;
+static std::vector<cl_device_id> devices;
+// queues[d] lists every command queue created on device d; index 0 is its default queue.
+static std::vector<std::vector<cl_command_queue>> queues;
+static cl_uint device_count = 0;
+static bool initialized = false;
+// Index of the device selected by the calling thread through setDevice().
+thread_local int CUR_DEV_ID = 0;
+
+// Object behind infinirtEvent_t; `ev` stays null until the event is first recorded.
+struct InfinirtEventStruct {
+    cl_event ev = nullptr;
+    bool bound = false;
+};
+
+namespace infinirt::opencl {
+
+// Releases every tracked queue and the context, then resets all global state.
+// The caller must hold init_mutex.
+static void cleanupResources() {
+    for (auto &qvec : queues) {
+        for (cl_command_queue q : qvec) {
+            if (q) {
+                clReleaseCommandQueue(q);
+            }
+        }
+    }
+    queues.clear();
+    if (context) {
+        clReleaseContext(context);
+        context = nullptr;
+    }
+    devices.clear();
+    device_count = 0;
+    platform = nullptr;
+    initialized = false;
+}
+
+// Verifies that init() succeeded and that the calling thread's device index is in range.
+// The caller must hold init_mutex.
+static infiniStatus_t checkReady() {
+    if (!initialized) {
+        return INFINI_STATUS_DEVICE_NOT_INITIALIZED;
+    }
+    if (CUR_DEV_ID < 0 || CUR_DEV_ID >= static_cast<int>(device_count)) {
+        return INFINI_STATUS_DEVICE_NOT_FOUND;
+    }
+    return INFINI_STATUS_SUCCESS;
+}
+
+// Looks up the default queue of the calling thread's device under init_mutex, because
+// streamCreate()/streamDestroy() may be resizing the queue list on another thread.
+static infiniStatus_t getDefaultQueue(cl_command_queue *queue) {
+    std::lock_guard<std::mutex> lk(init_mutex);
+    const infiniStatus_t ready = checkReady();
+    if (ready != INFINI_STATUS_SUCCESS) {
+        return ready;
+    }
+    if (queues[CUR_DEV_ID].empty()) {
+        return INFINI_STATUS_INTERNAL_ERROR;
+    }
+    *queue = queues[CUR_DEV_ID].front();
+    return INFINI_STATUS_SUCCESS;
+}
+
+// Shared implementation of mallocDevice()/mallocHost(): both allocate with clSVMAlloc.
+static infiniStatus_t svmAlloc(void **p_ptr, size_t size) {
+    if (!p_ptr) {
+        return INFINI_STATUS_NULL_POINTER;
+    }
+    void *p = clSVMAlloc(context, CL_MEM_READ_WRITE, size, 0);
+    if (!p) {
+        return INFINI_STATUS_NULL_POINTER;
+    }
+    *p_ptr = p;
+    return INFINI_STATUS_SUCCESS;
+}
+
+// Discovers the GPU devices of the first OpenCL platform and creates one shared context
+// plus one default command queue per device. Safe to call repeatedly; later calls are no-ops.
+// Returns DEVICE_NOT_FOUND when no platform or GPU exists, DEVICE_NOT_INITIALIZED on any
+// other OpenCL failure.
+infiniStatus_t init() {
+    std::lock_guard<std::mutex> lk(init_mutex);
+    if (initialized) {
+        return INFINI_STATUS_SUCCESS;
+    }
+    // Every failure path tears down whatever was created so that init() can be retried.
+    const auto fail = [](infiniStatus_t status) {
+        cleanupResources();
+        return status;
+    };
+
+    cl_uint num_platforms = 0;
+    if (clGetPlatformIDs(0, nullptr, &num_platforms) != CL_SUCCESS) {
+        return fail(INFINI_STATUS_DEVICE_NOT_INITIALIZED);
+    }
+    if (num_platforms == 0) {
+        return fail(INFINI_STATUS_DEVICE_NOT_FOUND);
+    }
+    if (clGetPlatformIDs(1, &platform, nullptr) != CL_SUCCESS) {
+        return fail(INFINI_STATUS_DEVICE_NOT_INITIALIZED);
+    }
+
+    cl_int err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, nullptr, &device_count);
+    // OpenCL reports "no matching device" as CL_DEVICE_NOT_FOUND rather than a zero count.
+    if (err == CL_DEVICE_NOT_FOUND || (err == CL_SUCCESS && device_count == 0)) {
+        return fail(INFINI_STATUS_DEVICE_NOT_FOUND);
+    }
+    if (err != CL_SUCCESS) {
+        return fail(INFINI_STATUS_DEVICE_NOT_INITIALIZED);
+    }
+    devices.resize(static_cast<size_t>(device_count));
+    err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, device_count, devices.data(), nullptr);
+    if (err != CL_SUCCESS) {
+        return fail(INFINI_STATUS_DEVICE_NOT_INITIALIZED);
+    }
+
+    context = clCreateContext(nullptr, device_count, devices.data(), nullptr, nullptr, &err);
+    if (err != CL_SUCCESS) {
+        return fail(INFINI_STATUS_DEVICE_NOT_INITIALIZED);
+    }
+
+    queues.resize(static_cast<size_t>(device_count));
+    for (cl_uint i = 0; i < device_count; ++i) {
+        cl_command_queue q = clCreateCommandQueueWithProperties(context, devices[i], nullptr, &err);
+        if (err != CL_SUCCESS) {
+            return fail(INFINI_STATUS_DEVICE_NOT_INITIALIZED);
+        }
+        queues[i].push_back(q); // the first queue of a device is its default queue
+    }
+    initialized = true;
+    return INFINI_STATUS_SUCCESS;
+}
+
+// Reports how many GPU devices init() found.
+infiniStatus_t getDeviceCount(int *count) {
+    if (!count) {
+        return INFINI_STATUS_BAD_PARAM;
+    }
+    std::lock_guard<std::mutex> lk(init_mutex);
+    if (!initialized) {
+        return INFINI_STATUS_DEVICE_NOT_INITIALIZED;
+    }
+    *count = static_cast<int>(device_count);
+    return INFINI_STATUS_SUCCESS;
+}
+
+// Selects the device used by subsequent calls made from the calling thread.
+infiniStatus_t setDevice(int device_id) {
+    std::lock_guard<std::mutex> lk(init_mutex);
+    if (!initialized) {
+        return INFINI_STATUS_DEVICE_NOT_INITIALIZED;
+    }
+    if (device_id < 0 || device_id >= static_cast<int>(device_count)) {
+        return INFINI_STATUS_DEVICE_NOT_FOUND;
+    }
+    CUR_DEV_ID = device_id;
+    return INFINI_STATUS_SUCCESS;
+}
+
+// Blocks until every queue of the current device has drained.
+infiniStatus_t deviceSynchronize() {
+    // Snapshot the queues under the lock and retain them so that a concurrent
+    // streamDestroy() cannot release one while clFinish() is still running on it.
+    std::vector<cl_command_queue> pending;
+    {
+        std::lock_guard<std::mutex> lk(init_mutex);
+        const infiniStatus_t ready = checkReady();
+        if (ready != INFINI_STATUS_SUCCESS) {
+            return ready;
+        }
+        for (cl_command_queue q : queues[CUR_DEV_ID]) {
+            if (q && clRetainCommandQueue(q) == CL_SUCCESS) {
+                pending.push_back(q);
+            }
+        }
+    }
+    cl_int first_err = CL_SUCCESS;
+    for (cl_command_queue q : pending) {
+        const cl_int err = clFinish(q);
+        if (first_err == CL_SUCCESS) {
+            first_err = err;
+        }
+        clReleaseCommandQueue(q);
+    }
+    CHECK_CLRT(first_err);
+    return INFINI_STATUS_SUCCESS;
+}
+
+// Creates an additional command queue on the current device and returns it as a stream.
+infiniStatus_t streamCreate(infinirtStream_t *stream_ptr) {
+    if (!stream_ptr) {
+        return INFINI_STATUS_NULL_POINTER;
+    }
+    std::lock_guard<std::mutex> lk(init_mutex);
+    const infiniStatus_t ready = checkReady();
+    if (ready != INFINI_STATUS_SUCCESS) {
+        return ready;
+    }
+    cl_int err = CL_SUCCESS;
+    cl_command_queue queue = clCreateCommandQueueWithProperties(context, devices[CUR_DEV_ID], nullptr, &err);
+    CHECK_CLRT(err);
+    queues[CUR_DEV_ID].push_back(queue);
+    *stream_ptr = queue;
+    return INFINI_STATUS_SUCCESS;
+}
+
+// Stops tracking the stream and releases its command queue.
+infiniStatus_t streamDestroy(infinirtStream_t stream) {
+    cl_command_queue queue = static_cast<cl_command_queue>(stream);
+    {
+        std::lock_guard<std::mutex> lk(init_mutex);
+        // Search every device: the stream may have been created while another device was current.
+        for (auto &qvec : queues) {
+            auto it = std::find(qvec.begin(), qvec.end(), queue);
+            if (it != qvec.end()) {
+                qvec.erase(it);
+                break;
+            }
+        }
+    }
+    CHECK_CLRT(clReleaseCommandQueue(queue));
+    return INFINI_STATUS_SUCCESS;
+}
+
+// Blocks until all work submitted to the stream has finished.
+infiniStatus_t streamSynchronize(infinirtStream_t stream) {
+    CHECK_CLRT(clFinish(static_cast<cl_command_queue>(stream)));
+    return INFINI_STATUS_SUCCESS;
+}
+
+// Makes work submitted to `stream` after this call wait for `event`.
+infiniStatus_t streamWaitEvent(infinirtStream_t stream, infinirtEvent_t event) {
+    InfinirtEventStruct *evs = static_cast<InfinirtEventStruct *>(event);
+    if (!evs || !evs->ev) {
+        return INFINI_STATUS_INTERNAL_ERROR;
+    }
+    if (!stream) {
+        // Without a queue to order against, fall back to blocking the host.
+        CHECK_CLRT(clWaitForEvents(1, &evs->ev));
+        return INFINI_STATUS_SUCCESS;
+    }
+    // A barrier orders the queue behind the event without stalling the calling thread.
+    CHECK_CLRT(clEnqueueBarrierWithWaitList(static_cast<cl_command_queue>(stream), 1, &evs->ev, nullptr));
+    return INFINI_STATUS_SUCCESS;
+}
+
+// Allocates an unrecorded event; release it with eventDestroy().
+infiniStatus_t eventCreate(infinirtEvent_t *event_ptr) {
+    if (!event_ptr) {
+        return INFINI_STATUS_NULL_POINTER;
+    }
+    InfinirtEventStruct *event = new InfinirtEventStruct();
+    *event_ptr = static_cast<infinirtEvent_t>(event);
+    return INFINI_STATUS_SUCCESS;
+}
+
+// Event creation flags are not supported by this backend.
+infiniStatus_t eventCreateWithFlags(infinirtEvent_t *event_ptr, uint32_t flags) {
+    return INFINI_STATUS_NOT_IMPLEMENTED;
+}
+
+// Records the event at the current tail of `stream` by enqueueing a marker.
+infiniStatus_t eventRecord(infinirtEvent_t event, infinirtStream_t stream) {
+    InfinirtEventStruct *evs = static_cast<InfinirtEventStruct *>(event);
+    if (!evs) {
+        return INFINI_STATUS_NULL_POINTER;
+    }
+    cl_event marker = nullptr;
+    CHECK_CLRT(clEnqueueMarkerWithWaitList(static_cast<cl_command_queue>(stream), 0, nullptr, &marker));
+    // Re-recording replaces the previous marker, which would otherwise leak.
+    if (evs->ev) {
+        clReleaseEvent(evs->ev);
+    }
+    evs->ev = marker;
+    evs->bound = true;
+    return INFINI_STATUS_SUCCESS;
+}
+
+// Reports whether a recorded event has completed; fails for an event that was never recorded.
+infiniStatus_t eventQuery(infinirtEvent_t event, infinirtEventStatus_t *status_ptr) {
+    InfinirtEventStruct *evs = static_cast<InfinirtEventStruct *>(event);
+    if (!status_ptr) {
+        return INFINI_STATUS_NULL_POINTER;
+    }
+    if (!evs || !evs->ev) {
+        return INFINI_STATUS_INTERNAL_ERROR;
+    }
+    cl_int status = CL_QUEUED;
+    CHECK_CLRT(clGetEventInfo(evs->ev, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, nullptr));
+    *status_ptr = (status == CL_COMPLETE) ? INFINIRT_EVENT_COMPLETE : INFINIRT_EVENT_NOT_READY;
+    return INFINI_STATUS_SUCCESS;
+}
+
+// Blocks the host until a recorded event completes.
+infiniStatus_t eventSynchronize(infinirtEvent_t event) {
+    InfinirtEventStruct *evs = static_cast<InfinirtEventStruct *>(event);
+    if (!evs || !evs->ev) {
+        return INFINI_STATUS_INTERNAL_ERROR;
+    }
+    CHECK_CLRT(clWaitForEvents(1, &evs->ev));
+    return INFINI_STATUS_SUCCESS;
+}
+
+// Frees the event. An event that was never recorded owns no cl_event but must still be freed.
+infiniStatus_t eventDestroy(infinirtEvent_t event) {
+    InfinirtEventStruct *evs = static_cast<InfinirtEventStruct *>(event);
+    if (!evs) {
+        return INFINI_STATUS_NULL_POINTER;
+    }
+    const cl_int err = evs->ev ? clReleaseEvent(evs->ev) : CL_SUCCESS;
+    delete evs;
+    CHECK_CLRT(err);
+    return INFINI_STATUS_SUCCESS;
+}
+
+// Event timing is not supported by this backend.
+infiniStatus_t eventElapsedTime(float *ms_ptr, infinirtEvent_t start, infinirtEvent_t end) {
+    return INFINI_STATUS_NOT_IMPLEMENTED;
+}
+
+// Allocates `size` bytes with clSVMAlloc and stores the pointer in `*p_ptr`.
+infiniStatus_t mallocDevice(void **p_ptr, size_t size) {
+    return svmAlloc(p_ptr, size);
+}
+
+// Allocates `size` bytes with clSVMAlloc and stores the pointer in `*p_ptr`.
+infiniStatus_t mallocHost(void **p_ptr, size_t size) {
+    return svmAlloc(p_ptr, size);
+}
+
+// Frees memory obtained from mallocDevice().
+infiniStatus_t freeDevice(void *ptr) {
+    clSVMFree(context, ptr);
+    return INFINI_STATUS_SUCCESS;
+}
+
+// Frees memory obtained from mallocHost().
+infiniStatus_t freeHost(void *ptr) {
+    clSVMFree(context, ptr);
+    return INFINI_STATUS_SUCCESS;
+}
+
+// Blocking copy on the current device's default queue; `kind` is ignored.
+infiniStatus_t memcpy(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind) {
+    cl_command_queue default_queue = nullptr;
+    const infiniStatus_t status = getDefaultQueue(&default_queue);
+    if (status != INFINI_STATUS_SUCCESS) {
+        return status;
+    }
+    CHECK_CLRT(clEnqueueSVMMemcpy(default_queue, CL_TRUE, dst, src, size, 0, nullptr, nullptr));
+    return INFINI_STATUS_SUCCESS;
+}
+
+// Non-blocking copy enqueued on `stream`; `kind` is ignored.
+infiniStatus_t memcpyAsync(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind, infinirtStream_t stream) {
+    CHECK_CLRT(clEnqueueSVMMemcpy(static_cast<cl_command_queue>(stream), CL_FALSE, dst, src, size, 0, nullptr, nullptr));
+    return INFINI_STATUS_SUCCESS;
+}
+
+// Stream-ordered allocation is not supported by this backend.
+infiniStatus_t mallocAsync(void **p_ptr, size_t size, infinirtStream_t stream) {
+    return INFINI_STATUS_NOT_IMPLEMENTED;
+}
+
+// Stream-ordered free is not supported by this backend.
+infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) {
+    return INFINI_STATUS_NOT_IMPLEMENTED;
+}
+
+// Returns the cl_device_id of the calling thread's current device.
+infiniStatus_t getOpenclDevice(infinirtOpenclDevice_t *cl_device) {
+    if (cl_device == nullptr) {
+        return INFINI_STATUS_NULL_POINTER;
+    }
+    std::lock_guard<std::mutex> lk(init_mutex);
+    const infiniStatus_t ready = checkReady();
+    if (ready != INFINI_STATUS_SUCCESS) {
+        return ready;
+    }
+    *cl_device = static_cast<infinirtOpenclDevice_t>(devices[CUR_DEV_ID]);
+    return INFINI_STATUS_SUCCESS;
+}
+
+// Returns the cl_context shared by all devices.
+infiniStatus_t getOpenclContext(infinirtOpenclContext_t *cl_context) {
+    if (cl_context == nullptr) {
+        return INFINI_STATUS_NULL_POINTER;
+    }
+    std::lock_guard<std::mutex> lk(init_mutex);
+    if (!initialized) {
+        return INFINI_STATUS_DEVICE_NOT_INITIALIZED;
+    }
+    *cl_context = static_cast<infinirtOpenclContext_t>(context);
+    return INFINI_STATUS_SUCCESS;
+}
+
+// Returns the default cl_command_queue of the calling thread's current device.
+infiniStatus_t getOpenclStream(infinirtOpenclStream_t *cl_queue) {
+    if (cl_queue == nullptr) {
+        return INFINI_STATUS_NULL_POINTER;
+    }
+    cl_command_queue default_queue = nullptr;
+    const infiniStatus_t status = getDefaultQueue(&default_queue);
+    if (status != INFINI_STATUS_SUCCESS) {
+        return status;
+    }
+    *cl_queue = static_cast<infinirtOpenclStream_t>(default_queue);
+    return INFINI_STATUS_SUCCESS;
+}
+
+} // namespace infinirt::opencl
+
+__C infiniStatus_t infinirtGetOpenclDevice(infinirtOpenclDevice_t *cl_device) {
+    return infinirt::opencl::getOpenclDevice(cl_device);
+}
+__C infiniStatus_t infinirtGetOpenclContext(infinirtOpenclContext_t *cl_context) {
+    return infinirt::opencl::getOpenclContext(cl_context);
+}
+__C infiniStatus_t infinirtGetOpenclStream(infinirtOpenclStream_t *cl_queue) {
+    return infinirt::opencl::getOpenclStream(cl_queue);
+}
diff --git a/src/infinirt/opencl/infinirt_opencl.h b/src/infinirt/opencl/infinirt_opencl.h
new file mode 100644
index 000000000..438070a16
--- /dev/null
+++ b/src/infinirt/opencl/infinirt_opencl.h
@@ -0,0 +1,26 @@
+#ifndef __INFINIRT_OPENCL_H__
+#define __INFINIRT_OPENCL_H__
+#include "../infinirt_impl.h"
+
+typedef void *infinirtOpenclDevice_t;
+typedef void *infinirtOpenclContext_t;
+typedef void *infinirtOpenclStream_t;
+
+// Accessors exposing the raw OpenCL handles (cl_device_id, cl_context, cl_command_queue)
+// of the calling thread's current device as opaque pointers.
+__C __export infiniStatus_t infinirtGetOpenclDevice(infinirtOpenclDevice_t *cl_device);
+__C __export infiniStatus_t infinirtGetOpenclContext(infinirtOpenclContext_t *cl_context);
+__C __export infiniStatus_t infinirtGetOpenclStream(infinirtOpenclStream_t *cl_command_queue);
+
+#ifdef __cplusplus
+namespace infinirt::opencl {
+#ifdef ENABLE_OPENCL_API
+infiniStatus_t init();
+INFINIRT_DEVICE_API_IMPL
+#else
+INFINIRT_DEVICE_API_NOOP
+#endif
+} // namespace infinirt::opencl
+#endif // __cplusplus
+
+#endif // __INFINIRT_OPENCL_H__
diff --git a/xmake.lua b/xmake.lua
index a3e756da8..32ce49426 100644
--- a/xmake.lua
+++ b/xmake.lua
@@ -182,6 +182,18 @@ if has_config("ninetoothed") then
     add_defines("ENABLE_NINETOOTHED")
 end
 
+-- OpenCL
+option("opencl")
+    set_default(false)
+    set_showmenu(true)
+    set_description("Whether to compile implementations for OpenCL backend")
+option_end()
+
+if has_config("opencl") then
+    add_defines("ENABLE_OPENCL_API")
+    includes("xmake/opencl.lua")
+end
+
 -- InfiniCCL
 option("ccl")
     set_default(false)
@@ -250,6 +262,9 @@ target("infinirt")
     if has_config("hygon-dcu") then
         add_deps("infinirt-hygon")
     end
+    if has_config("opencl") then
+        add_deps("infinirt-opencl")
+    end
     set_languages("cxx17")
     set_installdir(os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini"))
     add_files("src/infinirt/*.cc")
diff --git a/xmake/opencl.lua b/xmake/opencl.lua
new file mode 100644
index 000000000..255881aa3
--- /dev/null
+++ b/xmake/opencl.lua
@@ -0,0 +1,26 @@
+local OPENCL_HEADERS = os.getenv("OPENCL_HEADERS")
+local OPENCL_LIB = os.getenv("OPENCL_LIB")
+
+if not (OPENCL_HEADERS and OPENCL_LIB) then
+    raise("Please set OPENCL_HEADERS and OPENCL_LIB environment variables")
+end
+
+target("infinirt-opencl")
+    set_kind("static")
+    add_deps("infini-utils")
+    on_install(function (target) end)
+    set_warnings("all", "error")
+    set_languages("cxx17")
+
+    on_load(function (target)
+        target:add("includedirs", OPENCL_HEADERS)
+        target:add("linkdirs", OPENCL_LIB)
+        target:add("links", "OpenCL")
+    end)
+
+    if not is_plat("windows") then
+        add_cxflags("-fPIC")
+    end
+
+    add_files("../src/infinirt/opencl/*.cc")
+target_end()